
// gust.linalg.cuda.matrix_kernels_double.ptx Maven / Gradle / Ivy
// The newest version!
//
// Generated by NVIDIA NVVM Compiler
// Compiler built on Thu Sep 5 10:08:11 2013 (1378400891)
// Cuda compilation tools, release 5.5, V5.5.0
//
.version 3.2
.target sm_30
.address_size 64
.file 1 "/Users/dlwh/src/gust/src/main/resources/gust/linalg/cuda/matrix_kernels_float.cu", 1404413295, 7022
.file 2 "/Users/dlwh/src/gust/src/main/resources/gust/linalg/cuda/function_decls.cuh", 1413672495, 1872
.file 3 "/Developer/NVIDIA/CUDA-5.5/bin//../include/math_functions_dbl_ptx3.h", 1378419394, 118830
.file 4 "/Developer/NVIDIA/CUDA-5.5/bin//../include/device_functions.h", 1378419394, 185228
.file 5 "/Developer/NVIDIA/CUDA-5.5/bin//../include/sm_30_intrinsics.h", 1378419394, 6616
// Forward declaration (no body here) of a device function that takes two
// 64-bit parameters and returns one 64-bit value in func_retval0.
// NOTE(review): judging by the name this is the slow-path argument reduction
// used by the double-precision trig routines; it is not called by any kernel
// in the part of the file reviewed here - confirm against the definition.
.func (.param .b64 func_retval0) __internal_trig_reduction_slowpathd
(
.param .b64 __internal_trig_reduction_slowpathd_param_0,
.param .b64 __internal_trig_reduction_slowpathd_param_1
)
;
// Forward declaration (no body here) of a device function that takes two
// 64-bit parameters and returns one 64-bit value in func_retval0.
// NOTE(review): presumably the helper behind the double-precision pow()
// kernels; it is not called by any kernel in the part of the file reviewed
// here - confirm against the definition.
.func (.param .b64 func_retval0) __internal_accurate_pow
(
.param .b64 __internal_accurate_pow_param_0,
.param .b64 __internal_accurate_pow_param_1
)
;
// map2_transpose_add_double$__cuda_local_var_32580_1747_non_const_tile has been demoted
// map2_transpose_sub_double$__cuda_local_var_32581_1747_non_const_tile has been demoted
// map2_transpose_mul_double$__cuda_local_var_32582_1747_non_const_tile has been demoted
// map2_transpose_div_double$__cuda_local_var_32583_1747_non_const_tile has been demoted
// map2_transpose_mod_double$__cuda_local_var_32584_1747_non_const_tile has been demoted
// map2_transpose_pow_double$__cuda_local_var_32585_1747_non_const_tile has been demoted
// map2_transpose_max_double$__cuda_local_var_32586_1747_non_const_tile has been demoted
// map2_transpose_min_double$__cuda_local_var_32587_1747_non_const_tile has been demoted
// map2_transpose_set_double$__cuda_local_var_32588_1747_non_const_tile has been demoted
// 144-byte, 8-byte-aligned table in constant memory (18 64-bit words if read
// as .b64). NOTE(review): the name suggests the bits of 2/pi used for trig
// argument reduction; it is not referenced by the kernels reviewed here.
.const .align 8 .b8 __cudart_i2opi_d[144] = {8, 93, 141, 31, 177, 95, 251, 107, 234, 146, 82, 138, 247, 57, 7, 61, 123, 241, 229, 235, 199, 186, 39, 117, 45, 234, 95, 158, 102, 63, 70, 79, 183, 9, 203, 39, 207, 126, 54, 109, 31, 109, 10, 90, 139, 17, 47, 239, 15, 152, 5, 222, 255, 151, 248, 31, 59, 40, 249, 189, 139, 95, 132, 156, 244, 57, 83, 131, 57, 214, 145, 57, 65, 126, 95, 180, 38, 112, 156, 233, 132, 68, 187, 46, 245, 53, 130, 232, 62, 167, 41, 177, 28, 235, 29, 254, 28, 146, 209, 9, 234, 46, 73, 6, 224, 210, 77, 66, 58, 110, 36, 183, 97, 197, 187, 222, 171, 99, 81, 254, 65, 144, 67, 60, 153, 149, 98, 219, 192, 221, 52, 245, 209, 87, 39, 252, 41, 21, 68, 78, 110, 131, 249, 162};
// 128-byte, 8-byte-aligned table in constant memory (16 values if read as
// little-endian f64; the last two decode to -0.5 and 1.0).
// NOTE(review): presumably two 8-entry polynomial coefficient sets for sin and
// cos; it is not referenced by the kernels reviewed here.
.const .align 8 .b8 __cudart_sin_cos_coeffs[128] = {186, 94, 120, 249, 101, 219, 229, 61, 70, 210, 176, 44, 241, 229, 90, 190, 146, 227, 172, 105, 227, 29, 199, 62, 161, 98, 219, 25, 160, 1, 42, 191, 24, 8, 17, 17, 17, 17, 129, 63, 84, 85, 85, 85, 85, 85, 197, 191, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100, 129, 253, 32, 131, 255, 168, 189, 40, 133, 239, 193, 167, 238, 33, 62, 217, 230, 6, 142, 79, 126, 146, 190, 233, 188, 221, 25, 160, 1, 250, 62, 71, 93, 193, 22, 108, 193, 86, 191, 81, 85, 85, 85, 85, 85, 165, 63, 0, 0, 0, 0, 0, 0, 224, 191, 0, 0, 0, 0, 0, 0, 240, 63};
// NUL-terminated ASCII string "__CUDA_FTZ" (10 characters + terminator) in
// global memory.
.global .align 1 .b8 $str[11] = {95, 95, 67, 85, 68, 65, 95, 70, 84, 90, 0};
// map_negate_double: element-wise negation of f64 values.
//
//   dst[x * param_3 + y] = -src[x * param_5 + y]
//     for every x < param_1 and y < param_0.
//
// Parameters:
//   param_0 (u32) - bound of the y (inner) index.
//   param_1 (u32) - bound of the x (outer) index.
//   param_2 (u64) - destination pointer (generic address, converted to global).
//   param_3 (u32) - destination stride, in elements, applied to x.
//   param_4 (u64) - source pointer (generic address, converted to global).
//   param_5 (u32) - source stride, in elements, applied to x.
// NOTE(review): this layout looks column-major (param_0 = rows, param_1 = cols,
// strides = leading dimensions) - confirm against the .cu source.
//
// Launch layout: uses the x and y dimensions of both grid and block. Each
// thread starts at its global (x, y) index and advances by the total thread
// count of the grid in that dimension, so any grid size covers the full range.
// Element offsets are computed in signed 32-bit arithmetic and only then
// widened to 64 bits (mul.wide.s32).
.visible .entry map_negate_double(
.param .u32 map_negate_double_param_0,
.param .u32 map_negate_double_param_1,
.param .u64 map_negate_double_param_2,
.param .u32 map_negate_double_param_3,
.param .u64 map_negate_double_param_4,
.param .u32 map_negate_double_param_5
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<3>;
// %r12 = y bound, %r13 = x bound, %r14 = dst stride, %r15 = src stride.
ld.param.u32 %r12, [map_negate_double_param_0];
ld.param.u32 %r13, [map_negate_double_param_1];
ld.param.u64 %rd3, [map_negate_double_param_2];
ld.param.u32 %r14, [map_negate_double_param_3];
ld.param.u64 %rd4, [map_negate_double_param_4];
ld.param.u32 %r15, [map_negate_double_param_5];
// %rd1 = dst base, %rd2 = src base, both as global-space addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 7 1
// %r25 = x = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 7 1
// Nothing to do for this thread if its first x is already out of range.
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB0_6;
.loc 2 7 1
// %r3 = initial y = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 7 22
// %r4 = x step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 7 22
// %r5 = y step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over x.
BB0_2:
.loc 2 7 1
// Skip the inner loop when the starting y is out of range.
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB0_5;
.loc 2 7 1
// Per-x base offsets: %r7 = x * src stride, %r8 = x * dst stride.
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over y (%r9 holds the current y).
BB0_4:
.loc 2 7 1
mov.u32 %r9, %r26;
// Source address = src base + (y + x * src stride) * 8 bytes.
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 7 1
ld.global.f64 %fd1, [%rd6];
neg.f64 %fd2, %fd1;
// Destination address = dst base + (y + x * dst stride) * 8 bytes.
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 7 1
st.global.f64 [%rd8], %fd2;
.loc 2 7 22
// Advance y by the y step and loop while still in range.
add.s32 %r10, %r5, %r9;
.loc 2 7 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB0_4;
BB0_5:
.loc 2 7 22
// Advance x by the x step and loop while still in range.
add.s32 %r25, %r4, %r25;
.loc 2 7 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB0_2;
BB0_6:
.loc 2 7 2
ret;
}
// map_acos_double: element-wise transform of f64 values with the
// double-precision acos routine inlined (debug locations point at
// math_functions_dbl_ptx3.h line 293).
//
//   dst[x * param_3 + y] = acos(src[x * param_5 + y])
//     for every x < param_1 and y < param_0.
//
// Parameters:
//   param_0 (u32) - bound of the y (inner) index.
//   param_1 (u32) - bound of the x (outer) index.
//   param_2 (u64) - destination pointer (generic address, converted to global).
//   param_3 (u32) - destination stride, in elements, applied to x.
//   param_4 (u64) - source pointer (generic address, converted to global).
//   param_5 (u32) - source stride, in elements, applied to x.
// NOTE(review): this layout looks column-major (param_0 = rows, param_1 = cols,
// strides = leading dimensions) - confirm against the .cu source.
//
// Launch layout: uses the x and y dimensions of both grid and block; each
// index advances by nctaid * ntid in its dimension, so any grid size covers
// the full range. Element offsets are computed in signed 32-bit arithmetic.
.visible .entry map_acos_double(
.param .u32 map_acos_double_param_0,
.param .u32 map_acos_double_param_1,
.param .u64 map_acos_double_param_2,
.param .u32 map_acos_double_param_3,
.param .u64 map_acos_double_param_4,
.param .u32 map_acos_double_param_5
)
{
.reg .pred %p<10>;
.reg .f32 %f<5>;
.reg .s32 %r<35>;
.reg .s64 %rd<9>;
.reg .f64 %fd<94>;
// %r10 = y bound, %r11 = x bound, %r12 = dst stride, %r13 = src stride.
ld.param.u32 %r10, [map_acos_double_param_0];
ld.param.u32 %r11, [map_acos_double_param_1];
ld.param.u64 %rd1, [map_acos_double_param_2];
ld.param.u32 %r12, [map_acos_double_param_3];
ld.param.u64 %rd2, [map_acos_double_param_4];
ld.param.u32 %r13, [map_acos_double_param_5];
.loc 2 8 1
// %r33 = x = ntid.x * ctaid.x + tid.x
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r33, %r14, %r15, %r16;
.loc 2 8 1
setp.ge.s32 %p1, %r33, %r11;
@%p1 bra BB1_16;
.loc 2 8 1
mov.u32 %r17, %ntid.y;
.loc 2 8 22
// %r2 = y step = nctaid.y * ntid.y
mov.u32 %r18, %nctaid.y;
mul.lo.s32 %r2, %r18, %r17;
// %rd3 = src base, %rd6 = dst base, both as global-space addresses.
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd6, %rd1;
// Outer loop over x; y is re-initialised on every pass.
BB1_2:
.loc 2 8 1
// %r34 = y = ntid.y * ctaid.y + tid.y
mov.u32 %r19, %ctaid.y;
mov.u32 %r21, %tid.y;
mad.lo.s32 %r34, %r17, %r19, %r21;
.loc 2 8 1
setp.ge.s32 %p2, %r34, %r10;
@%p2 bra BB1_15;
// Inner loop over y.
BB1_3:
.loc 2 8 1
// Load v = src[x * src stride + y].
mad.lo.s32 %r26, %r33, %r13, %r34;
mul.wide.s32 %rd4, %r26, 8;
add.s64 %rd5, %rd3, %rd4;
.loc 2 8 1
ld.global.f64 %fd15, [%rd5];
.loc 3 293 10
// %r6 = high 32 bits of v (carries the sign bit).
{
.reg .b32 %temp;
mov.b64 {%temp, %r6}, %fd15;
}
// %fd1 = |v|, %r27 = high 32 bits of |v|.
abs.f64 %fd1, %fd15;
{
.reg .b32 %temp;
mov.b64 {%temp, %r27}, %fd1;
}
// High word below 0x3FE26666 (|v| under roughly 0.575): take the
// polynomial-in-v^2 path at BB1_11.
setp.lt.s32 %p3, %r27, 1071801958;
@%p3 bra BB1_11;
// Large-|v| path: work with %fd2 = 1 - |v|.
mov.f64 %fd18, 0d3FF0000000000000;
.loc 3 293 10
sub.f64 %fd2, %fd18, %fd1;
{
.reg .b32 %temp;
mov.b64 {%r28, %temp}, %fd2;
}
// %r7 = high 32 bits of (1 - |v|); reused below for range checks.
{
.reg .b32 %temp;
mov.b64 {%temp, %r7}, %fd2;
}
// Subtract 1 from the exponent field: %fd16 = %fd2 / 2 for normal values.
add.s32 %r29, %r7, -1048576;
mov.b64 %fd16, {%r28, %r29};
// Single-precision approximate reciprocal square root of %fd16 as a seed.
// inline asm
cvt.rn.f32.f64 %f1, %fd16;
// inline asm
// inline asm
rsqrt.approx.ftz.f32 %f2, %f1;
// inline asm
// inline asm
cvt.f64.f32 %fd17, %f2;
// inline asm
// Refine the seed in double precision:
//   e = 1 - %fd16 * seed^2;  %fd3 = seed + (0.375 * e + 0.5) * (e * seed)
mul.rn.f64 %fd19, %fd17, %fd17;
neg.f64 %fd20, %fd19;
fma.rn.f64 %fd21, %fd16, %fd20, %fd18;
mov.f64 %fd22, 0d3FE0000000000000;
mov.f64 %fd23, 0d3FD8000000000000;
.loc 3 293 10
fma.rn.f64 %fd24, %fd23, %fd21, %fd22;
mul.rn.f64 %fd25, %fd21, %fd17;
fma.rn.f64 %fd3, %fd24, %fd25, %fd17;
// Horner evaluation (fma chain) of a degree-12 polynomial in %fd2.
mov.f64 %fd26, 0dBEBAC2FE66FAAC4B;
mov.f64 %fd27, 0d3EC715B371155F70;
.loc 3 293 10
fma.rn.f64 %fd28, %fd27, %fd2, %fd26;
mov.f64 %fd29, 0d3ED9A9B88EFCD9B8;
.loc 3 293 10
fma.rn.f64 %fd30, %fd28, %fd2, %fd29;
mov.f64 %fd31, 0d3EDD0F40A8A0C4C3;
.loc 3 293 10
fma.rn.f64 %fd32, %fd30, %fd2, %fd31;
mov.f64 %fd33, 0d3EF46D4CFA9E0E1F;
.loc 3 293 10
fma.rn.f64 %fd34, %fd32, %fd2, %fd33;
mov.f64 %fd35, 0d3F079C168D1E2422;
.loc 3 293 10
fma.rn.f64 %fd36, %fd34, %fd2, %fd35;
mov.f64 %fd37, 0d3F1C9A88C3BCA540;
.loc 3 293 10
fma.rn.f64 %fd38, %fd36, %fd2, %fd37;
mov.f64 %fd39, 0d3F31C4E64BD476DF;
.loc 3 293 10
fma.rn.f64 %fd40, %fd38, %fd2, %fd39;
mov.f64 %fd41, 0d3F46E8BA60009C8F;
.loc 3 293 10
fma.rn.f64 %fd42, %fd40, %fd2, %fd41;
mov.f64 %fd43, 0d3F5F1C71C62B05A2;
.loc 3 293 10
fma.rn.f64 %fd44, %fd42, %fd2, %fd43;
mov.f64 %fd45, 0d3F76DB6DB6DC9F2C;
.loc 3 293 10
fma.rn.f64 %fd46, %fd44, %fd2, %fd45;
mov.f64 %fd47, 0d3F9333333333329C;
.loc 3 293 10
fma.rn.f64 %fd48, %fd46, %fd2, %fd47;
mov.f64 %fd49, 0d3FB5555555555555;
.loc 3 293 10
fma.rn.f64 %fd50, %fd48, %fd2, %fd49;
// %fd4 = poly * (1-|v|)^2 * rsqrt: correction added to the leading term.
mul.f64 %fd51, %fd50, %fd2;
mul.f64 %fd52, %fd51, %fd2;
mul.f64 %fd4, %fd52, %fd3;
// High word of (1 - |v|) < 1 means the difference is zero, negative or
// otherwise not a usable positive value: handled at BB1_6.
setp.lt.s32 %p4, %r7, 1;
@%p4 bra BB1_6;
// Normal case: result for |v| = rsqrt * (1 - |v|) + correction.
fma.rn.f64 %fd93, %fd3, %fd2, %fd4;
bra.uni BB1_7;
BB1_6:
// |v| * 0: yields 0 for finite |v| (NaN propagates).
mov.f64 %fd53, 0d0000000000000000;
.loc 3 293 10
mul.rn.f64 %fd93, %fd1, %fd53;
BB1_7:
// If (1 - |v|) has its sign bit set (|v| > 1), multiply by +infinity so the
// zero from BB1_6 becomes NaN.
setp.gt.s32 %p5, %r7, -1;
@%p5 bra BB1_9;
mov.f64 %fd54, 0d7FF0000000000000;
.loc 3 293 10
mul.rn.f64 %fd93, %fd93, %fd54;
BB1_9:
// Non-negative v: %fd93 is already the result.
setp.gt.s32 %p6, %r6, -1;
@%p6 bra BB1_14;
// Negative v: result = pi - (%fd93 + small correction constant).
// 0x400921FB54442D18 is pi rounded to double.
mov.f64 %fd55, 0dBCA1A62633145C07;
.loc 3 293 10
add.rn.f64 %fd56, %fd93, %fd55;
neg.f64 %fd57, %fd56;
mov.f64 %fd58, 0d400921FB54442D18;
.loc 3 293 10
add.rn.f64 %fd93, %fd58, %fd57;
bra.uni BB1_14;
// Small-|v| path: %fd11 = |v| + |v| * v^2 * P(v^2), with P evaluated by a
// Horner fma chain in %fd59 = v^2.
BB1_11:
.loc 3 293 10
mul.f64 %fd59, %fd1, %fd1;
mov.f64 %fd60, 0dBFB3823B180754AF;
mov.f64 %fd61, 0d3FB0066BDC1895E9;
.loc 3 293 10
fma.rn.f64 %fd62, %fd61, %fd59, %fd60;
mov.f64 %fd63, 0d3FB11E52CC2F79AE;
.loc 3 293 10
fma.rn.f64 %fd64, %fd62, %fd59, %fd63;
mov.f64 %fd65, 0dBF924EAF3526861B;
.loc 3 293 10
fma.rn.f64 %fd66, %fd64, %fd59, %fd65;
mov.f64 %fd67, 0d3F91DF02A31E6CB7;
.loc 3 293 10
fma.rn.f64 %fd68, %fd66, %fd59, %fd67;
mov.f64 %fd69, 0d3F847D18B0EEC6CC;
.loc 3 293 10
fma.rn.f64 %fd70, %fd68, %fd59, %fd69;
mov.f64 %fd71, 0d3F8D0AF961BA53B0;
.loc 3 293 10
fma.rn.f64 %fd72, %fd70, %fd59, %fd71;
mov.f64 %fd73, 0d3F91BF7734CF1C48;
.loc 3 293 10
fma.rn.f64 %fd74, %fd72, %fd59, %fd73;
mov.f64 %fd75, 0d3F96E91483144EF7;
.loc 3 293 10
fma.rn.f64 %fd76, %fd74, %fd59, %fd75;
mov.f64 %fd77, 0d3F9F1C6E0A4F9F81;
.loc 3 293 10
fma.rn.f64 %fd78, %fd76, %fd59, %fd77;
mov.f64 %fd79, 0d3FA6DB6DC27FA92B;
.loc 3 293 10
fma.rn.f64 %fd80, %fd78, %fd59, %fd79;
mov.f64 %fd81, 0d3FB333333320F91B;
.loc 3 293 10
fma.rn.f64 %fd82, %fd80, %fd59, %fd81;
mov.f64 %fd83, 0d3FC5555555555F4D;
.loc 3 293 10
fma.rn.f64 %fd84, %fd82, %fd59, %fd83;
mul.f64 %fd85, %fd84, %fd59;
fma.rn.f64 %fd11, %fd85, %fd1, %fd1;
// Branch on the sign bit of v.
setp.lt.s32 %p7, %r6, 0;
@%p7 bra BB1_13;
// v >= 0: result = pi/2 - (%fd11 - low-order part of pi/2).
// 0x3FF921FB54442D18 is pi/2 rounded to double.
mov.f64 %fd86, 0dBC91A62633145C07;
.loc 3 293 10
add.rn.f64 %fd87, %fd11, %fd86;
neg.f64 %fd88, %fd87;
mov.f64 %fd89, 0d3FF921FB54442D18;
.loc 3 293 10
add.rn.f64 %fd93, %fd89, %fd88;
bra.uni BB1_14;
BB1_13:
// v < 0: result = pi/2 + (%fd11 + low-order part of pi/2).
mov.f64 %fd90, 0d3C91A62633145C07;
.loc 3 293 10
add.rn.f64 %fd91, %fd11, %fd90;
mov.f64 %fd92, 0d3FF921FB54442D18;
.loc 3 293 10
add.rn.f64 %fd93, %fd92, %fd91;
// Store the result to dst[x * dst stride + y].
BB1_14:
.loc 2 8 42
mad.lo.s32 %r30, %r33, %r12, %r34;
mul.wide.s32 %rd7, %r30, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 2 8 42
st.global.f64 [%rd8], %fd93;
.loc 2 8 22
// Advance y by the y step and loop while still in range.
add.s32 %r34, %r2, %r34;
.loc 2 8 1
setp.lt.s32 %p8, %r34, %r10;
@%p8 bra BB1_3;
BB1_15:
.loc 2 8 22
// Advance x by nctaid.x * ntid.x and loop while still in range.
mov.u32 %r31, %nctaid.x;
mad.lo.s32 %r33, %r31, %r14, %r33;
.loc 2 8 1
setp.lt.s32 %p9, %r33, %r11;
@%p9 bra BB1_2;
BB1_16:
.loc 2 8 2
ret;
}
// map_acosh_double: element-wise transform of f64 values with the
// double-precision acosh routine inlined (debug locations point at
// math_functions_dbl_ptx3.h line 298).
//
//   dst[x * param_3 + y] = acosh(src[x * param_5 + y])
//     for every x < param_1 and y < param_0.
//
// Parameters:
//   param_0 (u32) - bound of the y (inner) index.
//   param_1 (u32) - bound of the x (outer) index.
//   param_2 (u64) - destination pointer (generic address, converted to global).
//   param_3 (u32) - destination stride, in elements, applied to x.
//   param_4 (u64) - source pointer (generic address, converted to global).
//   param_5 (u32) - source stride, in elements, applied to x.
// NOTE(review): this layout looks column-major (param_0 = rows, param_1 = cols,
// strides = leading dimensions) - confirm against the .cu source.
//
// Launch layout: uses the x and y dimensions of both grid and block; each
// index advances by nctaid * ntid in its dimension, so any grid size covers
// the full range. Element offsets are computed in signed 32-bit arithmetic.
//
// Numerical outline, with v the input and t = v - 1:
//   |t| <= 2^52 : s = t + sqrt(v*t + t), then a log(1 + s) evaluation that
//                 picks between a small-argument series (BB2_18) and a full
//                 logarithm of (s + 1).
//   |t| >  2^52 : logarithm of v itself, plus ln 2 (BB2_19 .. BB2_31).
.visible .entry map_acosh_double(
.param .u32 map_acosh_double_param_0,
.param .u32 map_acosh_double_param_1,
.param .u64 map_acosh_double_param_2,
.param .u32 map_acosh_double_param_3,
.param .u64 map_acosh_double_param_4,
.param .u32 map_acosh_double_param_5
)
{
.reg .pred %p<25>;
.reg .f32 %f<9>;
.reg .s32 %r<79>;
.reg .s64 %rd<9>;
.reg .f64 %fd<141>;
// %r30 = y bound, %r31 = x bound, %r32 = dst stride, %r33 = src stride.
ld.param.u32 %r30, [map_acosh_double_param_0];
ld.param.u32 %r31, [map_acosh_double_param_1];
ld.param.u64 %rd3, [map_acosh_double_param_2];
ld.param.u32 %r32, [map_acosh_double_param_3];
ld.param.u64 %rd4, [map_acosh_double_param_4];
ld.param.u32 %r33, [map_acosh_double_param_5];
// %rd1 = dst base, %rd2 = src base, both as global-space addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 9 1
// %r69 = x = ntid.x * ctaid.x + tid.x
mov.u32 %r34, %ntid.x;
mov.u32 %r35, %ctaid.x;
mov.u32 %r36, %tid.x;
mad.lo.s32 %r69, %r34, %r35, %r36;
.loc 2 9 1
setp.ge.s32 %p1, %r69, %r31;
@%p1 bra BB2_34;
.loc 2 9 1
mov.u32 %r37, %ntid.y;
.loc 2 9 22
// %r2 = y step = nctaid.y * ntid.y
mov.u32 %r38, %nctaid.y;
mul.lo.s32 %r2, %r38, %r37;
// Outer loop over x; y is re-initialised on every pass.
BB2_2:
.loc 2 9 1
// %r70 = y = ntid.y * ctaid.y + tid.y
mov.u32 %r39, %ctaid.y;
mov.u32 %r41, %tid.y;
mad.lo.s32 %r70, %r37, %r39, %r41;
.loc 2 9 1
setp.ge.s32 %p2, %r70, %r30;
@%p2 bra BB2_33;
.loc 2 9 1
// Per-x base offsets: %r4 = x * src stride, %r5 = x * dst stride.
mul.lo.s32 %r4, %r69, %r33;
.loc 2 9 42
mul.lo.s32 %r5, %r69, %r32;
// Inner loop over y.
BB2_4:
.loc 2 9 1
// Load v = src[x * src stride + y] into %fd1.
add.s32 %r46, %r70, %r4;
mul.wide.s32 %rd5, %r46, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 9 1
ld.global.f64 %fd1, [%rd6];
.loc 3 298 10
// %fd2 = t = v - 1. If |t| > 2^52 (0x4330000000000000) use the
// large-argument path at BB2_19.
add.f64 %fd2, %fd1, 0dBFF0000000000000;
abs.f64 %fd21, %fd2;
setp.gt.f64 %p3, %fd21, 0d4330000000000000;
@%p3 bra BB2_19;
// %fd22 = v*t + t = (v + 1)(v - 1); %fd3 = s = t + sqrt(%fd22).
fma.rn.f64 %fd22, %fd1, %fd2, %fd2;
sqrt.rn.f64 %fd23, %fd22;
add.f64 %fd3, %fd2, %fd23;
// %r47 = high 32 bits of s. The small-argument series at BB2_18 is used when
// the high word is below 0x3FE55555 as unsigned, or below 0xBFD99999 as
// signed.
{
.reg .b32 %temp;
mov.b64 {%temp, %r47}, %fd3;
}
setp.lt.u32 %p4, %r47, 1071994197;
setp.lt.s32 %p5, %r47, -1076258407;
or.pred %p6, %p4, %p5;
@%p6 bra BB2_18;
// Full logarithm of %fd4 = s + 1. %r71/%r72 = its high/low 32-bit words.
add.f64 %fd4, %fd3, 0d3FF0000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r71}, %fd4;
}
{
.reg .b32 %temp;
mov.b64 {%r72, %temp}, %fd4;
}
// Ordinary case 0 < arg < +inf goes to BB2_12; otherwise handle specials.
setp.gt.f64 %p7, %fd4, 0d0000000000000000;
setp.lt.f64 %p8, %fd4, 0d7FF0000000000000;
and.pred %p9, %p7, %p8;
@%p9 bra BB2_12;
// NaN argument (unordered compare): result = arg + arg (BB2_11).
abs.f64 %fd24, %fd4;
setp.gtu.f64 %p10, %fd24, 0d7FF0000000000000;
@%p10 bra BB2_11;
// Zero argument: result = -infinity.
setp.neu.f64 %p11, %fd4, 0d0000000000000000;
@%p11 bra BB2_10;
mov.f64 %fd140, 0dFFF0000000000000;
bra.uni BB2_32;
BB2_10:
.loc 3 298 10
// +infinity stays +infinity; anything else left here yields NaN.
setp.eq.f64 %p12, %fd4, 0d7FF0000000000000;
selp.f64 %fd140, %fd4, 0dFFF8000000000000, %p12;
bra.uni BB2_32;
BB2_11:
.loc 3 298 10
add.f64 %fd140, %fd4, %fd4;
bra.uni BB2_32;
BB2_12:
.loc 3 298 10
// High word below 0x00100000 means a zero exponent field (subnormal):
// rescale at BB2_14. Otherwise the exponent bias is -1023.
setp.lt.u32 %p13, %r71, 1048576;
@%p13 bra BB2_14;
mov.u32 %r73, -1023;
bra.uni BB2_15;
BB2_14:
.loc 3 298 10
// Multiply by 2^54 (0x4350000000000000) and compensate in the bias
// (-1023 - 54 = -1077).
mul.f64 %fd26, %fd4, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r71}, %fd26;
}
{
.reg .b32 %temp;
mov.b64 {%r72, %temp}, %fd26;
}
mov.u32 %r73, -1077;
BB2_15:
.loc 3 298 10
// %r74 = unbiased exponent e; %fd137 = mantissa m with the exponent field
// forced to 0x3FF (mask 0x800FFFFF, then OR 0x3FF00000), so m is in [1, 2).
shr.s32 %r50, %r71, 20;
add.s32 %r74, %r73, %r50;
and.b32 %r51, %r71, -2146435073;
or.b32 %r52, %r51, 1072693248;
mov.b64 %fd137, {%r72, %r52};
// If the high word of m is at least 0x3FF6A09F (about sqrt(2)), halve m and
// increment e.
setp.lt.u32 %p14, %r52, 1073127583;
@%p14 bra BB2_17;
{
.reg .b32 %temp;
mov.b64 {%r53, %temp}, %fd137;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r54}, %fd137;
}
add.s32 %r55, %r54, -1048576;
mov.b64 %fd137, {%r53, %r55};
add.s32 %r74, %r74, 1;
BB2_17:
// %fd27 = m + 1. Seed 1/(m + 1) with a single-precision approximate
// reciprocal, then refine it in double precision into %fd33.
add.f64 %fd27, %fd137, 0d3FF0000000000000;
mov.f64 %fd29, 0d3FF0000000000000;
.loc 3 298 10
// inline asm
cvt.rn.f32.f64 %f1,%fd27;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd28,%f2;
// inline asm
neg.f64 %fd30, %fd27;
fma.rn.f64 %fd31, %fd30, %fd28, %fd29;
fma.rn.f64 %fd32, %fd31, %fd31, %fd31;
fma.rn.f64 %fd33, %fd32, %fd28, %fd28;
// %fd34 = m - 1; %fd36 = 2 * (m - 1) / (m + 1); %fd37 = %fd36 squared.
add.f64 %fd34, %fd137, 0dBFF0000000000000;
mul.f64 %fd35, %fd34, %fd33;
fma.rn.f64 %fd36, %fd34, %fd33, %fd35;
mul.f64 %fd37, %fd36, %fd36;
// Horner evaluation (fma chain) of a polynomial in %fd37.
mov.f64 %fd38, 0d3ED0EE258B7A8B04;
mov.f64 %fd39, 0d3EB1380B3AE80F1E;
.loc 3 298 10
fma.rn.f64 %fd40, %fd39, %fd37, %fd38;
mov.f64 %fd41, 0d3EF3B2669F02676F;
.loc 3 298 10
fma.rn.f64 %fd42, %fd40, %fd37, %fd41;
mov.f64 %fd43, 0d3F1745CBA9AB0956;
.loc 3 298 10
fma.rn.f64 %fd44, %fd42, %fd37, %fd43;
mov.f64 %fd45, 0d3F3C71C72D1B5154;
.loc 3 298 10
fma.rn.f64 %fd46, %fd44, %fd37, %fd45;
mov.f64 %fd47, 0d3F624924923BE72D;
.loc 3 298 10
fma.rn.f64 %fd48, %fd46, %fd37, %fd47;
mov.f64 %fd49, 0d3F8999999999A3C4;
.loc 3 298 10
fma.rn.f64 %fd50, %fd48, %fd37, %fd49;
mov.f64 %fd51, 0d3FB5555555555554;
.loc 3 298 10
fma.rn.f64 %fd52, %fd50, %fd37, %fd51;
// Rounding-error compensation for %fd36 plus the polynomial tail -> %fd59.
sub.f64 %fd53, %fd34, %fd36;
add.f64 %fd54, %fd53, %fd53;
neg.f64 %fd55, %fd36;
fma.rn.f64 %fd56, %fd55, %fd34, %fd54;
mul.f64 %fd57, %fd33, %fd56;
mul.f64 %fd58, %fd52, %fd37;
fma.rn.f64 %fd59, %fd58, %fd36, %fd57;
// Combine with e * ln 2, using a high part (0x3FE62E42FEFA39EF) and a
// low-order part (0x3C7ABC9E3B39803F) of ln 2.
cvt.rn.f64.s32 %fd60, %r74;
mov.f64 %fd61, 0d3FE62E42FEFA39EF;
.loc 3 298 10
fma.rn.f64 %fd62, %fd60, %fd61, %fd36;
neg.s32 %r56, %r74;
cvt.rn.f64.s32 %fd63, %r56;
fma.rn.f64 %fd64, %fd63, %fd61, %fd62;
sub.f64 %fd65, %fd64, %fd36;
sub.f64 %fd66, %fd59, %fd65;
mov.f64 %fd67, 0d3C7ABC9E3B39803F;
.loc 3 298 10
fma.rn.f64 %fd68, %fd60, %fd67, %fd66;
add.f64 %fd140, %fd62, %fd68;
bra.uni BB2_32;
// Small-argument series for log(1 + s):
//   %fd72 = -s * s / (s + 2);  %fd73 = s + %fd72;  %fd74 = %fd73 squared
//   result = P(%fd74) * %fd74 * %fd73 + %fd72 + s
BB2_18:
.loc 3 298 10
add.f64 %fd69, %fd3, 0d4000000000000000;
div.rn.f64 %fd70, %fd3, %fd69;
neg.f64 %fd71, %fd3;
mul.f64 %fd72, %fd70, %fd71;
add.f64 %fd73, %fd3, %fd72;
mul.f64 %fd74, %fd73, %fd73;
mov.f64 %fd75, 0d3ED087FFCEB2DC44;
mov.f64 %fd76, 0d3EB372FB2FBE14B5;
.loc 3 298 10
fma.rn.f64 %fd77, %fd76, %fd74, %fd75;
mov.f64 %fd78, 0d3EF3B9FF890F468C;
.loc 3 298 10
fma.rn.f64 %fd79, %fd77, %fd74, %fd78;
mov.f64 %fd80, 0d3F17457EFD51BAF8;
.loc 3 298 10
fma.rn.f64 %fd81, %fd79, %fd74, %fd80;
mov.f64 %fd82, 0d3F3C71C8DE3CE825;
.loc 3 298 10
fma.rn.f64 %fd83, %fd81, %fd74, %fd82;
mov.f64 %fd84, 0d3F6249248FA4661F;
.loc 3 298 10
fma.rn.f64 %fd85, %fd83, %fd74, %fd84;
mov.f64 %fd86, 0d3F899999999D70C4;
.loc 3 298 10
fma.rn.f64 %fd87, %fd85, %fd74, %fd86;
mov.f64 %fd88, 0d3FB5555555555462;
.loc 3 298 10
fma.rn.f64 %fd89, %fd87, %fd74, %fd88;
mul.f64 %fd90, %fd89, %fd74;
fma.rn.f64 %fd91, %fd90, %fd73, %fd72;
add.f64 %fd140, %fd91, %fd3;
bra.uni BB2_32;
// Large-argument path: logarithm of v itself (same algorithm as above, with
// %r75/%r76 = high/low words of v), followed by "+ ln 2" at BB2_31.
BB2_19:
.loc 3 298 10
{
.reg .b32 %temp;
mov.b64 {%temp, %r75}, %fd1;
}
{
.reg .b32 %temp;
mov.b64 {%r76, %temp}, %fd1;
}
// Ordinary case 0 < v < +inf goes to BB2_25; otherwise handle specials.
setp.lt.f64 %p15, %fd1, 0d7FF0000000000000;
setp.gt.f64 %p16, %fd1, 0d0000000000000000;
and.pred %p17, %p16, %p15;
@%p17 bra BB2_25;
// NaN: v + v (BB2_24). Zero: -infinity. +infinity: unchanged. Else: NaN.
abs.f64 %fd92, %fd1;
setp.gtu.f64 %p18, %fd92, 0d7FF0000000000000;
@%p18 bra BB2_24;
setp.neu.f64 %p19, %fd1, 0d0000000000000000;
@%p19 bra BB2_23;
mov.f64 %fd139, 0dFFF0000000000000;
bra.uni BB2_31;
BB2_23:
.loc 3 298 10
setp.eq.f64 %p20, %fd1, 0d7FF0000000000000;
selp.f64 %fd139, %fd1, 0dFFF8000000000000, %p20;
bra.uni BB2_31;
BB2_24:
.loc 3 298 10
add.f64 %fd139, %fd1, %fd1;
bra.uni BB2_31;
BB2_25:
.loc 3 298 10
// Subnormal check and exponent bias selection, as in BB2_12.
setp.lt.u32 %p21, %r75, 1048576;
@%p21 bra BB2_27;
mov.u32 %r77, -1023;
bra.uni BB2_28;
BB2_27:
.loc 3 298 10
mul.f64 %fd94, %fd1, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r75}, %fd94;
}
{
.reg .b32 %temp;
mov.b64 {%r76, %temp}, %fd94;
}
mov.u32 %r77, -1077;
BB2_28:
.loc 3 298 10
// %r78 = unbiased exponent e; %fd138 = mantissa m in [1, 2).
shr.s32 %r59, %r75, 20;
add.s32 %r78, %r77, %r59;
and.b32 %r60, %r75, -2146435073;
or.b32 %r61, %r60, 1072693248;
mov.b64 %fd138, {%r76, %r61};
// Halve m and bump e when the high word of m is at least 0x3FF6A09F.
setp.lt.u32 %p22, %r61, 1073127583;
@%p22 bra BB2_30;
{
.reg .b32 %temp;
mov.b64 {%r62, %temp}, %fd138;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r63}, %fd138;
}
add.s32 %r64, %r63, -1048576;
mov.b64 %fd138, {%r62, %r64};
add.s32 %r78, %r78, 1;
BB2_30:
// Refined reciprocal of (m + 1) -> %fd101.
add.f64 %fd95, %fd138, 0d3FF0000000000000;
mov.f64 %fd97, 0d3FF0000000000000;
.loc 3 298 10
// inline asm
cvt.rn.f32.f64 %f5,%fd95;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd96,%f6;
// inline asm
neg.f64 %fd98, %fd95;
fma.rn.f64 %fd99, %fd98, %fd96, %fd97;
fma.rn.f64 %fd100, %fd99, %fd99, %fd99;
fma.rn.f64 %fd101, %fd100, %fd96, %fd96;
// %fd102 = m - 1; %fd104 = 2 * (m - 1) / (m + 1); %fd105 = %fd104 squared.
add.f64 %fd102, %fd138, 0dBFF0000000000000;
mul.f64 %fd103, %fd102, %fd101;
fma.rn.f64 %fd104, %fd102, %fd101, %fd103;
mul.f64 %fd105, %fd104, %fd104;
// Horner evaluation (fma chain) of a polynomial in %fd105.
mov.f64 %fd106, 0d3ED0EE258B7A8B04;
mov.f64 %fd107, 0d3EB1380B3AE80F1E;
.loc 3 298 10
fma.rn.f64 %fd108, %fd107, %fd105, %fd106;
mov.f64 %fd109, 0d3EF3B2669F02676F;
.loc 3 298 10
fma.rn.f64 %fd110, %fd108, %fd105, %fd109;
mov.f64 %fd111, 0d3F1745CBA9AB0956;
.loc 3 298 10
fma.rn.f64 %fd112, %fd110, %fd105, %fd111;
mov.f64 %fd113, 0d3F3C71C72D1B5154;
.loc 3 298 10
fma.rn.f64 %fd114, %fd112, %fd105, %fd113;
mov.f64 %fd115, 0d3F624924923BE72D;
.loc 3 298 10
fma.rn.f64 %fd116, %fd114, %fd105, %fd115;
mov.f64 %fd117, 0d3F8999999999A3C4;
.loc 3 298 10
fma.rn.f64 %fd118, %fd116, %fd105, %fd117;
mov.f64 %fd119, 0d3FB5555555555554;
.loc 3 298 10
fma.rn.f64 %fd120, %fd118, %fd105, %fd119;
// Rounding-error compensation plus polynomial tail -> %fd127.
sub.f64 %fd121, %fd102, %fd104;
add.f64 %fd122, %fd121, %fd121;
neg.f64 %fd123, %fd104;
fma.rn.f64 %fd124, %fd123, %fd102, %fd122;
mul.f64 %fd125, %fd101, %fd124;
mul.f64 %fd126, %fd120, %fd105;
fma.rn.f64 %fd127, %fd126, %fd104, %fd125;
// Combine with e * ln 2 (high and low-order parts) -> %fd139.
cvt.rn.f64.s32 %fd128, %r78;
mov.f64 %fd129, 0d3FE62E42FEFA39EF;
.loc 3 298 10
fma.rn.f64 %fd130, %fd128, %fd129, %fd104;
neg.s32 %r65, %r78;
cvt.rn.f64.s32 %fd131, %r65;
fma.rn.f64 %fd132, %fd131, %fd129, %fd130;
sub.f64 %fd133, %fd132, %fd104;
sub.f64 %fd134, %fd127, %fd133;
mov.f64 %fd135, 0d3C7ABC9E3B39803F;
.loc 3 298 10
fma.rn.f64 %fd136, %fd128, %fd135, %fd134;
add.f64 %fd139, %fd130, %fd136;
// Large-argument result = log(v) + ln 2.
BB2_31:
add.f64 %fd140, %fd139, 0d3FE62E42FEFA39EF;
// Store the result to dst[x * dst stride + y].
BB2_32:
.loc 2 9 42
add.s32 %r66, %r70, %r5;
mul.wide.s32 %rd7, %r66, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 9 42
st.global.f64 [%rd8], %fd140;
.loc 2 9 22
// Advance y by the y step and loop while still in range.
add.s32 %r70, %r2, %r70;
.loc 2 9 1
setp.lt.s32 %p23, %r70, %r30;
@%p23 bra BB2_4;
BB2_33:
.loc 2 9 22
// Advance x by nctaid.x * ntid.x and loop while still in range.
mov.u32 %r67, %nctaid.x;
mad.lo.s32 %r69, %r67, %r34, %r69;
.loc 2 9 1
setp.lt.s32 %p24, %r69, %r31;
@%p24 bra BB2_2;
BB2_34:
.loc 2 9 2
ret;
}
// map_asin_double: element-wise transform of f64 values with the
// double-precision asin routine inlined (debug locations point at
// math_functions_dbl_ptx3.h line 288).
//
//   dst[x * param_3 + y] = asin(src[x * param_5 + y])
//     for every x < param_1 and y < param_0.
//
// Parameters:
//   param_0 (u32) - bound of the y (inner) index.
//   param_1 (u32) - bound of the x (outer) index.
//   param_2 (u64) - destination pointer (generic address, converted to global).
//   param_3 (u32) - destination stride, in elements, applied to x.
//   param_4 (u64) - source pointer (generic address, converted to global).
//   param_5 (u32) - source stride, in elements, applied to x.
// NOTE(review): this layout looks column-major (param_0 = rows, param_1 = cols,
// strides = leading dimensions) - confirm against the .cu source.
//
// Launch layout: uses the x and y dimensions of both grid and block; each
// index advances by nctaid * ntid in its dimension, so any grid size covers
// the full range. Element offsets are computed in signed 32-bit arithmetic.
.visible .entry map_asin_double(
.param .u32 map_asin_double_param_0,
.param .u32 map_asin_double_param_1,
.param .u64 map_asin_double_param_2,
.param .u32 map_asin_double_param_3,
.param .u64 map_asin_double_param_4,
.param .u32 map_asin_double_param_5
)
{
.reg .pred %p<7>;
.reg .s32 %r<37>;
.reg .s64 %rd<9>;
.reg .f64 %fd<71>;
// %r11 = y bound, %r12 = x bound, %r13 = dst stride, %r14 = src stride.
ld.param.u32 %r11, [map_asin_double_param_0];
ld.param.u32 %r12, [map_asin_double_param_1];
ld.param.u64 %rd3, [map_asin_double_param_2];
ld.param.u32 %r13, [map_asin_double_param_3];
ld.param.u64 %rd4, [map_asin_double_param_4];
ld.param.u32 %r14, [map_asin_double_param_5];
// %rd1 = dst base, %rd2 = src base, both as global-space addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 10 1
// %r35 = x = ntid.x * ctaid.x + tid.x
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r35, %r15, %r16, %r17;
.loc 2 10 1
setp.ge.s32 %p1, %r35, %r12;
@%p1 bra BB3_10;
.loc 2 10 1
// %r2 = initial y = ntid.y * ctaid.y + tid.y (computed once).
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r2, %r19, %r20, %r18;
.loc 2 10 22
// %r3 = y step = nctaid.y * ntid.y
mov.u32 %r21, %nctaid.y;
mul.lo.s32 %r3, %r21, %r19;
// Outer loop over x.
BB3_2:
.loc 2 10 1
setp.ge.s32 %p2, %r2, %r11;
@%p2 bra BB3_9;
.loc 2 10 1
// Per-x base offsets: %r5 = x * src stride, %r6 = x * dst stride.
mul.lo.s32 %r5, %r35, %r14;
.loc 2 10 42
mul.lo.s32 %r6, %r35, %r13;
mov.u32 %r36, %r2;
// Inner loop over y (%r7 holds the current y).
BB3_4:
.loc 2 10 1
mov.u32 %r7, %r36;
// Load v = src[x * src stride + y].
add.s32 %r22, %r7, %r5;
mul.wide.s32 %rd5, %r22, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 10 1
ld.global.f64 %fd6, [%rd6];
.loc 3 288 10
// %r8 = high 32 bits of v (carries the sign bit).
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd6;
}
// %fd1 = |v|, %r23 = high 32 bits of |v|.
abs.f64 %fd1, %fd6;
{
.reg .b32 %temp;
mov.b64 {%temp, %r23}, %fd1;
}
// High word below 0x3FE26666 (|v| under roughly 0.575): take the direct
// polynomial path at BB3_7.
setp.lt.s32 %p3, %r23, 1071801958;
@%p3 bra BB3_7;
// Large-|v| path: %fd9 = 0.5 - 0.5 * |v| = (1 - |v|) / 2, %fd10 = sqrt(%fd9).
mov.f64 %fd7, 0d3FE0000000000000;
mov.f64 %fd8, 0dBFE0000000000000;
.loc 3 288 10
fma.rn.f64 %fd9, %fd8, %fd1, %fd7;
sqrt.rn.f64 %fd10, %fd9;
// Horner evaluation (fma chain) of the polynomial P in %fd9; same
// coefficients as the BB3_7 path.
mov.f64 %fd11, 0dBFB3823B180754AF;
mov.f64 %fd12, 0d3FB0066BDC1895E9;
.loc 3 288 10
fma.rn.f64 %fd13, %fd12, %fd9, %fd11;
mov.f64 %fd14, 0d3FB11E52CC2F79AE;
.loc 3 288 10
fma.rn.f64 %fd15, %fd13, %fd9, %fd14;
mov.f64 %fd16, 0dBF924EAF3526861B;
.loc 3 288 10
fma.rn.f64 %fd17, %fd15, %fd9, %fd16;
mov.f64 %fd18, 0d3F91DF02A31E6CB7;
.loc 3 288 10
fma.rn.f64 %fd19, %fd17, %fd9, %fd18;
mov.f64 %fd20, 0d3F847D18B0EEC6CC;
.loc 3 288 10
fma.rn.f64 %fd21, %fd19, %fd9, %fd20;
mov.f64 %fd22, 0d3F8D0AF961BA53B0;
.loc 3 288 10
fma.rn.f64 %fd23, %fd21, %fd9, %fd22;
mov.f64 %fd24, 0d3F91BF7734CF1C48;
.loc 3 288 10
fma.rn.f64 %fd25, %fd23, %fd9, %fd24;
mov.f64 %fd26, 0d3F96E91483144EF7;
.loc 3 288 10
fma.rn.f64 %fd27, %fd25, %fd9, %fd26;
mov.f64 %fd28, 0d3F9F1C6E0A4F9F81;
.loc 3 288 10
fma.rn.f64 %fd29, %fd27, %fd9, %fd28;
mov.f64 %fd30, 0d3FA6DB6DC27FA92B;
.loc 3 288 10
fma.rn.f64 %fd31, %fd29, %fd9, %fd30;
mov.f64 %fd32, 0d3FB333333320F91B;
.loc 3 288 10
fma.rn.f64 %fd33, %fd31, %fd9, %fd32;
mov.f64 %fd34, 0d3FC5555555555F4D;
.loc 3 288 10
fma.rn.f64 %fd35, %fd33, %fd9, %fd34;
// %fd36 = P * %fd9; %fd37 = -2 * sqrt(%fd9).
mul.f64 %fd36, %fd35, %fd9;
mul.f64 %fd37, %fd10, 0dC000000000000000;
// Assemble pi/2 - 2*sqrt*(1 + %fd36), adding pi/2 as two pi/4 terms
// (0x3FE921FB54442D18) plus a low-order correction (0x3C91A62633145C07).
mov.f64 %fd38, 0d3C91A62633145C07;
.loc 3 288 10
fma.rn.f64 %fd39, %fd37, %fd36, %fd38;
add.f64 %fd40, %fd37, 0d3FE921FB54442D18;
add.f64 %fd41, %fd40, %fd39;
add.f64 %fd70, %fd41, 0d3FE921FB54442D18;
// High word of v above 0x3FEFFFFF (positive v >= 1.0): store as is.
setp.gt.s32 %p4, %r8, 1072693247;
@%p4 bra BB3_8;
// Otherwise OR the sign bit of v (mask 0x80000000) into the result.
{
.reg .b32 %temp;
mov.b64 {%r24, %temp}, %fd70;
}
and.b32 %r25, %r8, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%temp, %r26}, %fd70;
}
or.b32 %r27, %r26, %r25;
mov.b64 %fd70, {%r24, %r27};
bra.uni BB3_8;
// Small-|v| path: %fd69 = |v| + |v| * v^2 * P(v^2), with P evaluated by a
// Horner fma chain in %fd42 = v^2.
BB3_7:
.loc 3 288 10
mul.f64 %fd42, %fd1, %fd1;
mov.f64 %fd43, 0dBFB3823B180754AF;
mov.f64 %fd44, 0d3FB0066BDC1895E9;
.loc 3 288 10
fma.rn.f64 %fd45, %fd44, %fd42, %fd43;
mov.f64 %fd46, 0d3FB11E52CC2F79AE;
.loc 3 288 10
fma.rn.f64 %fd47, %fd45, %fd42, %fd46;
mov.f64 %fd48, 0dBF924EAF3526861B;
.loc 3 288 10
fma.rn.f64 %fd49, %fd47, %fd42, %fd48;
mov.f64 %fd50, 0d3F91DF02A31E6CB7;
.loc 3 288 10
fma.rn.f64 %fd51, %fd49, %fd42, %fd50;
mov.f64 %fd52, 0d3F847D18B0EEC6CC;
.loc 3 288 10
fma.rn.f64 %fd53, %fd51, %fd42, %fd52;
mov.f64 %fd54, 0d3F8D0AF961BA53B0;
.loc 3 288 10
fma.rn.f64 %fd55, %fd53, %fd42, %fd54;
mov.f64 %fd56, 0d3F91BF7734CF1C48;
.loc 3 288 10
fma.rn.f64 %fd57, %fd55, %fd42, %fd56;
mov.f64 %fd58, 0d3F96E91483144EF7;
.loc 3 288 10
fma.rn.f64 %fd59, %fd57, %fd42, %fd58;
mov.f64 %fd60, 0d3F9F1C6E0A4F9F81;
.loc 3 288 10
fma.rn.f64 %fd61, %fd59, %fd42, %fd60;
mov.f64 %fd62, 0d3FA6DB6DC27FA92B;
.loc 3 288 10
fma.rn.f64 %fd63, %fd61, %fd42, %fd62;
mov.f64 %fd64, 0d3FB333333320F91B;
.loc 3 288 10
fma.rn.f64 %fd65, %fd63, %fd42, %fd64;
mov.f64 %fd66, 0d3FC5555555555F4D;
.loc 3 288 10
fma.rn.f64 %fd67, %fd65, %fd42, %fd66;
mul.f64 %fd68, %fd67, %fd42;
fma.rn.f64 %fd69, %fd68, %fd1, %fd1;
// OR the sign bit of v (mask 0x80000000) into the high word of the result.
{
.reg .b32 %temp;
mov.b64 {%r28, %temp}, %fd69;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r29}, %fd69;
}
and.b32 %r30, %r8, -2147483648;
or.b32 %r31, %r29, %r30;
mov.b64 %fd70, {%r28, %r31};
// Store the result to dst[x * dst stride + y].
BB3_8:
.loc 2 10 42
add.s32 %r32, %r7, %r6;
mul.wide.s32 %rd7, %r32, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 10 42
st.global.f64 [%rd8], %fd70;
.loc 2 10 22
// Advance y by the y step and loop while still in range.
add.s32 %r9, %r3, %r7;
.loc 2 10 1
setp.lt.s32 %p5, %r9, %r11;
mov.u32 %r36, %r9;
@%p5 bra BB3_4;
BB3_9:
.loc 2 10 22
// Advance x by nctaid.x * ntid.x and loop while still in range.
mov.u32 %r33, %nctaid.x;
mad.lo.s32 %r35, %r33, %r15, %r35;
.loc 2 10 1
setp.lt.s32 %p6, %r35, %r12;
@%p6 bra BB3_2;
BB3_10:
.loc 2 10 2
ret;
}
.visible .entry map_asinh_double(
.param .u32 map_asinh_double_param_0,
.param .u32 map_asinh_double_param_1,
.param .u64 map_asinh_double_param_2,
.param .u32 map_asinh_double_param_3,
.param .u64 map_asinh_double_param_4,
.param .u32 map_asinh_double_param_5
)
{
.reg .pred %p<25>;
.reg .f32 %f<9>;
.reg .s32 %r<84>;
.reg .s64 %rd<9>;
.reg .f64 %fd<144>;
ld.param.u32 %r30, [map_asinh_double_param_0];
ld.param.u32 %r31, [map_asinh_double_param_1];
ld.param.u64 %rd2, [map_asinh_double_param_2];
ld.param.u32 %r32, [map_asinh_double_param_3];
ld.param.u64 %rd3, [map_asinh_double_param_4];
ld.param.u32 %r33, [map_asinh_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
.loc 2 11 1
mov.u32 %r34, %ntid.x;
mov.u32 %r35, %ctaid.x;
mov.u32 %r36, %tid.x;
mad.lo.s32 %r74, %r34, %r35, %r36;
.loc 2 11 1
setp.ge.s32 %p1, %r74, %r31;
@%p1 bra BB4_34;
.loc 2 11 1
mov.u32 %r37, %ntid.y;
.loc 2 11 22
mov.u32 %r38, %nctaid.y;
mul.lo.s32 %r2, %r38, %r37;
cvta.to.global.u64 %rd6, %rd2;
BB4_2:
.loc 2 11 1
mov.u32 %r39, %ctaid.y;
mov.u32 %r41, %tid.y;
mad.lo.s32 %r75, %r37, %r39, %r41;
.loc 2 11 1
setp.ge.s32 %p2, %r75, %r30;
@%p2 bra BB4_33;
.loc 2 11 1
mul.lo.s32 %r4, %r74, %r33;
.loc 2 11 42
mul.lo.s32 %r5, %r74, %r32;
BB4_4:
.loc 2 11 1
add.s32 %r46, %r75, %r4;
mul.wide.s32 %rd4, %r46, 8;
add.s64 %rd5, %rd1, %rd4;
.loc 2 11 1
ld.global.f64 %fd1, [%rd5];
.loc 3 303 10
abs.f64 %fd2, %fd1;
{
.reg .b32 %temp;
mov.b64 {%temp, %r80}, %fd2;
}
setp.gt.s32 %p3, %r80, 1609564159;
@%p3 bra BB4_19;
mul.f64 %fd21, %fd2, %fd2;
add.f64 %fd22, %fd21, 0d3FF0000000000000;
sqrt.rn.f64 %fd23, %fd22;
add.f64 %fd24, %fd23, 0d3FF0000000000000;
div.rn.f64 %fd25, %fd21, %fd24;
add.f64 %fd3, %fd2, %fd25;
{
.reg .b32 %temp;
mov.b64 {%temp, %r47}, %fd3;
}
setp.lt.u32 %p4, %r47, 1071994197;
setp.lt.s32 %p5, %r47, -1076258407;
or.pred %p6, %p4, %p5;
@%p6 bra BB4_18;
add.f64 %fd4, %fd3, 0d3FF0000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r76}, %fd4;
}
{
.reg .b32 %temp;
mov.b64 {%r77, %temp}, %fd4;
}
setp.gt.f64 %p7, %fd4, 0d0000000000000000;
setp.lt.f64 %p8, %fd4, 0d7FF0000000000000;
and.pred %p9, %p7, %p8;
@%p9 bra BB4_12;
abs.f64 %fd26, %fd4;
setp.gtu.f64 %p10, %fd26, 0d7FF0000000000000;
@%p10 bra BB4_11;
setp.neu.f64 %p11, %fd4, 0d0000000000000000;
@%p11 bra BB4_10;
mov.f64 %fd143, 0dFFF0000000000000;
bra.uni BB4_32;
BB4_10:
.loc 3 303 10
setp.eq.f64 %p12, %fd4, 0d7FF0000000000000;
selp.f64 %fd143, %fd4, 0dFFF8000000000000, %p12;
bra.uni BB4_32;
BB4_11:
.loc 3 303 10
add.f64 %fd143, %fd4, %fd4;
bra.uni BB4_32;
BB4_12:
.loc 3 303 10
setp.lt.u32 %p13, %r76, 1048576;
@%p13 bra BB4_14;
mov.u32 %r78, -1023;
bra.uni BB4_15;
BB4_14:
.loc 3 303 10
mul.f64 %fd28, %fd4, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r76}, %fd28;
}
{
.reg .b32 %temp;
mov.b64 {%r77, %temp}, %fd28;
}
mov.u32 %r78, -1077;
BB4_15:
.loc 3 303 10
shr.s32 %r50, %r76, 20;
add.s32 %r79, %r78, %r50;
and.b32 %r51, %r76, -2146435073;
or.b32 %r52, %r51, 1072693248;
mov.b64 %fd140, {%r77, %r52};
setp.lt.u32 %p14, %r52, 1073127583;
@%p14 bra BB4_17;
{
.reg .b32 %temp;
mov.b64 {%r53, %temp}, %fd140;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r54}, %fd140;
}
add.s32 %r55, %r54, -1048576;
mov.b64 %fd140, {%r53, %r55};
add.s32 %r79, %r79, 1;
BB4_17:
add.f64 %fd29, %fd140, 0d3FF0000000000000;
mov.f64 %fd31, 0d3FF0000000000000;
.loc 3 303 10
// inline asm
cvt.rn.f32.f64 %f1,%fd29;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd30,%f2;
// inline asm
neg.f64 %fd32, %fd29;
fma.rn.f64 %fd33, %fd32, %fd30, %fd31;
fma.rn.f64 %fd34, %fd33, %fd33, %fd33;
fma.rn.f64 %fd35, %fd34, %fd30, %fd30;
add.f64 %fd36, %fd140, 0dBFF0000000000000;
mul.f64 %fd37, %fd36, %fd35;
fma.rn.f64 %fd38, %fd36, %fd35, %fd37;
mul.f64 %fd39, %fd38, %fd38;
mov.f64 %fd40, 0d3ED0EE258B7A8B04;
mov.f64 %fd41, 0d3EB1380B3AE80F1E;
.loc 3 303 10
fma.rn.f64 %fd42, %fd41, %fd39, %fd40;
mov.f64 %fd43, 0d3EF3B2669F02676F;
.loc 3 303 10
fma.rn.f64 %fd44, %fd42, %fd39, %fd43;
mov.f64 %fd45, 0d3F1745CBA9AB0956;
.loc 3 303 10
fma.rn.f64 %fd46, %fd44, %fd39, %fd45;
mov.f64 %fd47, 0d3F3C71C72D1B5154;
.loc 3 303 10
fma.rn.f64 %fd48, %fd46, %fd39, %fd47;
mov.f64 %fd49, 0d3F624924923BE72D;
.loc 3 303 10
fma.rn.f64 %fd50, %fd48, %fd39, %fd49;
mov.f64 %fd51, 0d3F8999999999A3C4;
.loc 3 303 10
fma.rn.f64 %fd52, %fd50, %fd39, %fd51;
mov.f64 %fd53, 0d3FB5555555555554;
.loc 3 303 10
fma.rn.f64 %fd54, %fd52, %fd39, %fd53;
sub.f64 %fd55, %fd36, %fd38;
add.f64 %fd56, %fd55, %fd55;
neg.f64 %fd57, %fd38;
fma.rn.f64 %fd58, %fd57, %fd36, %fd56;
mul.f64 %fd59, %fd35, %fd58;
mul.f64 %fd60, %fd54, %fd39;
fma.rn.f64 %fd61, %fd60, %fd38, %fd59;
cvt.rn.f64.s32 %fd62, %r79;
mov.f64 %fd63, 0d3FE62E42FEFA39EF;
.loc 3 303 10
fma.rn.f64 %fd64, %fd62, %fd63, %fd38;
neg.s32 %r56, %r79;
cvt.rn.f64.s32 %fd65, %r56;
fma.rn.f64 %fd66, %fd65, %fd63, %fd64;
sub.f64 %fd67, %fd66, %fd38;
sub.f64 %fd68, %fd61, %fd67;
mov.f64 %fd69, 0d3C7ABC9E3B39803F;
.loc 3 303 10
fma.rn.f64 %fd70, %fd62, %fd69, %fd68;
add.f64 %fd143, %fd64, %fd70;
bra.uni BB4_32;
BB4_18:
.loc 3 303 10
add.f64 %fd71, %fd3, 0d4000000000000000;
div.rn.f64 %fd72, %fd3, %fd71;
neg.f64 %fd73, %fd3;
mul.f64 %fd74, %fd72, %fd73;
add.f64 %fd75, %fd3, %fd74;
mul.f64 %fd76, %fd75, %fd75;
mov.f64 %fd77, 0d3ED087FFCEB2DC44;
mov.f64 %fd78, 0d3EB372FB2FBE14B5;
.loc 3 303 10
fma.rn.f64 %fd79, %fd78, %fd76, %fd77;
mov.f64 %fd80, 0d3EF3B9FF890F468C;
.loc 3 303 10
fma.rn.f64 %fd81, %fd79, %fd76, %fd80;
mov.f64 %fd82, 0d3F17457EFD51BAF8;
.loc 3 303 10
fma.rn.f64 %fd83, %fd81, %fd76, %fd82;
mov.f64 %fd84, 0d3F3C71C8DE3CE825;
.loc 3 303 10
fma.rn.f64 %fd85, %fd83, %fd76, %fd84;
mov.f64 %fd86, 0d3F6249248FA4661F;
.loc 3 303 10
fma.rn.f64 %fd87, %fd85, %fd76, %fd86;
mov.f64 %fd88, 0d3F899999999D70C4;
.loc 3 303 10
fma.rn.f64 %fd89, %fd87, %fd76, %fd88;
mov.f64 %fd90, 0d3FB5555555555462;
.loc 3 303 10
fma.rn.f64 %fd91, %fd89, %fd76, %fd90;
mul.f64 %fd92, %fd91, %fd76;
fma.rn.f64 %fd93, %fd92, %fd75, %fd74;
add.f64 %fd143, %fd93, %fd3;
bra.uni BB4_32;
BB4_19:
.loc 3 303 10
{
.reg .b32 %temp;
mov.b64 {%r81, %temp}, %fd2;
}
setp.lt.f64 %p15, %fd2, 0d7FF0000000000000;
setp.gt.f64 %p16, %fd2, 0d0000000000000000;
and.pred %p17, %p16, %p15;
@%p17 bra BB4_25;
abs.f64 %fd94, %fd2;
setp.gtu.f64 %p18, %fd94, 0d7FF0000000000000;
@%p18 bra BB4_24;
setp.neu.f64 %p19, %fd2, 0d0000000000000000;
@%p19 bra BB4_23;
mov.f64 %fd142, 0dFFF0000000000000;
bra.uni BB4_31;
BB4_23:
.loc 3 303 10
setp.eq.f64 %p20, %fd2, 0d7FF0000000000000;
selp.f64 %fd142, %fd2, 0dFFF8000000000000, %p20;
bra.uni BB4_31;
BB4_24:
.loc 3 303 10
add.f64 %fd142, %fd2, %fd2;
bra.uni BB4_31;
BB4_25:
.loc 3 303 10
setp.lt.u32 %p21, %r80, 1048576;
@%p21 bra BB4_27;
mov.u32 %r82, -1023;
bra.uni BB4_28;
BB4_27:
.loc 3 303 10
mul.f64 %fd96, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r80}, %fd96;
}
{
.reg .b32 %temp;
mov.b64 {%r81, %temp}, %fd96;
}
mov.u32 %r82, -1077;
BB4_28:
.loc 3 303 10
shr.s32 %r59, %r80, 20;
add.s32 %r83, %r82, %r59;
and.b32 %r60, %r80, -2146435073;
or.b32 %r61, %r60, 1072693248;
mov.b64 %fd141, {%r81, %r61};
setp.lt.u32 %p22, %r61, 1073127583;
@%p22 bra BB4_30;
{
.reg .b32 %temp;
mov.b64 {%r62, %temp}, %fd141;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r63}, %fd141;
}
add.s32 %r64, %r63, -1048576;
mov.b64 %fd141, {%r62, %r64};
add.s32 %r83, %r83, 1;
BB4_30:
add.f64 %fd97, %fd141, 0d3FF0000000000000;
mov.f64 %fd99, 0d3FF0000000000000;
.loc 3 303 10
// inline asm
cvt.rn.f32.f64 %f5,%fd97;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd98,%f6;
// inline asm
neg.f64 %fd100, %fd97;
fma.rn.f64 %fd101, %fd100, %fd98, %fd99;
fma.rn.f64 %fd102, %fd101, %fd101, %fd101;
fma.rn.f64 %fd103, %fd102, %fd98, %fd98;
add.f64 %fd104, %fd141, 0dBFF0000000000000;
mul.f64 %fd105, %fd104, %fd103;
fma.rn.f64 %fd106, %fd104, %fd103, %fd105;
mul.f64 %fd107, %fd106, %fd106;
mov.f64 %fd108, 0d3ED0EE258B7A8B04;
mov.f64 %fd109, 0d3EB1380B3AE80F1E;
.loc 3 303 10
fma.rn.f64 %fd110, %fd109, %fd107, %fd108;
mov.f64 %fd111, 0d3EF3B2669F02676F;
.loc 3 303 10
fma.rn.f64 %fd112, %fd110, %fd107, %fd111;
mov.f64 %fd113, 0d3F1745CBA9AB0956;
.loc 3 303 10
fma.rn.f64 %fd114, %fd112, %fd107, %fd113;
mov.f64 %fd115, 0d3F3C71C72D1B5154;
.loc 3 303 10
fma.rn.f64 %fd116, %fd114, %fd107, %fd115;
mov.f64 %fd117, 0d3F624924923BE72D;
.loc 3 303 10
fma.rn.f64 %fd118, %fd116, %fd107, %fd117;
mov.f64 %fd119, 0d3F8999999999A3C4;
.loc 3 303 10
fma.rn.f64 %fd120, %fd118, %fd107, %fd119;
mov.f64 %fd121, 0d3FB5555555555554;
.loc 3 303 10
fma.rn.f64 %fd122, %fd120, %fd107, %fd121;
sub.f64 %fd123, %fd104, %fd106;
add.f64 %fd124, %fd123, %fd123;
neg.f64 %fd125, %fd106;
fma.rn.f64 %fd126, %fd125, %fd104, %fd124;
mul.f64 %fd127, %fd103, %fd126;
mul.f64 %fd128, %fd122, %fd107;
fma.rn.f64 %fd129, %fd128, %fd106, %fd127;
cvt.rn.f64.s32 %fd130, %r83;
mov.f64 %fd131, 0d3FE62E42FEFA39EF;
.loc 3 303 10
fma.rn.f64 %fd132, %fd130, %fd131, %fd106;
neg.s32 %r65, %r83;
cvt.rn.f64.s32 %fd133, %r65;
fma.rn.f64 %fd134, %fd133, %fd131, %fd132;
sub.f64 %fd135, %fd134, %fd106;
sub.f64 %fd136, %fd129, %fd135;
mov.f64 %fd137, 0d3C7ABC9E3B39803F;
.loc 3 303 10
fma.rn.f64 %fd138, %fd130, %fd137, %fd136;
add.f64 %fd142, %fd132, %fd138;
BB4_31:
add.f64 %fd143, %fd142, 0d3FE62E42FEFA39EF;
BB4_32:
{
.reg .b32 %temp;
mov.b64 {%temp, %r66}, %fd1;
}
and.b32 %r67, %r66, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%temp, %r68}, %fd143;
}
or.b32 %r69, %r68, %r67;
{
.reg .b32 %temp;
mov.b64 {%r70, %temp}, %fd143;
}
mov.b64 %fd139, {%r70, %r69};
.loc 2 11 42
add.s32 %r71, %r75, %r5;
mul.wide.s32 %rd7, %r71, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 2 11 42
st.global.f64 [%rd8], %fd139;
.loc 2 11 22
add.s32 %r75, %r2, %r75;
.loc 2 11 1
setp.lt.s32 %p23, %r75, %r30;
@%p23 bra BB4_4;
BB4_33:
.loc 2 11 22
mov.u32 %r72, %nctaid.x;
mad.lo.s32 %r74, %r72, %r34, %r74;
.loc 2 11 1
setp.lt.s32 %p24, %r74, %r31;
@%p24 bra BB4_2;
BB4_34:
.loc 2 11 2
ret;
}
// map_atan_double
// Element-wise kernel: for every (x, y) index pair it loads one double from
// the param_4 buffer, evaluates the inlined double-precision math routine
// referenced by `.loc 3 283` (arctangent, per the kernel name and the
// pi/2 - p(1/|x|) structure below), and stores the result in the param_2 buffer.
//
// Parameters as they are used by the code below:
//   param_0 (u32)  exclusive upper bound of the y-derived (inner loop) index
//   param_1 (u32)  exclusive upper bound of the x-derived (outer loop) index
//   param_2 (u64)  destination base address (converted to the global space)
//   param_3 (u32)  multiplier applied to the x index for destination addressing
//   param_4 (u64)  source base address (converted to the global space)
//   param_5 (u32)  multiplier applied to the x index for source addressing
// NOTE(review): presumably rows, cols, dst, dst leading dimension, src,
// src leading dimension -- confirm against function_decls.cuh.
//
// Element offset = (x * multiplier + y) * 8 bytes; the index is computed in
// signed 32-bit arithmetic and only widened to 64 bits by mul.wide.s32.
.visible .entry map_atan_double(
.param .u32 map_atan_double_param_0,
.param .u32 map_atan_double_param_1,
.param .u64 map_atan_double_param_2,
.param .u32 map_atan_double_param_3,
.param .u64 map_atan_double_param_4,
.param .u32 map_atan_double_param_5
)
{
.reg .pred %p<8>;
.reg .f32 %f<5>;
.reg .s32 %r<32>;
.reg .s64 %rd<9>;
.reg .f64 %fd<57>;
ld.param.u32 %r10, [map_atan_double_param_0];
ld.param.u32 %r11, [map_atan_double_param_1];
ld.param.u64 %rd3, [map_atan_double_param_2];
ld.param.u32 %r12, [map_atan_double_param_3];
ld.param.u64 %rd4, [map_atan_double_param_4];
ld.param.u32 %r13, [map_atan_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 12 1
// %r30 = ntid.x * ctaid.x + tid.x : starting x index of this thread.
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r30, %r14, %r15, %r16;
.loc 2 12 1
// Nothing to do when the starting x index is already out of range.
setp.ge.s32 %p1, %r30, %r11;
@%p1 bra BB5_8;
.loc 2 12 1
// %r2 = ntid.y * ctaid.y + tid.y : starting y index of this thread.
mov.u32 %r17, %tid.y;
mov.u32 %r18, %ntid.y;
mov.u32 %r19, %ctaid.y;
mad.lo.s32 %r2, %r18, %r19, %r17;
.loc 2 12 22
// %r3 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r20, %nctaid.y;
mul.lo.s32 %r3, %r20, %r18;
// Outer loop over x (%r30).
BB5_2:
.loc 2 12 1
setp.ge.s32 %p2, %r2, %r10;
@%p2 bra BB5_7;
.loc 2 12 1
// %r5 = x * param_5 (source offset), %r6 = x * param_3 (destination offset).
mul.lo.s32 %r5, %r30, %r13;
.loc 2 12 42
mul.lo.s32 %r6, %r30, %r12;
mov.u32 %r31, %r2;
// Inner loop over y (%r31 / %r7).
BB5_4:
.loc 2 12 1
mov.u32 %r7, %r31;
add.s32 %r21, %r7, %r5;
mul.wide.s32 %rd5, %r21, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 12 1
// %fd1 = input value.
ld.global.f64 %fd1, [%rd6];
.loc 3 283 10
// Work on |x|. When |x| <= 1.0 (or the compare is unordered, i.e. NaN) the
// polynomial argument is |x| itself; otherwise it is 1/|x|.
abs.f64 %fd2, %fd1;
setp.leu.f64 %p3, %fd2, 0d3FF0000000000000;
mov.f64 %fd56, %fd2;
@%p3 bra BB5_6;
// Reciprocal of |x|: single-precision approximate seed, then refined in
// double precision with three FMA steps.
// inline asm
cvt.rn.f32.f64 %f1,%fd2;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd6,%f2;
// inline asm
neg.f64 %fd7, %fd2;
mov.f64 %fd8, 0d3FF0000000000000;
.loc 3 283 10
fma.rn.f64 %fd9, %fd7, %fd6, %fd8;
fma.rn.f64 %fd10, %fd9, %fd9, %fd9;
fma.rn.f64 %fd11, %fd10, %fd6, %fd6;
// |x| == +inf: force the reciprocal to exactly 0.0.
setp.eq.f64 %p4, %fd2, 0d7FF0000000000000;
selp.f64 %fd3, 0d0000000000000000, %fd11, %p4;
mov.f64 %fd56, %fd3;
BB5_6:
.loc 3 283 10
// %fd4 = reduced argument t, %fd12 = t*t. The FMA chain below is a Horner
// evaluation in t*t with the literal coefficients shown.
mov.f64 %fd4, %fd56;
mul.f64 %fd12, %fd4, %fd4;
mov.f64 %fd13, 0d3F2D3B63DBB65B49;
mov.f64 %fd14, 0dBEF53E1D2A25FF7E;
.loc 3 283 10
fma.rn.f64 %fd15, %fd14, %fd12, %fd13;
mov.f64 %fd16, 0dBF5312788DDE082E;
.loc 3 283 10
fma.rn.f64 %fd17, %fd15, %fd12, %fd16;
mov.f64 %fd18, 0d3F6F9690C8249315;
.loc 3 283 10
fma.rn.f64 %fd19, %fd17, %fd12, %fd18;
mov.f64 %fd20, 0dBF82CF5AABC7CF0D;
.loc 3 283 10
fma.rn.f64 %fd21, %fd19, %fd12, %fd20;
mov.f64 %fd22, 0d3F9162B0B2A3BFDE;
.loc 3 283 10
fma.rn.f64 %fd23, %fd21, %fd12, %fd22;
mov.f64 %fd24, 0dBF9A7256FEB6FC6B;
.loc 3 283 10
fma.rn.f64 %fd25, %fd23, %fd12, %fd24;
mov.f64 %fd26, 0d3FA171560CE4A489;
.loc 3 283 10
fma.rn.f64 %fd27, %fd25, %fd12, %fd26;
mov.f64 %fd28, 0dBFA4F44D841450E4;
.loc 3 283 10
fma.rn.f64 %fd29, %fd27, %fd12, %fd28;
mov.f64 %fd30, 0d3FA7EE3D3F36BB95;
.loc 3 283 10
fma.rn.f64 %fd31, %fd29, %fd12, %fd30;
mov.f64 %fd32, 0dBFAAD32AE04A9FD1;
.loc 3 283 10
fma.rn.f64 %fd33, %fd31, %fd12, %fd32;
mov.f64 %fd34, 0d3FAE17813D66954F;
.loc 3 283 10
fma.rn.f64 %fd35, %fd33, %fd12, %fd34;
mov.f64 %fd36, 0dBFB11089CA9A5BCD;
.loc 3 283 10
fma.rn.f64 %fd37, %fd35, %fd12, %fd36;
mov.f64 %fd38, 0d3FB3B12B2DB51738;
.loc 3 283 10
fma.rn.f64 %fd39, %fd37, %fd12, %fd38;
mov.f64 %fd40, 0dBFB745D022F8DC5C;
.loc 3 283 10
fma.rn.f64 %fd41, %fd39, %fd12, %fd40;
mov.f64 %fd42, 0d3FBC71C709DFE927;
.loc 3 283 10
fma.rn.f64 %fd43, %fd41, %fd12, %fd42;
mov.f64 %fd44, 0dBFC2492491FA1744;
.loc 3 283 10
fma.rn.f64 %fd45, %fd43, %fd12, %fd44;
mov.f64 %fd46, 0d3FC99999999840D2;
.loc 3 283 10
fma.rn.f64 %fd47, %fd45, %fd12, %fd46;
mov.f64 %fd48, 0dBFD555555555544C;
.loc 3 283 10
fma.rn.f64 %fd49, %fd47, %fd12, %fd48;
// %fd51 = t + t * (t*t * poly).
mul.f64 %fd50, %fd49, %fd12;
fma.rn.f64 %fd51, %fd50, %fd4, %fd4;
// 0d3FF921FB54442D18 is pi/2: for |x| > 1 the result is pi/2 - %fd51,
// otherwise %fd51 is used directly.
mov.f64 %fd52, 0d3FF921FB54442D18;
.loc 3 283 10
sub.f64 %fd53, %fd52, %fd51;
setp.gt.f64 %p5, %fd2, 0d3FF0000000000000;
.loc 3 283 10
selp.f64 %fd54, %fd53, %fd51, %p5;
// Transfer the sign of the input: OR the input's sign bit (bit 31 of its
// high word) into the high word of the result.
{
.reg .b32 %temp;
mov.b64 {%r22, %temp}, %fd54;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r23}, %fd54;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r24}, %fd1;
}
and.b32 %r25, %r24, -2147483648;
or.b32 %r26, %r23, %r25;
mov.b64 %fd55, {%r22, %r26};
.loc 2 12 42
// Store to destination[(x * param_3 + y)].
add.s32 %r27, %r7, %r6;
mul.wide.s32 %rd7, %r27, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 12 42
st.global.f64 [%rd8], %fd55;
.loc 2 12 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r8, %r3, %r7;
.loc 2 12 1
setp.lt.s32 %p6, %r8, %r10;
mov.u32 %r31, %r8;
@%p6 bra BB5_4;
BB5_7:
.loc 2 12 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
mov.u32 %r28, %nctaid.x;
mad.lo.s32 %r30, %r28, %r14, %r30;
.loc 2 12 1
setp.lt.s32 %p7, %r30, %r11;
@%p7 bra BB5_2;
BB5_8:
.loc 2 12 2
ret;
}
// map_atanh_double
// Element-wise kernel: for every (x, y) index pair it loads one double from
// the param_4 buffer, evaluates the inlined double-precision routine
// referenced by `.loc 3 308` (inverse hyperbolic tangent, per the kernel
// name), and stores the result in the param_2 buffer.
//
// The value is computed as 0.5 * L(2|v| / (1 - |v|)) with the sign of the
// input applied at the end, where L(q) is a log(1 + q)-style evaluation that
// uses either a small-argument series (BB6_17) or a full logarithm of 1 + q.
//
// Parameters as they are used by the code below:
//   param_0 (u32)  exclusive upper bound of the y-derived (inner loop) index
//   param_1 (u32)  exclusive upper bound of the x-derived (outer loop) index
//   param_2 (u64)  destination base address
//   param_3 (u32)  multiplier applied to the x index for destination addressing
//   param_4 (u64)  source base address
//   param_5 (u32)  multiplier applied to the x index for source addressing
// NOTE(review): presumably rows, cols, dst, dst leading dimension, src,
// src leading dimension -- confirm against function_decls.cuh.
.visible .entry map_atanh_double(
.param .u32 map_atanh_double_param_0,
.param .u32 map_atanh_double_param_1,
.param .u64 map_atanh_double_param_2,
.param .u32 map_atanh_double_param_3,
.param .u64 map_atanh_double_param_4,
.param .u32 map_atanh_double_param_5
)
{
.reg .pred %p<17>;
.reg .f32 %f<5>;
.reg .s32 %r<57>;
.reg .s64 %rd<9>;
.reg .f64 %fd<89>;
ld.param.u32 %r20, [map_atanh_double_param_0];
ld.param.u32 %r21, [map_atanh_double_param_1];
ld.param.u64 %rd2, [map_atanh_double_param_2];
ld.param.u32 %r22, [map_atanh_double_param_3];
ld.param.u64 %rd3, [map_atanh_double_param_4];
ld.param.u32 %r23, [map_atanh_double_param_5];
// %rd1 = source base in the global address space.
cvta.to.global.u64 %rd1, %rd3;
.loc 2 13 1
// %r51 = ntid.x * ctaid.x + tid.x : starting x index of this thread.
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r51, %r24, %r25, %r26;
.loc 2 13 1
setp.ge.s32 %p1, %r51, %r21;
@%p1 bra BB6_20;
.loc 2 13 1
mov.u32 %r27, %ntid.y;
.loc 2 13 22
// %r2 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
// %rd6 = destination base in the global address space.
cvta.to.global.u64 %rd6, %rd2;
// Outer loop over x (%r51); the starting y index is recomputed each pass.
BB6_2:
.loc 2 13 1
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r52, %r27, %r29, %r31;
.loc 2 13 1
setp.ge.s32 %p2, %r52, %r20;
@%p2 bra BB6_19;
.loc 2 13 1
// %r4 = x * param_5 (source offset), %r5 = x * param_3 (destination offset).
mul.lo.s32 %r4, %r51, %r23;
.loc 2 13 42
mul.lo.s32 %r5, %r51, %r22;
// Inner loop over y (%r52).
BB6_4:
.loc 2 13 1
add.s32 %r36, %r52, %r4;
mul.wide.s32 %rd4, %r36, 8;
add.s64 %rd5, %rd1, %rd4;
.loc 2 13 1
// %fd1 = input value v.
ld.global.f64 %fd1, [%rd5];
.loc 3 308 10
// %fd2 = q = 2|v| / (1 - |v|).
abs.f64 %fd12, %fd1;
add.f64 %fd13, %fd12, %fd12;
mov.f64 %fd14, 0d3FF0000000000000;
.loc 3 308 10
sub.f64 %fd15, %fd14, %fd12;
div.rn.f64 %fd2, %fd13, %fd15;
// Select the evaluation path from the high 32 bits of q: the series at
// BB6_17 is used when the high word is below 1071994197 (unsigned) or below
// -1076258407 (signed); otherwise fall through to the logarithm of 1 + q.
{
.reg .b32 %temp;
mov.b64 {%temp, %r37}, %fd2;
}
setp.lt.u32 %p3, %r37, 1071994197;
setp.lt.s32 %p4, %r37, -1076258407;
or.pred %p5, %p3, %p4;
@%p5 bra BB6_17;
// %fd3 = 1 + q; %r53 / %r54 = its high / low 32-bit words.
add.f64 %fd3, %fd2, 0d3FF0000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r53}, %fd3;
}
{
.reg .b32 %temp;
mov.b64 {%r54, %temp}, %fd3;
}
// Normal case: 0 < %fd3 < +inf goes to BB6_11. The remaining branches
// handle the special values.
setp.gt.f64 %p6, %fd3, 0d0000000000000000;
setp.lt.f64 %p7, %fd3, 0d7FF0000000000000;
and.pred %p8, %p6, %p7;
@%p8 bra BB6_11;
// NaN argument (|%fd3| > inf is only true when unordered): go to BB6_10.
abs.f64 %fd16, %fd3;
setp.gtu.f64 %p9, %fd16, 0d7FF0000000000000;
@%p9 bra BB6_10;
setp.neu.f64 %p10, %fd3, 0d0000000000000000;
@%p10 bra BB6_9;
// Argument is zero: result is -inf.
mov.f64 %fd88, 0dFFF0000000000000;
bra.uni BB6_18;
BB6_9:
.loc 3 308 10
// +inf maps to itself; anything else reaching here yields the NaN pattern
// 0dFFF8000000000000.
setp.eq.f64 %p11, %fd3, 0d7FF0000000000000;
selp.f64 %fd88, %fd3, 0dFFF8000000000000, %p11;
bra.uni BB6_18;
BB6_10:
.loc 3 308 10
// NaN in, NaN out (the addition keeps it a NaN).
add.f64 %fd88, %fd3, %fd3;
bra.uni BB6_18;
BB6_11:
.loc 3 308 10
// High word below 0x00100000 means a zero exponent field (subnormal):
// rescale by 2^54 (0d4350000000000000) and use exponent bias -1077 instead
// of -1023.
setp.lt.u32 %p12, %r53, 1048576;
@%p12 bra BB6_13;
mov.u32 %r55, -1023;
bra.uni BB6_14;
BB6_13:
.loc 3 308 10
mul.f64 %fd18, %fd3, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r53}, %fd18;
}
{
.reg .b32 %temp;
mov.b64 {%r54, %temp}, %fd18;
}
mov.u32 %r55, -1077;
BB6_14:
.loc 3 308 10
// %r56 = unbiased exponent. %fd87 = mantissa with its exponent field
// forced to 0x3FF (mask 0x800FFFFF, then OR 0x3FF00000), i.e. in [1, 2).
shr.s32 %r40, %r53, 20;
add.s32 %r56, %r55, %r40;
and.b32 %r41, %r53, -2146435073;
or.b32 %r42, %r41, 1072693248;
mov.b64 %fd87, {%r54, %r42};
// If the mantissa's high word is at or above 1073127583, halve the mantissa
// (subtract 1 from the exponent field) and bump the exponent by one.
setp.lt.u32 %p13, %r42, 1073127583;
@%p13 bra BB6_16;
{
.reg .b32 %temp;
mov.b64 {%r43, %temp}, %fd87;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r44}, %fd87;
}
add.s32 %r45, %r44, -1048576;
mov.b64 %fd87, {%r43, %r45};
add.s32 %r56, %r56, 1;
BB6_16:
// %fd25 = 1 / (m + 1): single-precision approximate seed refined with FMAs.
add.f64 %fd19, %fd87, 0d3FF0000000000000;
// inline asm
cvt.rn.f32.f64 %f1,%fd19;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd20,%f2;
// inline asm
neg.f64 %fd22, %fd19;
fma.rn.f64 %fd23, %fd22, %fd20, %fd14;
fma.rn.f64 %fd24, %fd23, %fd23, %fd23;
fma.rn.f64 %fd25, %fd24, %fd20, %fd20;
// %fd28 = 2 * (m - 1) / (m + 1); %fd29 = %fd28 squared. The FMA chain is a
// Horner evaluation in %fd29 with the literal coefficients shown.
add.f64 %fd26, %fd87, 0dBFF0000000000000;
mul.f64 %fd27, %fd26, %fd25;
fma.rn.f64 %fd28, %fd26, %fd25, %fd27;
mul.f64 %fd29, %fd28, %fd28;
mov.f64 %fd30, 0d3ED0EE258B7A8B04;
mov.f64 %fd31, 0d3EB1380B3AE80F1E;
.loc 3 308 10
fma.rn.f64 %fd32, %fd31, %fd29, %fd30;
mov.f64 %fd33, 0d3EF3B2669F02676F;
.loc 3 308 10
fma.rn.f64 %fd34, %fd32, %fd29, %fd33;
mov.f64 %fd35, 0d3F1745CBA9AB0956;
.loc 3 308 10
fma.rn.f64 %fd36, %fd34, %fd29, %fd35;
mov.f64 %fd37, 0d3F3C71C72D1B5154;
.loc 3 308 10
fma.rn.f64 %fd38, %fd36, %fd29, %fd37;
mov.f64 %fd39, 0d3F624924923BE72D;
.loc 3 308 10
fma.rn.f64 %fd40, %fd38, %fd29, %fd39;
mov.f64 %fd41, 0d3F8999999999A3C4;
.loc 3 308 10
fma.rn.f64 %fd42, %fd40, %fd29, %fd41;
mov.f64 %fd43, 0d3FB5555555555554;
.loc 3 308 10
fma.rn.f64 %fd44, %fd42, %fd29, %fd43;
// Correction term for the rounding error of the quotient %fd28, combined
// with the polynomial contribution.
sub.f64 %fd45, %fd26, %fd28;
add.f64 %fd46, %fd45, %fd45;
neg.f64 %fd47, %fd28;
fma.rn.f64 %fd48, %fd47, %fd26, %fd46;
mul.f64 %fd49, %fd25, %fd48;
mul.f64 %fd50, %fd44, %fd29;
fma.rn.f64 %fd51, %fd50, %fd28, %fd49;
// Add exponent * ln(2): 0d3FE62E42FEFA39EF is the leading part of ln(2) and
// 0d3C7ABC9E3B39803F the low-order part; the intermediate subtractions
// recover the rounding error of the leading sum.
cvt.rn.f64.s32 %fd52, %r56;
mov.f64 %fd53, 0d3FE62E42FEFA39EF;
.loc 3 308 10
fma.rn.f64 %fd54, %fd52, %fd53, %fd28;
neg.s32 %r46, %r56;
cvt.rn.f64.s32 %fd55, %r46;
fma.rn.f64 %fd56, %fd55, %fd53, %fd54;
sub.f64 %fd57, %fd56, %fd28;
sub.f64 %fd58, %fd51, %fd57;
mov.f64 %fd59, 0d3C7ABC9E3B39803F;
.loc 3 308 10
fma.rn.f64 %fd60, %fd52, %fd59, %fd58;
add.f64 %fd88, %fd54, %fd60;
bra.uni BB6_18;
BB6_17:
.loc 3 308 10
// Series path, evaluated directly on q (%fd2) without forming 1 + q:
// %fd62 = q / (q + 2), %fd64 = -q * %fd62, %fd65 = q + %fd64,
// followed by a Horner polynomial in %fd65 squared.
add.f64 %fd61, %fd2, 0d4000000000000000;
div.rn.f64 %fd62, %fd2, %fd61;
neg.f64 %fd63, %fd2;
mul.f64 %fd64, %fd62, %fd63;
add.f64 %fd65, %fd2, %fd64;
mul.f64 %fd66, %fd65, %fd65;
mov.f64 %fd67, 0d3ED087FFCEB2DC44;
mov.f64 %fd68, 0d3EB372FB2FBE14B5;
.loc 3 308 10
fma.rn.f64 %fd69, %fd68, %fd66, %fd67;
mov.f64 %fd70, 0d3EF3B9FF890F468C;
.loc 3 308 10
fma.rn.f64 %fd71, %fd69, %fd66, %fd70;
mov.f64 %fd72, 0d3F17457EFD51BAF8;
.loc 3 308 10
fma.rn.f64 %fd73, %fd71, %fd66, %fd72;
mov.f64 %fd74, 0d3F3C71C8DE3CE825;
.loc 3 308 10
fma.rn.f64 %fd75, %fd73, %fd66, %fd74;
mov.f64 %fd76, 0d3F6249248FA4661F;
.loc 3 308 10
fma.rn.f64 %fd77, %fd75, %fd66, %fd76;
mov.f64 %fd78, 0d3F899999999D70C4;
.loc 3 308 10
fma.rn.f64 %fd79, %fd77, %fd66, %fd78;
mov.f64 %fd80, 0d3FB5555555555462;
.loc 3 308 10
fma.rn.f64 %fd81, %fd79, %fd66, %fd80;
mul.f64 %fd82, %fd81, %fd66;
fma.rn.f64 %fd83, %fd82, %fd65, %fd64;
add.f64 %fd88, %fd83, %fd2;
BB6_18:
// Final result = 0.5 * %fd88, negated when the input's high word is
// negative (i.e. its sign bit is set).
{
.reg .b32 %temp;
mov.b64 {%temp, %r47}, %fd1;
}
setp.lt.s32 %p14, %r47, 0;
mul.f64 %fd84, %fd88, 0d3FE0000000000000;
neg.f64 %fd85, %fd84;
selp.f64 %fd86, %fd85, %fd84, %p14;
.loc 2 13 42
// Store to destination[(x * param_3 + y)].
add.s32 %r48, %r52, %r5;
mul.wide.s32 %rd7, %r48, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 2 13 42
st.global.f64 [%rd8], %fd86;
.loc 2 13 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r52, %r2, %r52;
.loc 2 13 1
setp.lt.s32 %p15, %r52, %r20;
@%p15 bra BB6_4;
BB6_19:
.loc 2 13 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
mov.u32 %r49, %nctaid.x;
mad.lo.s32 %r51, %r49, %r24, %r51;
.loc 2 13 1
setp.lt.s32 %p16, %r51, %r21;
@%p16 bra BB6_2;
BB6_20:
.loc 2 13 2
ret;
}
// map_cbrt_double
// Element-wise kernel: for every (x, y) index pair it loads one double from
// the param_4 buffer, evaluates the inlined double-precision routine
// referenced by `.loc 3 318` (cube root, per the kernel name), and stores the
// result in the param_2 buffer.
//
// Parameters as they are used by the code below:
//   param_0 (u32)  exclusive upper bound of the y-derived (inner loop) index
//   param_1 (u32)  exclusive upper bound of the x-derived (outer loop) index
//   param_2 (u64)  destination base address
//   param_3 (u32)  multiplier applied to the x index for destination addressing
//   param_4 (u64)  source base address
//   param_5 (u32)  multiplier applied to the x index for source addressing
// NOTE(review): presumably rows, cols, dst, dst leading dimension, src,
// src leading dimension -- confirm against function_decls.cuh.
.visible .entry map_cbrt_double(
.param .u32 map_cbrt_double_param_0,
.param .u32 map_cbrt_double_param_1,
.param .u64 map_cbrt_double_param_2,
.param .u32 map_cbrt_double_param_3,
.param .u64 map_cbrt_double_param_4,
.param .u32 map_cbrt_double_param_5
)
{
.reg .pred %p<9>;
.reg .f32 %f<7>;
.reg .s32 %r<54>;
.reg .s64 %rd<9>;
.reg .f64 %fd<26>;
ld.param.u32 %r20, [map_cbrt_double_param_0];
ld.param.u32 %r21, [map_cbrt_double_param_1];
ld.param.u64 %rd3, [map_cbrt_double_param_2];
ld.param.u32 %r22, [map_cbrt_double_param_3];
ld.param.u64 %rd4, [map_cbrt_double_param_4];
ld.param.u32 %r23, [map_cbrt_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 14 1
// %r48 = ntid.x * ctaid.x + tid.x : starting x index of this thread.
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r48, %r24, %r25, %r26;
.loc 2 14 1
setp.ge.s32 %p1, %r48, %r21;
@%p1 bra BB7_13;
.loc 2 14 1
// %r2 = ntid.y * ctaid.y + tid.y : starting y index of this thread.
mov.u32 %r27, %tid.y;
mov.u32 %r28, %ntid.y;
mov.u32 %r29, %ctaid.y;
mad.lo.s32 %r2, %r28, %r29, %r27;
.loc 2 14 22
// %r3 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r30, %nctaid.y;
mul.lo.s32 %r3, %r30, %r28;
// Outer loop over x (%r48).
BB7_2:
.loc 2 14 1
setp.ge.s32 %p2, %r2, %r20;
@%p2 bra BB7_12;
.loc 2 14 1
// %r5 = x * param_5 (source offset), %r6 = x * param_3 (destination offset).
mul.lo.s32 %r5, %r48, %r23;
.loc 2 14 42
mul.lo.s32 %r6, %r48, %r22;
mov.u32 %r49, %r2;
// Inner loop over y (%r49 / %r7).
BB7_4:
.loc 2 14 1
mov.u32 %r7, %r49;
add.s32 %r31, %r7, %r5;
mul.wide.s32 %rd5, %r31, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 14 1
// %fd1 = input value.
ld.global.f64 %fd1, [%rd6];
.loc 3 318 10
// Zero, +/-inf and NaN inputs skip the computation and take BB7_10
// (the geu compare is also true when unordered).
setp.eq.f64 %p3, %fd1, 0d0000000000000000;
@%p3 bra BB7_10;
abs.f64 %fd2, %fd1;
setp.geu.f64 %p4, %fd2, 0d7FF0000000000000;
@%p4 bra BB7_10;
// %r50 / %r51 = low / high word of |x|; %r52 = 11-bit biased exponent.
{
.reg .b32 %temp;
mov.b64 {%r50, %temp}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd2;
}
shr.u32 %r32, %r51, 20;
and.b32 %r52, %r32, 2047;
setp.eq.s32 %p5, %r52, 0;
@%p5 bra BB7_8;
// Normal input: no exponent compensation needed.
mov.u32 %r53, 0;
bra.uni BB7_9;
BB7_8:
.loc 3 318 10
// Zero exponent field (subnormal): pre-scale by 2^54 (0d4350000000000000)
// and remember 18 (= 54 / 3) to take back out of the result exponent.
mul.f64 %fd6, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%r50, %temp}, %fd6;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd6;
}
shr.u32 %r35, %r51, 20;
and.b32 %r52, %r35, 2047;
mov.u32 %r53, 18;
BB7_9:
.loc 3 318 10
// %r37 = round((exponent - 1022) * (1/3)); 0f3EAAAAAB is 1/3 in f32.
add.s32 %r36, %r52, -1022;
cvt.rn.f32.s32 %f1, %r36;
mul.f32 %f2, %f1, 0f3EAAAAAB;
cvt.rni.s32.f32 %r37, %f2;
// Subtract 3 * %r37 from the exponent field (-3145728 = -3 << 20), giving
// the reduced argument %fd7.
mad.lo.s32 %r38, %r37, -3145728, %r51;
mov.b64 %fd7, {%r50, %r38};
// Single-precision seed: %f6 = 2^(-log2(%fd7) / 3), an estimate of
// %fd7^(-1/3) (0fBEAAAAAB is -1/3).
cvt.rn.f32.f64 %f3, %fd7;
lg2.approx.f32 %f4, %f3;
mul.f32 %f5, %f4, 0fBEAAAAAB;
ex2.approx.f32 %f6, %f5;
cvt.f64.f32 %fd8, %f6;
// Refine the inverse-cube-root estimate in double precision:
// %fd15 = %fd8 + (1 - %fd7 * %fd8^3) * %fd8 / 3.
mul.f64 %fd9, %fd8, %fd8;
neg.f64 %fd10, %fd7;
mul.f64 %fd11, %fd8, %fd10;
mov.f64 %fd12, 0d3FF0000000000000;
.loc 3 318 10
fma.rn.f64 %fd13, %fd9, %fd11, %fd12;
mul.f64 %fd14, %fd8, 0d3FD5555555555555;
fma.rn.f64 %fd15, %fd13, %fd14, %fd8;
// %fd17 = %fd7 * %fd15^2, an estimate of the cube root of %fd7, followed by
// one correction step: %fd22 = %fd17 - (%fd17 - %fd7 / %fd17^2) / 3.
mul.f64 %fd16, %fd7, %fd15;
mul.f64 %fd17, %fd16, %fd15;
mul.f64 %fd18, %fd17, %fd17;
div.rn.f64 %fd19, %fd7, %fd18;
sub.f64 %fd20, %fd17, %fd19;
mov.f64 %fd21, 0dBFD5555555555555;
.loc 3 318 10
fma.rn.f64 %fd22, %fd20, %fd21, %fd17;
// Re-apply the exponent: add (%r37 - %r53) to the exponent field of the
// result's high word.
{
.reg .b32 %temp;
mov.b64 {%r39, %temp}, %fd22;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r40}, %fd22;
}
sub.s32 %r41, %r37, %r53;
shl.b32 %r42, %r41, 20;
add.s32 %r43, %r40, %r42;
mov.b64 %fd23, {%r39, %r43};
// Negate the result when the input's high word is negative (sign bit set).
{
.reg .b32 %temp;
mov.b64 {%temp, %r44}, %fd1;
}
setp.lt.s32 %p6, %r44, 0;
neg.f64 %fd24, %fd23;
selp.f64 %fd25, %fd24, %fd23, %p6;
bra.uni BB7_11;
BB7_10:
.loc 3 318 10
// Special inputs: result is x + x (keeps zero and inf, propagates NaN).
add.f64 %fd25, %fd1, %fd1;
BB7_11:
.loc 2 14 42
// Store to destination[(x * param_3 + y)].
add.s32 %r45, %r7, %r6;
mul.wide.s32 %rd7, %r45, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 14 42
st.global.f64 [%rd8], %fd25;
.loc 2 14 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r18, %r3, %r7;
.loc 2 14 1
setp.lt.s32 %p7, %r18, %r20;
mov.u32 %r49, %r18;
@%p7 bra BB7_4;
BB7_12:
.loc 2 14 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
mov.u32 %r46, %nctaid.x;
mad.lo.s32 %r48, %r46, %r24, %r48;
.loc 2 14 1
setp.lt.s32 %p8, %r48, %r21;
@%p8 bra BB7_2;
BB7_13:
.loc 2 14 2
ret;
}
// map_ceil_double
// Element-wise kernel: for every (x, y) index pair it loads one double from
// the param_4 buffer, rounds it to an integral value toward +infinity
// (cvt.rpi.f64.f64), and stores the result in the param_2 buffer.
//
// Parameters as they are used by the code below:
//   param_0 (u32)  exclusive upper bound of the y-derived (inner loop) index
//   param_1 (u32)  exclusive upper bound of the x-derived (outer loop) index
//   param_2 (u64)  destination base address
//   param_3 (u32)  multiplier applied to the x index for destination addressing
//   param_4 (u64)  source base address
//   param_5 (u32)  multiplier applied to the x index for source addressing
// NOTE(review): presumably rows, cols, dst, dst leading dimension, src,
// src leading dimension -- confirm against function_decls.cuh.
.visible .entry map_ceil_double(
.param .u32 map_ceil_double_param_0,
.param .u32 map_ceil_double_param_1,
.param .u64 map_ceil_double_param_2,
.param .u32 map_ceil_double_param_3,
.param .u64 map_ceil_double_param_4,
.param .u32 map_ceil_double_param_5
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<3>;
ld.param.u32 %r12, [map_ceil_double_param_0];
ld.param.u32 %r13, [map_ceil_double_param_1];
ld.param.u64 %rd3, [map_ceil_double_param_2];
ld.param.u32 %r14, [map_ceil_double_param_3];
ld.param.u64 %rd4, [map_ceil_double_param_4];
ld.param.u32 %r15, [map_ceil_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 15 1
// %r25 = ntid.x * ctaid.x + tid.x : starting x index of this thread.
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 15 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB8_6;
.loc 2 15 1
// %r3 = ntid.y * ctaid.y + tid.y : starting y index of this thread.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 15 22
// %r4 = nctaid.x * ntid.x : step of the outer (x) loop.
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 15 22
// %r5 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over x (%r25).
BB8_2:
.loc 2 15 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB8_5;
.loc 2 15 1
// %r7 = x * param_5 (source offset), %r8 = x * param_3 (destination offset).
mul.lo.s32 %r7, %r25, %r15;
.loc 2 15 42
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over y (%r26 / %r9).
BB8_4:
.loc 2 15 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 15 1
ld.global.f64 %fd1, [%rd6];
.loc 4 2795 10
// Round to integral value toward +infinity (the ceiling operation).
cvt.rpi.f64.f64 %fd2, %fd1;
.loc 2 15 42
// Store to destination[(x * param_3 + y)].
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 15 42
st.global.f64 [%rd8], %fd2;
.loc 2 15 22
// y += %r5; repeat while y < param_0.
add.s32 %r10, %r5, %r9;
.loc 2 15 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB8_4;
BB8_5:
.loc 2 15 22
// x += %r4; repeat while x < param_1.
add.s32 %r25, %r4, %r25;
.loc 2 15 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB8_2;
BB8_6:
.loc 2 15 2
ret;
}
// map_cos_double
// Element-wise kernel: for every (x, y) index pair it loads one double from
// the param_4 buffer, evaluates the inlined double-precision routine
// referenced by `.loc 3 203` (cosine, per the kernel name), and stores the
// result in the param_2 buffer.
//
// Outline of the math: the argument is reduced to r = v - q * (pi/2) with
// q = round(v * 2/pi); for |v| > 2^31 the reduction is delegated to
// __internal_trig_reduction_slowpathd. The value q + 1 then selects between
// two polynomial forms and an optional sign flip.
//
// Parameters as they are used by the code below:
//   param_0 (u32)  exclusive upper bound of the y-derived (inner loop) index
//   param_1 (u32)  exclusive upper bound of the x-derived (outer loop) index
//   param_2 (u64)  destination base address
//   param_3 (u32)  multiplier applied to the x index for destination addressing
//   param_4 (u64)  source base address
//   param_5 (u32)  multiplier applied to the x index for source addressing
// NOTE(review): presumably rows, cols, dst, dst leading dimension, src,
// src leading dimension -- confirm against function_decls.cuh.
.visible .entry map_cos_double(
.param .u32 map_cos_double_param_0,
.param .u32 map_cos_double_param_1,
.param .u64 map_cos_double_param_2,
.param .u32 map_cos_double_param_3,
.param .u64 map_cos_double_param_4,
.param .u32 map_cos_double_param_5
)
{
// 4-byte local slot: holds the integer q so the slow-path reduction
// function can be handed a pointer to it.
.local .align 4 .b8 __local_depot9[4];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<10>;
.reg .s32 %r<41>;
.reg .s64 %rd<16>;
.reg .f64 %fd<43>;
mov.u64 %SPL, __local_depot9;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r14, [map_cos_double_param_0];
ld.param.u32 %r15, [map_cos_double_param_1];
ld.param.u64 %rd1, [map_cos_double_param_2];
ld.param.u32 %r16, [map_cos_double_param_3];
ld.param.u64 %rd2, [map_cos_double_param_4];
ld.param.u32 %r17, [map_cos_double_param_5];
.loc 2 16 1
// %r38 = ntid.x * ctaid.x + tid.x : starting x index of this thread.
mov.u32 %r18, %ntid.x;
mov.u32 %r19, %ctaid.x;
mov.u32 %r20, %tid.x;
mad.lo.s32 %r38, %r18, %r19, %r20;
.loc 2 16 1
setp.ge.s32 %p1, %r38, %r15;
@%p1 bra BB9_14;
.loc 2 16 1
mov.u32 %r21, %ntid.y;
.loc 2 16 22
// %r2 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r2, %r22, %r21;
// %rd3 = source base, %rd13 = destination base (global address space).
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd13, %rd1;
// Outer loop over x (%r38); the starting y index is recomputed each pass.
BB9_2:
.loc 2 16 1
mov.u32 %r23, %ctaid.y;
mov.u32 %r25, %tid.y;
mad.lo.s32 %r39, %r21, %r23, %r25;
.loc 2 16 1
setp.ge.s32 %p2, %r39, %r14;
@%p2 bra BB9_13;
.loc 2 16 1
// %r4 = x * param_5 (source offset), %r5 = x * param_3 (destination offset).
mul.lo.s32 %r4, %r38, %r17;
.loc 2 16 42
mul.lo.s32 %r5, %r38, %r16;
// Inner loop over y (%r39).
BB9_4:
.loc 2 16 1
add.s32 %r30, %r39, %r4;
mul.wide.s32 %rd4, %r30, 8;
add.s64 %rd5, %rd3, %rd4;
// %fd40 = input value v.
ld.global.f64 %fd40, [%rd5];
.loc 3 203 10
// |v| == +inf: replace v by v * 0.0 so the rest of the computation runs
// on the product instead of on infinity.
abs.f64 %fd14, %fd40;
setp.neu.f64 %p3, %fd14, 0d7FF0000000000000;
@%p3 bra BB9_6;
mov.f64 %fd15, 0d0000000000000000;
.loc 3 203 10
mul.rn.f64 %fd40, %fd40, %fd15;
BB9_6:
// %rd6 = generic address of the local slot, %rd7 = its local-space address.
add.u64 %rd6, %SP, 0;
.loc 3 203 10
// q = round-to-nearest-integer(v * 2/pi); 0d3FE45F306DC9C883 is 2/pi.
mul.f64 %fd16, %fd40, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r40, %fd16;
cvta.to.local.u64 %rd7, %rd6;
.loc 3 203 10
st.local.u32 [%rd7], %r40;
// Fast-path reduction: r = v - q * pi/2, with pi/2 supplied as three
// constants of decreasing magnitude (0d3FF921FB54442D18 is the leading part).
cvt.rn.f64.s32 %fd17, %r40;
neg.f64 %fd18, %fd17;
mov.f64 %fd19, 0d3FF921FB54442D18;
.loc 3 203 10
fma.rn.f64 %fd20, %fd18, %fd19, %fd40;
mov.f64 %fd21, 0d3C91A62633145C00;
.loc 3 203 10
fma.rn.f64 %fd22, %fd18, %fd21, %fd20;
mov.f64 %fd23, 0d397B839A252049C0;
.loc 3 203 10
fma.rn.f64 %fd41, %fd18, %fd23, %fd22;
// Keep the fast-path result unless |v| > 2^31 (0d41E0000000000000).
abs.f64 %fd24, %fd40;
setp.leu.f64 %p4, %fd24, 0d41E0000000000000;
@%p4 bra BB9_8;
// Slow path: call the out-of-line reduction with v and the address of the
// local slot; it returns the reduced argument, and q is reloaded from the
// slot afterwards.
// Callseq Start 0
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd40;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 3 203 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd41, [retval0+0];
}
// Callseq End 0
ld.local.u32 %r40, [%rd7];
BB9_8:
// %r11 = q + 1. Bit 0 of %r11 picks the coefficient set: the table offset
// is ((%r11 << 3) & 8) entries of 8 bytes into __cudart_sin_cos_coeffs, and
// %p6 (bit 0 clear) also selects the leading coefficient.
add.s32 %r11, %r40, 1;
shl.b32 %r31, %r11, 3;
and.b32 %r32, %r31, 8;
and.b32 %r33, %r11, 1;
setp.eq.b32 %p5, %r33, 1;
not.pred %p6, %p5;
selp.f64 %fd25, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p6;
mul.wide.u32 %rd10, %r32, 8;
mov.u64 %rd11, __cudart_sin_cos_coeffs;
add.s64 %rd12, %rd11, %rd10;
.loc 3 203 10
// Horner evaluation in %fd7 = r*r using six further table coefficients.
ld.const.f64 %fd26, [%rd12+8];
mul.rn.f64 %fd7, %fd41, %fd41;
fma.rn.f64 %fd27, %fd25, %fd7, %fd26;
ld.const.f64 %fd28, [%rd12+16];
fma.rn.f64 %fd29, %fd27, %fd7, %fd28;
ld.const.f64 %fd30, [%rd12+24];
fma.rn.f64 %fd31, %fd29, %fd7, %fd30;
ld.const.f64 %fd32, [%rd12+32];
fma.rn.f64 %fd33, %fd31, %fd7, %fd32;
ld.const.f64 %fd34, [%rd12+40];
fma.rn.f64 %fd35, %fd33, %fd7, %fd34;
ld.const.f64 %fd36, [%rd12+48];
fma.rn.f64 %fd8, %fd35, %fd7, %fd36;
// Bit 0 of %r11 clear: result = r + r * poly.
fma.rn.f64 %fd42, %fd8, %fd41, %fd41;
@%p6 bra BB9_10;
// Bit 0 of %r11 set: result = 1 + r*r * poly.
mov.f64 %fd37, 0d3FF0000000000000;
.loc 3 203 10
fma.rn.f64 %fd42, %fd8, %fd7, %fd37;
BB9_10:
// Bit 1 of %r11 set: negate the result (computed as result * -1.0 + 0.0).
and.b32 %r34, %r11, 2;
setp.eq.s32 %p7, %r34, 0;
@%p7 bra BB9_12;
mov.f64 %fd38, 0d0000000000000000;
mov.f64 %fd39, 0dBFF0000000000000;
.loc 3 203 10
fma.rn.f64 %fd42, %fd42, %fd39, %fd38;
BB9_12:
.loc 2 16 42
// Store to destination[(x * param_3 + y)].
add.s32 %r35, %r39, %r5;
mul.wide.s32 %rd14, %r35, 8;
add.s64 %rd15, %rd13, %rd14;
st.global.f64 [%rd15], %fd42;
.loc 2 16 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r39, %r2, %r39;
.loc 2 16 1
setp.lt.s32 %p8, %r39, %r14;
@%p8 bra BB9_4;
BB9_13:
.loc 2 16 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
mov.u32 %r36, %nctaid.x;
mad.lo.s32 %r38, %r36, %r18, %r38;
.loc 2 16 1
setp.lt.s32 %p9, %r38, %r15;
@%p9 bra BB9_2;
BB9_14:
.loc 2 16 2
ret;
}
// map_cosh_double
// Element-wise kernel: for every (x, y) index pair it loads one double from
// the param_4 buffer, evaluates the inlined double-precision routine
// referenced by `.loc 3 263` (hyperbolic cosine, per the kernel name), and
// stores the result in the param_2 buffer.
//
// Outline of the math: with a = |v|, E = exp(a) / 4 is built from a
// polynomial and an exponent adjustment of (n - 2); the result is
// 2 * E + 0.125 / E, i.e. exp(a)/2 + exp(-a)/2.
//
// Parameters as they are used by the code below:
//   param_0 (u32)  exclusive upper bound of the y-derived (inner loop) index
//   param_1 (u32)  exclusive upper bound of the x-derived (outer loop) index
//   param_2 (u64)  destination base address
//   param_3 (u32)  multiplier applied to the x index for destination addressing
//   param_4 (u64)  source base address
//   param_5 (u32)  multiplier applied to the x index for source addressing
// NOTE(review): presumably rows, cols, dst, dst leading dimension, src,
// src leading dimension -- confirm against function_decls.cuh.
.visible .entry map_cosh_double(
.param .u32 map_cosh_double_param_0,
.param .u32 map_cosh_double_param_1,
.param .u64 map_cosh_double_param_2,
.param .u32 map_cosh_double_param_3,
.param .u64 map_cosh_double_param_4,
.param .u32 map_cosh_double_param_5
)
{
.reg .pred %p<8>;
.reg .f32 %f<5>;
.reg .s32 %r<42>;
.reg .s64 %rd<9>;
.reg .f64 %fd<50>;
ld.param.u32 %r15, [map_cosh_double_param_0];
ld.param.u32 %r16, [map_cosh_double_param_1];
ld.param.u64 %rd3, [map_cosh_double_param_2];
ld.param.u32 %r17, [map_cosh_double_param_3];
ld.param.u64 %rd4, [map_cosh_double_param_4];
ld.param.u32 %r18, [map_cosh_double_param_5];
// %rd1 = destination base, %rd2 = source base (global address space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 17 1
// %r39 = ntid.x * ctaid.x + tid.x : starting x index of this thread.
mov.u32 %r19, %ntid.x;
mov.u32 %r20, %ctaid.x;
mov.u32 %r21, %tid.x;
mad.lo.s32 %r39, %r19, %r20, %r21;
.loc 2 17 1
setp.ge.s32 %p1, %r39, %r16;
@%p1 bra BB10_12;
.loc 2 17 1
// %r2 = ntid.y * ctaid.y + tid.y : starting y index of this thread.
mov.u32 %r22, %tid.y;
mov.u32 %r23, %ntid.y;
mov.u32 %r24, %ctaid.y;
mad.lo.s32 %r2, %r23, %r24, %r22;
.loc 2 17 22
// %r3 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r25, %nctaid.y;
mul.lo.s32 %r3, %r25, %r23;
// Outer loop over x (%r39).
BB10_2:
.loc 2 17 1
setp.ge.s32 %p2, %r2, %r15;
@%p2 bra BB10_11;
.loc 2 17 1
// %r5 = x * param_5 (source offset), %r6 = x * param_3 (destination offset).
mul.lo.s32 %r5, %r39, %r18;
.loc 2 17 42
mul.lo.s32 %r6, %r39, %r17;
mov.u32 %r40, %r2;
// Inner loop over y (%r40 / %r7).
BB10_4:
.loc 2 17 1
mov.u32 %r7, %r40;
add.s32 %r26, %r7, %r5;
mul.wide.s32 %rd5, %r26, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 17 1
// %fd1 = input value v, %fd2 = a = |v|.
ld.global.f64 %fd1, [%rd6];
.loc 3 263 10
abs.f64 %fd2, %fd1;
// Range check on the high 32 bits of a: below the threshold take the
// normal evaluation at BB10_6.
{
.reg .b32 %temp;
mov.b64 {%temp, %r27}, %fd2;
}
setp.lt.u32 %p3, %r27, 1082536911;
@%p3 bra BB10_6;
// Out-of-range input: use +inf when a > 0 compares true, otherwise the
// original input (the compare is false for NaN), then double it.
setp.gt.f64 %p4, %fd2, 0d0000000000000000;
selp.f64 %fd9, 0d7FF0000000000000, %fd1, %p4;
add.f64 %fd49, %fd9, %fd9;
bra.uni BB10_10;
BB10_6:
.loc 3 263 10
// n = round(a * log2(e)) (0d3FF71547652B82FE); %r8 = n as an integer.
mul.f64 %fd10, %fd2, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd11, %fd10;
cvt.rzi.s32.f64 %r8, %fd11;
// r = a - n * ln(2), with ln(2) applied as a leading part
// (0dBFE62E42FEFA39EF) and a low-order part (0dBC7ABC9E3B39803F).
mov.f64 %fd12, 0dBFE62E42FEFA39EF;
.loc 3 263 10
fma.rn.f64 %fd13, %fd11, %fd12, %fd2;
mov.f64 %fd14, 0dBC7ABC9E3B39803F;
.loc 3 263 10
fma.rn.f64 %fd15, %fd11, %fd14, %fd13;
// Horner polynomial in r with the literal coefficients shown; the last two
// steps add the constant 1.0, giving %fd48 as the exp(r) approximation.
mov.f64 %fd16, 0d3E928A27E30F5561;
mov.f64 %fd17, 0d3E5AE6449C0686C0;
.loc 3 263 10
fma.rn.f64 %fd18, %fd17, %fd15, %fd16;
mov.f64 %fd19, 0d3EC71DE8E6486D6B;
.loc 3 263 10
fma.rn.f64 %fd20, %fd18, %fd15, %fd19;
mov.f64 %fd21, 0d3EFA019A6B2464C5;
.loc 3 263 10
fma.rn.f64 %fd22, %fd20, %fd15, %fd21;
mov.f64 %fd23, 0d3F2A01A0171064A5;
.loc 3 263 10
fma.rn.f64 %fd24, %fd22, %fd15, %fd23;
mov.f64 %fd25, 0d3F56C16C17F29C8D;
.loc 3 263 10
fma.rn.f64 %fd26, %fd24, %fd15, %fd25;
mov.f64 %fd27, 0d3F8111111111A24E;
.loc 3 263 10
fma.rn.f64 %fd28, %fd26, %fd15, %fd27;
mov.f64 %fd29, 0d3FA555555555211D;
.loc 3 263 10
fma.rn.f64 %fd30, %fd28, %fd15, %fd29;
mov.f64 %fd31, 0d3FC5555555555530;
.loc 3 263 10
fma.rn.f64 %fd32, %fd30, %fd15, %fd31;
mov.f64 %fd33, 0d3FE0000000000005;
.loc 3 263 10
fma.rn.f64 %fd34, %fd32, %fd15, %fd33;
mov.f64 %fd35, 0d3FF0000000000000;
.loc 3 263 10
fma.rn.f64 %fd36, %fd34, %fd15, %fd35;
fma.rn.f64 %fd48, %fd36, %fd15, %fd35;
// Scale by 2^(n - 2). When |n - 2| < 1023 a single power-of-two factor is
// built directly at BB10_8; otherwise the scaling is split into two
// factors, the first of which is applied here.
add.s32 %r9, %r8, -2;
abs.s32 %r28, %r9;
setp.lt.s32 %p5, %r28, 1023;
@%p5 bra BB10_8;
add.s32 %r29, %r8, 2044;
shl.b32 %r30, %r29, 19;
and.b32 %r31, %r30, -1048576;
shl.b32 %r32, %r29, 20;
sub.s32 %r41, %r32, %r31;
mov.u32 %r33, 0;
.loc 3 263 10
mov.b64 %fd37, {%r33, %r31};
mul.f64 %fd48, %fd48, %fd37;
bra.uni BB10_9;
BB10_8:
.loc 3 263 10
// High word of 2^(n - 2): ((n - 2) << 20) + 0x3FF00000.
shl.b32 %r34, %r9, 20;
add.s32 %r41, %r34, 1072693248;
BB10_9:
// %fd38 = E = polynomial * scale factor whose high word is %r41.
mov.u32 %r35, 0;
.loc 3 263 10
mov.b64 %fd40, {%r35, %r41};
mul.f64 %fd38, %fd48, %fd40;
// %fd45 = 1 / E: single-precision approximate seed refined with FMAs.
// inline asm
cvt.rn.f32.f64 %f1,%fd38;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd39,%f2;
// inline asm
neg.f64 %fd41, %fd38;
fma.rn.f64 %fd43, %fd41, %fd39, %fd35;
fma.rn.f64 %fd44, %fd43, %fd43, %fd43;
fma.rn.f64 %fd45, %fd44, %fd39, %fd39;
// Result = 2.0 * E + 0.125 * (1 / E)  (0d3FC0000000000000 is 0.125).
mul.f64 %fd46, %fd45, 0d3FC0000000000000;
mov.f64 %fd47, 0d4000000000000000;
.loc 3 263 10
fma.rn.f64 %fd49, %fd47, %fd38, %fd46;
BB10_10:
.loc 2 17 42
// Store to destination[(x * param_3 + y)].
add.s32 %r36, %r7, %r6;
mul.wide.s32 %rd7, %r36, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 17 42
st.global.f64 [%rd8], %fd49;
.loc 2 17 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r13, %r3, %r7;
.loc 2 17 1
setp.lt.s32 %p6, %r13, %r15;
mov.u32 %r40, %r13;
@%p6 bra BB10_4;
BB10_11:
.loc 2 17 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
mov.u32 %r37, %nctaid.x;
mad.lo.s32 %r39, %r37, %r19, %r39;
.loc 2 17 1
setp.lt.s32 %p7, %r39, %r16;
@%p7 bra BB10_2;
BB10_12:
.loc 2 17 2
ret;
}
// map_cospi_double
// Element-wise kernel: for every (x, y) index pair it loads one double from
// the param_4 buffer, evaluates the inlined double-precision routine
// referenced by `.loc 3 213` (cos(pi * v), per the kernel name), and stores
// the result in the param_2 buffer.
//
// Outline of the math: n = round(2 * v), r = (v - n / 2) * pi, then the low
// two bits of n + 1 select the polynomial form and the sign.
//
// Parameters as they are used by the code below:
//   param_0 (u32)  exclusive upper bound of the y-derived (inner loop) index
//   param_1 (u32)  exclusive upper bound of the x-derived (outer loop) index
//   param_2 (u64)  destination base address
//   param_3 (u32)  multiplier applied to the x index for destination addressing
//   param_4 (u64)  source base address
//   param_5 (u32)  multiplier applied to the x index for source addressing
// NOTE(review): presumably rows, cols, dst, dst leading dimension, src,
// src leading dimension -- confirm against function_decls.cuh.
.visible .entry map_cospi_double(
.param .u32 map_cospi_double_param_0,
.param .u32 map_cospi_double_param_1,
.param .u64 map_cospi_double_param_2,
.param .u32 map_cospi_double_param_3,
.param .u64 map_cospi_double_param_4,
.param .u32 map_cospi_double_param_5
)
{
.reg .pred %p<9>;
.reg .s32 %r<43>;
.reg .s64 %rd<13>;
.reg .f64 %fd<37>;
ld.param.u32 %r11, [map_cospi_double_param_0];
ld.param.u32 %r12, [map_cospi_double_param_1];
ld.param.u64 %rd2, [map_cospi_double_param_2];
ld.param.u32 %r13, [map_cospi_double_param_3];
ld.param.u64 %rd3, [map_cospi_double_param_4];
ld.param.u32 %r14, [map_cospi_double_param_5];
// %rd1 = source base in the global address space.
cvta.to.global.u64 %rd1, %rd3;
.loc 2 18 1
// %r41 = ntid.x * ctaid.x + tid.x : starting x index of this thread.
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r41, %r15, %r16, %r17;
.loc 2 18 1
setp.ge.s32 %p1, %r41, %r12;
@%p1 bra BB11_12;
.loc 2 18 1
mov.u32 %r18, %ntid.y;
.loc 2 18 22
// %r2 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r19, %nctaid.y;
mul.lo.s32 %r2, %r19, %r18;
// %rd10 = destination base in the global address space.
cvta.to.global.u64 %rd10, %rd2;
// Outer loop over x (%r41); the starting y index is recomputed each pass.
BB11_2:
.loc 2 18 1
mov.u32 %r20, %ctaid.y;
mov.u32 %r22, %tid.y;
mad.lo.s32 %r42, %r18, %r20, %r22;
.loc 2 18 1
setp.ge.s32 %p2, %r42, %r11;
@%p2 bra BB11_11;
.loc 2 18 1
// %r4 = x * param_5 (source offset), %r5 = x * param_3 (destination offset).
mul.lo.s32 %r4, %r41, %r14;
.loc 2 18 42
mul.lo.s32 %r5, %r41, %r13;
// Inner loop over y (%r42).
BB11_4:
.loc 2 18 1
add.s32 %r27, %r42, %r4;
mul.wide.s32 %rd4, %r27, 8;
add.s64 %rd5, %rd1, %rd4;
.loc 2 18 1
// %fd35 = input value v.
ld.global.f64 %fd35, [%rd5];
.loc 3 213 10
// Magnitude check on the high word with the sign bit shifted out: when it
// is not below the threshold (unsigned compare), v is replaced by v * 0.0.
{
.reg .b32 %temp;
mov.b64 {%temp, %r28}, %fd35;
}
shl.b32 %r29, %r28, 1;
setp.lt.u32 %p3, %r29, -2038431743;
@%p3 bra BB11_6;
mov.f64 %fd11, 0d0000000000000000;
.loc 3 213 10
mul.rn.f64 %fd35, %fd35, %fd11;
BB11_6:
// %fd12 = v with 1 added to its exponent field (high word + 0x00100000),
// i.e. 2 * v formed by bit manipulation.
{
.reg .b32 %temp;
mov.b64 {%temp, %r30}, %fd35;
}
add.s32 %r31, %r30, 1048576;
{
.reg .b32 %temp;
mov.b64 {%r32, %temp}, %fd35;
}
mov.b64 %fd12, {%r32, %r31};
// n = round-to-nearest-integer(%fd12); %r33 = low 32 bits of n.
cvt.rni.f64.f64 %fd13, %fd12;
cvt.rzi.s64.f64 %rd6, %fd13;
cvt.u32.u64 %r33, %rd6;
// %fd16 = v - 0.5 * n.
neg.f64 %fd14, %fd13;
mov.f64 %fd15, 0d3FE0000000000000;
.loc 3 213 10
fma.rn.f64 %fd16, %fd14, %fd15, %fd35;
// r = %fd16 * pi, with pi applied as a leading part (0d400921FB54442D18)
// plus a low-order part (0d3CA1A62633145C07).
mul.f64 %fd17, %fd16, 0d3CA1A62633145C07;
mov.f64 %fd18, 0d400921FB54442D18;
.loc 3 213 10
fma.rn.f64 %fd19, %fd16, %fd18, %fd17;
// %r8 = n + 1. Bit 0 of %r8 picks the coefficient set: the table offset is
// ((%r8 << 3) & 8) entries of 8 bytes into __cudart_sin_cos_coeffs, and
// %p5 (bit 0 clear) also selects the leading coefficient.
add.s32 %r8, %r33, 1;
shl.b32 %r34, %r8, 3;
and.b32 %r35, %r34, 8;
mul.rn.f64 %fd4, %fd19, %fd19;
and.b32 %r36, %r8, 1;
setp.eq.b32 %p4, %r36, 1;
not.pred %p5, %p4;
selp.f64 %fd20, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p5;
mul.wide.u32 %rd7, %r35, 8;
mov.u64 %rd8, __cudart_sin_cos_coeffs;
add.s64 %rd9, %rd8, %rd7;
.loc 3 213 10
// Horner evaluation in %fd4 = r*r using six further table coefficients.
ld.const.f64 %fd21, [%rd9+8];
fma.rn.f64 %fd22, %fd20, %fd4, %fd21;
ld.const.f64 %fd23, [%rd9+16];
fma.rn.f64 %fd24, %fd22, %fd4, %fd23;
ld.const.f64 %fd25, [%rd9+24];
fma.rn.f64 %fd26, %fd24, %fd4, %fd25;
ld.const.f64 %fd27, [%rd9+32];
fma.rn.f64 %fd28, %fd26, %fd4, %fd27;
ld.const.f64 %fd29, [%rd9+40];
fma.rn.f64 %fd30, %fd28, %fd4, %fd29;
ld.const.f64 %fd31, [%rd9+48];
fma.rn.f64 %fd5, %fd30, %fd4, %fd31;
// Bit 0 of %r8 clear: result = r + r * poly.
fma.rn.f64 %fd36, %fd5, %fd19, %fd19;
@%p5 bra BB11_8;
// Bit 0 of %r8 set: result = 1 + r*r * poly.
mov.f64 %fd32, 0d3FF0000000000000;
.loc 3 213 10
fma.rn.f64 %fd36, %fd5, %fd4, %fd32;
BB11_8:
// Bit 1 of %r8 set: negate the result (computed as result * -1.0 + 0.0).
and.b32 %r37, %r8, 2;
setp.eq.s32 %p6, %r37, 0;
@%p6 bra BB11_10;
mov.f64 %fd33, 0d0000000000000000;
mov.f64 %fd34, 0dBFF0000000000000;
.loc 3 213 10
fma.rn.f64 %fd36, %fd36, %fd34, %fd33;
BB11_10:
.loc 2 18 42
// Store to destination[(x * param_3 + y)].
add.s32 %r38, %r42, %r5;
mul.wide.s32 %rd11, %r38, 8;
add.s64 %rd12, %rd10, %rd11;
.loc 2 18 42
st.global.f64 [%rd12], %fd36;
.loc 2 18 22
// y += nctaid.y * ntid.y; repeat while y < param_0.
add.s32 %r42, %r2, %r42;
.loc 2 18 1
setp.lt.s32 %p7, %r42, %r11;
@%p7 bra BB11_4;
BB11_11:
.loc 2 18 22
// x += nctaid.x * ntid.x; repeat while x < param_1.
mov.u32 %r39, %nctaid.x;
mad.lo.s32 %r41, %r39, %r15, %r41;
.loc 2 18 1
setp.lt.s32 %p8, %r41, %r12;
@%p8 bra BB11_2;
BB11_12:
.loc 2 18 2
ret;
}
// map_erfc_double: element-wise double-precision map kernel.
// Reads an f64 from the buffer in param_4, applies the inlined math routine
// referenced by the ".loc 3 399" directives (presumably erfc(), judging by
// the entry name -- confirm against the .cu source), and stores the f64
// result to the buffer in param_2.
//
// Parameters, as used by the code below:
//   param_0 (u32): exclusive upper bound for the y-derived index
//   param_1 (u32): exclusive upper bound for the x-derived index
//   param_2 (u64): output pointer (converted to a global address)
//   param_3 (u32): multiplier applied to the x index when addressing the output
//   param_4 (u64): input pointer (converted to a global address)
//   param_5 (u32): multiplier applied to the x index when addressing the input
// NOTE(review): param_0/param_1 look like rows/cols and param_3/param_5 like
// leading dimensions of column-major matrices -- confirm with the host caller.
//
// Both loops start at ntid*ctaid+tid and advance by nctaid*ntid, so any
// grid/block shape covers the full index range.
.visible .entry map_erfc_double(
.param .u32 map_erfc_double_param_0,
.param .u32 map_erfc_double_param_1,
.param .u64 map_erfc_double_param_2,
.param .u32 map_erfc_double_param_3,
.param .u64 map_erfc_double_param_4,
.param .u32 map_erfc_double_param_5
)
{
.reg .pred %p<9>;
.reg .f32 %f<9>;
.reg .s32 %r<47>;
.reg .s64 %rd<9>;
.reg .f64 %fd<127>;
ld.param.u32 %r13, [map_erfc_double_param_0];
ld.param.u32 %r14, [map_erfc_double_param_1];
ld.param.u64 %rd1, [map_erfc_double_param_2];
ld.param.u32 %r15, [map_erfc_double_param_3];
ld.param.u64 %rd2, [map_erfc_double_param_4];
ld.param.u32 %r16, [map_erfc_double_param_5];
.loc 2 19 1
// %r44 = starting x index = ntid.x * ctaid.x + tid.x
mov.u32 %r17, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r44, %r17, %r18, %r19;
.loc 2 19 1
// Exit immediately when the starting x index is already out of range.
setp.ge.s32 %p1, %r44, %r14;
@%p1 bra BB12_11;
.loc 2 19 1
mov.u32 %r20, %ntid.y;
.loc 2 19 22
// %r2 = y step = nctaid.y * ntid.y
mov.u32 %r21, %nctaid.y;
mul.lo.s32 %r2, %r21, %r20;
// %rd3 = input base, %rd6 = output base (global address space)
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd6, %rd1;
// Outer loop over the x index (%r44).
BB12_2:
.loc 2 19 1
// %r45 = starting y index = ntid.y * ctaid.y + tid.y (recomputed per x index)
mov.u32 %r22, %ctaid.y;
mov.u32 %r24, %tid.y;
mad.lo.s32 %r45, %r20, %r22, %r24;
.loc 2 19 1
setp.ge.s32 %p2, %r45, %r13;
@%p2 bra BB12_10;
// Inner loop over the y index (%r45).
BB12_3:
.loc 2 19 1
// Input element address: in + 8 * (x * param_5 + y)
mul.lo.s32 %r43, %r44, %r16;
add.s32 %r29, %r45, %r43;
mul.wide.s32 %rd4, %r29, 8;
add.s64 %rd5, %rd3, %rd4;
.loc 2 19 1
// %fd1 = input value v
ld.global.f64 %fd1, [%rd5];
.loc 3 399 10
// |v| >u +inf is true only when v is NaN (unordered compare); NaN is handled
// separately at BB12_8.
abs.f64 %fd2, %fd1;
setp.gtu.f64 %p3, %fd2, 0d7FF0000000000000;
@%p3 bra BB12_8;
// a = |v|; %fd15 = a - 4.0, %fd11 = a + 4.0
abs.f64 %fd124, %fd1;
add.f64 %fd15, %fd124, 0dC010000000000000;
mov.f64 %fd16, 0dC010000000000000;
.loc 3 399 10
add.f64 %fd11, %fd124, 0d4010000000000000;
// Reciprocal of (a + 4): single-precision approximate seed, widened to f64.
// inline asm
cvt.rn.f32.f64 %f1,%fd11;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd12,%f2;
// inline asm
neg.f64 %fd17, %fd11;
mov.f64 %fd18, 0d3FF0000000000000;
.loc 3 399 10
// Refine the seed in f64: e = 1 - d*r; r' = r + (e*e + e)*r
fma.rn.f64 %fd19, %fd17, %fd12, %fd18;
fma.rn.f64 %fd20, %fd19, %fd19, %fd19;
fma.rn.f64 %fd21, %fd20, %fd12, %fd12;
// q0 = (a - 4) * r', then a residual-based correction giving
// %fd27 ~= (a - 4) / (a + 4)
mul.f64 %fd22, %fd15, %fd21;
add.rn.f64 %fd23, %fd22, %fd18;
fma.rn.f64 %fd24, %fd16, %fd23, %fd124;
neg.f64 %fd25, %fd22;
fma.rn.f64 %fd26, %fd25, %fd124, %fd24;
fma.rn.f64 %fd27, %fd21, %fd26, %fd22;
// Horner-form polynomial in %fd27 with literal coefficients; the result is
// left in %fd72.
mov.f64 %fd28, 0dBE44E1C6FD03D328;
mov.f64 %fd29, 0dBDF8774AD4E0BFD7;
.loc 3 399 10
fma.rn.f64 %fd30, %fd29, %fd27, %fd28;
mov.f64 %fd31, 0dBE4330149F7A56B6;
.loc 3 399 10
fma.rn.f64 %fd32, %fd30, %fd27, %fd31;
mov.f64 %fd33, 0d3E7BEDDED8376273;
.loc 3 399 10
fma.rn.f64 %fd34, %fd32, %fd27, %fd33;
mov.f64 %fd35, 0d3E6F9254C3ABF22B;
.loc 3 399 10
fma.rn.f64 %fd36, %fd34, %fd27, %fd35;
mov.f64 %fd37, 0dBEAB9068C2148CF0;
.loc 3 399 10
fma.rn.f64 %fd38, %fd36, %fd27, %fd37;
mov.f64 %fd39, 0d3E94C6454DB34009;
.loc 3 399 10
fma.rn.f64 %fd40, %fd38, %fd27, %fd39;
mov.f64 %fd41, 0d3ED7F1C378F2311D;
.loc 3 399 10
fma.rn.f64 %fd42, %fd40, %fd27, %fd41;
mov.f64 %fd43, 0dBEE78E051C6D5C58;
.loc 3 399 10
fma.rn.f64 %fd44, %fd42, %fd27, %fd43;
mov.f64 %fd45, 0dBEF995B4EAD14A90;
.loc 3 399 10
fma.rn.f64 %fd46, %fd44, %fd27, %fd45;
mov.f64 %fd47, 0d3F23BE27CF0A29B2;
.loc 3 399 10
fma.rn.f64 %fd48, %fd46, %fd27, %fd47;
mov.f64 %fd49, 0dBF2A1DEF3E81672E;
.loc 3 399 10
fma.rn.f64 %fd50, %fd48, %fd27, %fd49;
mov.f64 %fd51, 0dBF48D4ABE68C1713;
.loc 3 399 10
fma.rn.f64 %fd52, %fd50, %fd27, %fd51;
mov.f64 %fd53, 0d3F749C67210DD6B4;
.loc 3 399 10
fma.rn.f64 %fd54, %fd52, %fd27, %fd53;
mov.f64 %fd55, 0dBF9096238568E357;
.loc 3 399 10
fma.rn.f64 %fd56, %fd54, %fd27, %fd55;
mov.f64 %fd57, 0d3FA3079EDF8C2DC9;
.loc 3 399 10
fma.rn.f64 %fd58, %fd56, %fd27, %fd57;
mov.f64 %fd59, 0dBFB0FB06DFF601FC;
.loc 3 399 10
fma.rn.f64 %fd60, %fd58, %fd27, %fd59;
mov.f64 %fd61, 0d3FB7FEE004DFBCDC;
.loc 3 399 10
fma.rn.f64 %fd62, %fd60, %fd27, %fd61;
mov.f64 %fd63, 0dBFB9DDB23C3DB8C6;
.loc 3 399 10
fma.rn.f64 %fd64, %fd62, %fd27, %fd63;
mov.f64 %fd65, 0d3FB16ECEFCFA5FDA;
.loc 3 399 10
fma.rn.f64 %fd66, %fd64, %fd27, %fd65;
mov.f64 %fd67, 0d3F8F7F5DF66FB6D6;
.loc 3 399 10
fma.rn.f64 %fd68, %fd66, %fd27, %fd67;
mov.f64 %fd69, 0dBFC1DF1AD154A29D;
.loc 3 399 10
fma.rn.f64 %fd70, %fd68, %fd27, %fd69;
mov.f64 %fd71, 0d3FF3BA5916E9FD7F;
.loc 3 399 10
fma.rn.f64 %fd72, %fd70, %fd27, %fd71;
mov.f64 %fd73, 0d4000000000000000;
.loc 3 399 10
// %fd13 = 2*a + 1; its reciprocal is built the same way (f32 seed + f64 refine).
fma.rn.f64 %fd13, %fd73, %fd124, %fd18;
// inline asm
cvt.rn.f32.f64 %f5,%fd13;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd14,%f6;
// inline asm
neg.f64 %fd74, %fd13;
fma.rn.f64 %fd75, %fd74, %fd14, %fd18;
fma.rn.f64 %fd76, %fd75, %fd75, %fd75;
fma.rn.f64 %fd77, %fd76, %fd14, %fd14;
// %fd3 ~= polynomial / (2*a + 1), with a residual correction step.
mul.f64 %fd78, %fd72, %fd77;
mul.f64 %fd79, %fd78, 0dC000000000000000;
fma.rn.f64 %fd80, %fd124, %fd79, %fd72;
neg.f64 %fd81, %fd78;
add.rn.f64 %fd82, %fd80, %fd81;
fma.rn.f64 %fd3, %fd82, %fd77, %fd78;
// Exponential of %fd4 = -(a*a):
//   n = round-to-nearest-integer(%fd4 * 0d3FF71547652B82FE)   (~log2(e))
//   %fd89 = %fd4 - n*ln2, using a two-part constant for ln2
neg.f64 %fd83, %fd124;
mul.f64 %fd4, %fd124, %fd83;
mul.f64 %fd84, %fd4, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd85, %fd84;
cvt.rzi.s32.f64 %r7, %fd85;
mov.f64 %fd86, 0dBFE62E42FEFA39EF;
.loc 3 399 10
fma.rn.f64 %fd87, %fd85, %fd86, %fd4;
mov.f64 %fd88, 0dBC7ABC9E3B39803F;
.loc 3 399 10
fma.rn.f64 %fd89, %fd85, %fd88, %fd87;
// Horner-form polynomial in the reduced argument %fd89; result in %fd125.
mov.f64 %fd90, 0d3E928A27E30F5561;
mov.f64 %fd91, 0d3E5AE6449C0686C0;
.loc 3 399 10
fma.rn.f64 %fd92, %fd91, %fd89, %fd90;
mov.f64 %fd93, 0d3EC71DE8E6486D6B;
.loc 3 399 10
fma.rn.f64 %fd94, %fd92, %fd89, %fd93;
mov.f64 %fd95, 0d3EFA019A6B2464C5;
.loc 3 399 10
fma.rn.f64 %fd96, %fd94, %fd89, %fd95;
mov.f64 %fd97, 0d3F2A01A0171064A5;
.loc 3 399 10
fma.rn.f64 %fd98, %fd96, %fd89, %fd97;
mov.f64 %fd99, 0d3F56C16C17F29C8D;
.loc 3 399 10
fma.rn.f64 %fd100, %fd98, %fd89, %fd99;
mov.f64 %fd101, 0d3F8111111111A24E;
.loc 3 399 10
fma.rn.f64 %fd102, %fd100, %fd89, %fd101;
mov.f64 %fd103, 0d3FA555555555211D;
.loc 3 399 10
fma.rn.f64 %fd104, %fd102, %fd89, %fd103;
mov.f64 %fd105, 0d3FC5555555555530;
.loc 3 399 10
fma.rn.f64 %fd106, %fd104, %fd89, %fd105;
mov.f64 %fd107, 0d3FE0000000000005;
.loc 3 399 10
fma.rn.f64 %fd108, %fd106, %fd89, %fd107;
fma.rn.f64 %fd109, %fd108, %fd89, %fd18;
fma.rn.f64 %fd125, %fd109, %fd89, %fd18;
// Scale by 2^n by constructing the exponent bits directly. When |n| < 1023 a
// single scale factor suffices (BB12_6); otherwise the scaling is split into
// two multiplications so that neither factor's exponent field goes out of range.
abs.s32 %r30, %r7;
setp.lt.s32 %p4, %r30, 1023;
@%p4 bra BB12_6;
add.s32 %r31, %r7, 2046;
shl.b32 %r32, %r31, 19;
and.b32 %r33, %r32, -1048576;
shl.b32 %r34, %r31, 20;
sub.s32 %r46, %r34, %r33;
mov.u32 %r35, 0;
.loc 3 399 10
// First partial scale factor: high word %r33, low word 0.
mov.b64 %fd110, {%r35, %r33};
mul.f64 %fd125, %fd125, %fd110;
bra.uni BB12_7;
BB12_6:
.loc 3 399 10
// High word of 2^n: (n << 20) + 0x3FF00000
shl.b32 %r36, %r7, 20;
add.s32 %r46, %r36, 1072693248;
BB12_7:
abs.f64 %fd123, %fd1;
mov.f64 %fd122, 0d4000000000000000;
.loc 3 399 10
neg.f64 %fd121, %fd123;
mov.u32 %r37, 0;
.loc 3 399 10
// Apply the (remaining) scale factor: %fd112 = polynomial * 2^(...)
mov.b64 %fd111, {%r37, %r46};
mul.f64 %fd112, %fd125, %fd111;
// %fd115 = fma(-a, a, -%fd4): the rounding error of the product -(a*a),
// folded in as a first-order correction to the exponential.
neg.f64 %fd114, %fd4;
fma.rn.f64 %fd115, %fd121, %fd123, %fd114;
fma.rn.f64 %fd116, %fd112, %fd115, %fd112;
// Combine the rational part with the exponential.
mul.f64 %fd117, %fd3, %fd116;
// %r38 = high 32 bits of a. Above 1077624832 (0x403B4000, the high word of
// 27.25) the result is forced to 0.0.
{
.reg .b32 %temp;
mov.b64 {%temp, %r38}, %fd123;
}
setp.gt.s32 %p5, %r38, 1077624832;
selp.f64 %fd118, 0d0000000000000000, %fd117, %p5;
// %r39 = high 32 bits of v; negative means the sign bit is set. For such
// inputs the final value is 2.0 - result.
{
.reg .b32 %temp;
mov.b64 {%temp, %r39}, %fd1;
}
setp.lt.s32 %p6, %r39, 0;
sub.f64 %fd120, %fd122, %fd118;
selp.f64 %fd126, %fd120, %fd118, %p6;
bra.uni BB12_9;
BB12_8:
.loc 3 399 10
// NaN input: v + v yields a NaN result.
add.f64 %fd126, %fd1, %fd1;
BB12_9:
.loc 2 19 42
// Output element address: out + 8 * (x * param_3 + y)
mad.lo.s32 %r40, %r44, %r15, %r45;
mul.wide.s32 %rd7, %r40, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 2 19 42
st.global.f64 [%rd8], %fd126;
.loc 2 19 22
// Advance y by nctaid.y * ntid.y and loop while y < param_0.
add.s32 %r45, %r2, %r45;
.loc 2 19 1
setp.lt.s32 %p7, %r45, %r13;
@%p7 bra BB12_3;
BB12_10:
.loc 2 19 22
// Advance x by nctaid.x * ntid.x and loop while x < param_1.
mov.u32 %r41, %nctaid.x;
mad.lo.s32 %r44, %r41, %r17, %r44;
.loc 2 19 1
setp.lt.s32 %p8, %r44, %r14;
@%p8 bra BB12_2;
BB12_11:
.loc 2 19 2
ret;
}
// map_erfcinv_double: element-wise double-precision map kernel.
// Reads an f64 from the buffer in param_4, applies the inlined math routine
// referenced by the ".loc 3 389" directives (presumably erfcinv(), judging by
// the entry name -- confirm against the .cu source), and stores the f64
// result to the buffer in param_2.
//
// Parameters, as used by the code below:
//   param_0 (u32): exclusive upper bound for the y-derived index
//   param_1 (u32): exclusive upper bound for the x-derived index
//   param_2 (u64): output pointer (converted to a global address)
//   param_3 (u32): multiplier applied to the x index when addressing the output
//   param_4 (u64): input pointer (converted to a global address)
//   param_5 (u32): multiplier applied to the x index when addressing the input
// NOTE(review): param_0/param_1 look like rows/cols and param_3/param_5 like
// leading dimensions of column-major matrices -- confirm with the host caller.
//
// Three numeric paths are selected per element:
//   BB13_20   : v inside a central interval -> polynomial in (-3.125 - log((2-v)*v))
//   L2894 path: tail, t not below a tiny threshold -> cheap log + ratio of polynomials
//   BB13_6    : tail, t tiny / zero / negative / NaN / inf -> full log with
//               special-case handling, then a ratio of polynomials
.visible .entry map_erfcinv_double(
.param .u32 map_erfcinv_double_param_0,
.param .u32 map_erfcinv_double_param_1,
.param .u64 map_erfcinv_double_param_2,
.param .u32 map_erfcinv_double_param_3,
.param .u64 map_erfcinv_double_param_4,
.param .u32 map_erfcinv_double_param_5
)
{
.reg .pred %p<19>;
.reg .f32 %f<17>;
.reg .s32 %r<69>;
.reg .s64 %rd<9>;
.reg .f64 %fd<261>;
ld.param.u32 %r20, [map_erfcinv_double_param_0];
ld.param.u32 %r21, [map_erfcinv_double_param_1];
ld.param.u64 %rd2, [map_erfcinv_double_param_2];
ld.param.u32 %r22, [map_erfcinv_double_param_3];
ld.param.u64 %rd3, [map_erfcinv_double_param_4];
ld.param.u32 %r23, [map_erfcinv_double_param_5];
// %rd1 = input base (global address space)
cvta.to.global.u64 %rd1, %rd3;
.loc 2 20 1
// %r63 = starting x index = ntid.x * ctaid.x + tid.x
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r63, %r24, %r25, %r26;
.loc 2 20 1
// Exit immediately when the starting x index is already out of range.
setp.ge.s32 %p1, %r63, %r21;
@%p1 bra BB13_23;
.loc 2 20 1
mov.u32 %r27, %ntid.y;
.loc 2 20 22
// %r2 = y step = nctaid.y * ntid.y
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
// %rd6 = output base (global address space)
cvta.to.global.u64 %rd6, %rd2;
// Outer loop over the x index (%r63).
BB13_2:
.loc 2 20 1
// %r64 = starting y index = ntid.y * ctaid.y + tid.y (recomputed per x index)
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r64, %r27, %r29, %r31;
.loc 2 20 1
setp.ge.s32 %p2, %r64, %r20;
@%p2 bra BB13_22;
// Inner loop over the y index (%r64).
BB13_3:
.loc 2 20 1
// Input element address: in + 8 * (x * param_5 + y)
mul.lo.s32 %r62, %r63, %r23;
add.s32 %r36, %r64, %r62;
mul.wide.s32 %rd4, %r36, 8;
add.s64 %rd5, %rd1, %rd4;
.loc 2 20 1
// %fd1 = input value v
ld.global.f64 %fd1, [%rd5];
.loc 3 389 10
// %fd2 = -v, %fd3 = 2.0 - v
neg.f64 %fd2, %fd1;
mov.f64 %fd18, 0d4000000000000000;
.loc 3 389 10
add.rn.f64 %fd3, %fd18, %fd2;
// Central interval test: 0d3F4FA4D2AD8F904D <= v <= 0d3FFFFC0B65AA4E0E
// (roughly 0.00097 .. 1.99903). Inside it, use the BB13_20 path.
setp.le.f64 %p3, %fd1, 0d3FFFFC0B65AA4E0E;
setp.ge.f64 %p4, %fd1, 0d3F4FA4D2AD8F904D;
and.pred %p5, %p4, %p3;
@%p5 bra BB13_20;
// Tail handling: t = (v > 1.0) ? 2 - v : v. %p6 is kept live and reused at
// BB13_19 to negate the result when v > 1.0.
setp.gt.f64 %p6, %fd1, 0d3FF0000000000000;
selp.f64 %fd4, %fd3, %fd1, %p6;
// %p7: t is below a very small positive threshold, or t is NaN (ltu is also
// true for unordered operands). Such values go to the careful path at BB13_6.
setp.ltu.f64 %p7, %fd4, 0d2B2BFF2EE48E0530;
// %r65 = high word of t, %r66 = low word of t
{
.reg .b32 %temp;
mov.b64 {%temp, %r65}, %fd4;
}
{
.reg .b32 %temp;
mov.b64 {%r66, %temp}, %fd4;
}
@%p7 bra BB13_6;
// --- Tail path with a cheap log approximation of t ---
// Integer part: %fd23 = ((high >> 20) & 2046) - 1022.
// Reduced value: %fd24 keeps the remaining bits (mask 0x801FFFFF) and has
// 0x3FE00000 added to the high word.
// NOTE(review): this looks like the toolkit's internal fast log helper -- confirm.
shr.u32 %r37, %r65, 20;
and.b32 %r38, %r37, 2046;
add.s32 %r39, %r38, -1022;
cvt.rn.f64.s32 %fd23, %r39;
and.b32 %r40, %r65, -2145386497;
add.s32 %r41, %r40, 1071644672;
mov.b64 %fd24, {%r66, %r41};
// %fd25 = m - 1, %fd19 = m + 1
add.f64 %fd25, %fd24, 0dBFF0000000000000;
add.f64 %fd19, %fd24, 0d3FF0000000000000;
mov.f64 %fd26, 0d3FF0000000000000;
.loc 3 389 10
// Reciprocal of (m + 1): f32 approximate seed, refined in f64 below.
// inline asm
cvt.rn.f32.f64 %f1,%fd19;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd20,%f2;
// inline asm
neg.f64 %fd27, %fd19;
fma.rn.f64 %fd28, %fd27, %fd20, %fd26;
fma.rn.f64 %fd29, %fd28, %fd28, %fd28;
fma.rn.f64 %fd30, %fd29, %fd20, %fd20;
// %fd36 ~= (m - 1) / (m + 1) with a residual correction; %fd37 = %fd36^2
mul.f64 %fd31, %fd25, %fd30;
mov.f64 %fd32, 0dC000000000000000;
.loc 3 389 10
fma.rn.f64 %fd33, %fd32, %fd31, %fd25;
neg.f64 %fd34, %fd31;
fma.rn.f64 %fd35, %fd34, %fd25, %fd33;
fma.rn.f64 %fd36, %fd35, %fd30, %fd31;
mul.f64 %fd37, %fd36, %fd36;
// Horner-form polynomial in %fd37.
mov.f64 %fd38, 0d3FA55CF59CDC5D89;
mov.f64 %fd39, 0d3FB5C5C218C775C9;
.loc 3 389 10
fma.rn.f64 %fd40, %fd39, %fd37, %fd38;
mov.f64 %fd41, 0d3FAEFD18CF6EBB9C;
.loc 3 389 10
fma.rn.f64 %fd42, %fd40, %fd37, %fd41;
mov.f64 %fd43, 0d3FB10682EDCB8D1B;
.loc 3 389 10
fma.rn.f64 %fd44, %fd42, %fd37, %fd43;
mov.f64 %fd45, 0d3FB3B1DD3AC7FC96;
.loc 3 389 10
fma.rn.f64 %fd46, %fd44, %fd37, %fd45;
mov.f64 %fd47, 0d3FB745CB459B54A6;
.loc 3 389 10
fma.rn.f64 %fd48, %fd46, %fd37, %fd47;
mov.f64 %fd49, 0d3FBC71C741A0669F;
.loc 3 389 10
fma.rn.f64 %fd50, %fd48, %fd37, %fd49;
mov.f64 %fd51, 0d3FC249249209112E;
.loc 3 389 10
fma.rn.f64 %fd52, %fd50, %fd37, %fd51;
mov.f64 %fd53, 0d3FC99999999A06C1;
.loc 3 389 10
fma.rn.f64 %fd54, %fd52, %fd37, %fd53;
mov.f64 %fd55, 0d3FD5555555555535;
.loc 3 389 10
fma.rn.f64 %fd56, %fd54, %fd37, %fd55;
// log approximation = integer_part * 0d3FE62E42FEFA39EF (~ln 2) + 2*(s + s*s^2*poly)
mul.f64 %fd57, %fd56, %fd37;
fma.rn.f64 %fd58, %fd57, %fd36, %fd36;
add.f64 %fd59, %fd58, %fd58;
mov.f64 %fd60, 0d3FE62E42FEFA39EF;
.loc 3 389 10
fma.rn.f64 %fd61, %fd23, %fd60, %fd59;
// %fd21 = -(log approximation); next compute its reciprocal square root:
// f32 rsqrt.approx seed widened to f64, then one refinement step.
neg.f64 %fd21, %fd61;
// inline asm
cvt.rn.f32.f64 %f5, %fd21;
// inline asm
// inline asm
rsqrt.approx.ftz.f32 %f6, %f5;
// inline asm
// inline asm
cvt.f64.f32 %fd22, %f6;
// inline asm
// e = 1 - x*r^2; r' = r + (0.375*e + 0.5) * (e*r)
mul.rn.f64 %fd62, %fd22, %fd22;
neg.f64 %fd63, %fd62;
fma.rn.f64 %fd64, %fd21, %fd63, %fd26;
mov.f64 %fd65, 0d3FE0000000000000;
mov.f64 %fd66, 0d3FD8000000000000;
.loc 3 389 10
fma.rn.f64 %fd67, %fd66, %fd64, %fd65;
mul.rn.f64 %fd68, %fd64, %fd22;
fma.rn.f64 %fd69, %fd67, %fd68, %fd22;
// Numerator polynomial in %fd69 (result %fd84).
mov.f64 %fd70, 0d4000A0E7333839AA;
mov.f64 %fd71, 0d3FEBE9222591AFAB;
.loc 3 389 10
fma.rn.f64 %fd72, %fd71, %fd69, %fd70;
mov.f64 %fd73, 0d4008768CF7E57D5C;
.loc 3 389 10
fma.rn.f64 %fd74, %fd72, %fd69, %fd73;
mov.f64 %fd75, 0d400B77E7E28DA583;
.loc 3 389 10
fma.rn.f64 %fd76, %fd74, %fd69, %fd75;
mov.f64 %fd77, 0d3FF34F26A4F99CF9;
.loc 3 389 10
fma.rn.f64 %fd78, %fd76, %fd69, %fd77;
mov.f64 %fd79, 0d3FC1F674ADB019ED;
.loc 3 389 10
fma.rn.f64 %fd80, %fd78, %fd69, %fd79;
mov.f64 %fd81, 0d3F75DDAE9506431D;
.loc 3 389 10
fma.rn.f64 %fd82, %fd80, %fd69, %fd81;
mov.f64 %fd83, 0d3F0ADA49AA32489C;
.loc 3 389 10
fma.rn.f64 %fd84, %fd82, %fd69, %fd83;
// Denominator polynomial in %fd69 (leading coefficient 1), times %fd69.
add.f64 %fd85, %fd69, 0d4001E90FF51C2197;
mov.f64 %fd86, 0d40111EA3A7CF3820;
.loc 3 389 10
fma.rn.f64 %fd87, %fd85, %fd69, %fd86;
mov.f64 %fd88, 0d4011A0E4A4749594;
.loc 3 389 10
fma.rn.f64 %fd89, %fd87, %fd69, %fd88;
mov.f64 %fd90, 0d400D4E977D38C14D;
.loc 3 389 10
fma.rn.f64 %fd91, %fd89, %fd69, %fd90;
mov.f64 %fd92, 0d3FF37FD567EC0D5F;
.loc 3 389 10
fma.rn.f64 %fd93, %fd91, %fd69, %fd92;
mov.f64 %fd94, 0d3FC1FB9D7F676033;
.loc 3 389 10
fma.rn.f64 %fd95, %fd93, %fd69, %fd94;
mov.f64 %fd96, 0d3F75DDCDF98946E4;
.loc 3 389 10
fma.rn.f64 %fd97, %fd95, %fd69, %fd96;
mov.f64 %fd98, 0d3F0ADA42D79D8DBB;
.loc 3 389 10
fma.rn.f64 %fd99, %fd97, %fd69, %fd98;
mul.f64 %fd100, %fd99, %fd69;
// %fd259 = numerator / denominator
div.rn.f64 %fd259, %fd84, %fd100;
bra.uni BB13_19;
// --- Tail path for tiny / special t: full log with special cases ---
BB13_6:
.loc 3 389 10
// Positive and finite t goes to the general log computation at BB13_12.
setp.gt.f64 %p8, %fd4, 0d0000000000000000;
setp.lt.f64 %p9, %fd4, 0d7FF0000000000000;
and.pred %p10, %p8, %p9;
@%p10 bra BB13_12;
// |t| >u +inf is true only for NaN.
abs.f64 %fd101, %fd4;
setp.gtu.f64 %p11, %fd101, 0d7FF0000000000000;
@%p11 bra BB13_11;
setp.neu.f64 %p12, %fd4, 0d0000000000000000;
@%p12 bra BB13_10;
// t == 0: log result is -inf.
mov.f64 %fd258, 0dFFF0000000000000;
bra.uni BB13_18;
BB13_10:
.loc 3 389 10
// t == +inf keeps +inf; any other value reaching here (negative) gives NaN.
setp.eq.f64 %p13, %fd4, 0d7FF0000000000000;
selp.f64 %fd258, %fd4, 0dFFF8000000000000, %p13;
bra.uni BB13_18;
BB13_11:
.loc 3 389 10
// NaN input: t + t yields a NaN result.
add.f64 %fd258, %fd4, %fd4;
bra.uni BB13_18;
BB13_12:
.loc 3 389 10
// High word below 0x00100000 means the exponent field is zero (subnormal t).
setp.lt.u32 %p14, %r65, 1048576;
@%p14 bra BB13_14;
// Normal t: exponent bias term is -1023.
mov.u32 %r67, -1023;
bra.uni BB13_15;
BB13_14:
.loc 3 389 10
// Subnormal t: scale by 0d4350000000000000 (2^54), re-split the words, and
// compensate in the bias term (-1077 = -1023 - 54).
mul.f64 %fd103, %fd4, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r65}, %fd103;
}
{
.reg .b32 %temp;
mov.b64 {%r66, %temp}, %fd103;
}
mov.u32 %r67, -1077;
BB13_15:
.loc 3 389 10
// %r68 = unbiased exponent; %fd257 = mantissa with the exponent field forced
// to that of 1.0 (0x3FF00000).
shr.s32 %r44, %r65, 20;
add.s32 %r68, %r67, %r44;
and.b32 %r45, %r65, -2146435073;
or.b32 %r46, %r45, 1072693248;
mov.b64 %fd257, {%r66, %r46};
// If the mantissa's high word is at or above 1073127583 (0x3FF6A09F), halve
// the mantissa (subtract 1 from the exponent field) and bump the exponent.
setp.lt.u32 %p15, %r46, 1073127583;
@%p15 bra BB13_17;
{
.reg .b32 %temp;
mov.b64 {%r47, %temp}, %fd257;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r48}, %fd257;
}
add.s32 %r49, %r48, -1048576;
mov.b64 %fd257, {%r47, %r49};
add.s32 %r68, %r68, 1;
BB13_17:
// %fd104 = m + 1; reciprocal via f32 seed + f64 refinement.
add.f64 %fd104, %fd257, 0d3FF0000000000000;
mov.f64 %fd106, 0d3FF0000000000000;
.loc 3 389 10
// inline asm
cvt.rn.f32.f64 %f9,%fd104;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f10,%f9;
// inline asm
// inline asm
cvt.f64.f32 %fd105,%f10;
// inline asm
neg.f64 %fd107, %fd104;
fma.rn.f64 %fd108, %fd107, %fd105, %fd106;
fma.rn.f64 %fd109, %fd108, %fd108, %fd108;
fma.rn.f64 %fd110, %fd109, %fd105, %fd105;
// %fd113 = 2 * (m - 1) / (m + 1); %fd114 = %fd113^2
add.f64 %fd111, %fd257, 0dBFF0000000000000;
mul.f64 %fd112, %fd111, %fd110;
fma.rn.f64 %fd113, %fd111, %fd110, %fd112;
mul.f64 %fd114, %fd113, %fd113;
// Horner-form polynomial in %fd114.
mov.f64 %fd115, 0d3ED0EE258B7A8B04;
mov.f64 %fd116, 0d3EB1380B3AE80F1E;
.loc 3 389 10
fma.rn.f64 %fd117, %fd116, %fd114, %fd115;
mov.f64 %fd118, 0d3EF3B2669F02676F;
.loc 3 389 10
fma.rn.f64 %fd119, %fd117, %fd114, %fd118;
mov.f64 %fd120, 0d3F1745CBA9AB0956;
.loc 3 389 10
fma.rn.f64 %fd121, %fd119, %fd114, %fd120;
mov.f64 %fd122, 0d3F3C71C72D1B5154;
.loc 3 389 10
fma.rn.f64 %fd123, %fd121, %fd114, %fd122;
mov.f64 %fd124, 0d3F624924923BE72D;
.loc 3 389 10
fma.rn.f64 %fd125, %fd123, %fd114, %fd124;
mov.f64 %fd126, 0d3F8999999999A3C4;
.loc 3 389 10
fma.rn.f64 %fd127, %fd125, %fd114, %fd126;
mov.f64 %fd128, 0d3FB5555555555554;
.loc 3 389 10
fma.rn.f64 %fd129, %fd127, %fd114, %fd128;
// Low-order correction terms for the quotient, then combine with
// exponent * ln2 using a two-part ln2 constant (high part
// 0d3FE62E42FEFA39EF, low part 0d3C7ABC9E3B39803F).
sub.f64 %fd130, %fd111, %fd113;
add.f64 %fd131, %fd130, %fd130;
neg.f64 %fd132, %fd113;
fma.rn.f64 %fd133, %fd132, %fd111, %fd131;
mul.f64 %fd134, %fd110, %fd133;
mul.f64 %fd135, %fd129, %fd114;
fma.rn.f64 %fd136, %fd135, %fd113, %fd134;
cvt.rn.f64.s32 %fd137, %r68;
mov.f64 %fd138, 0d3FE62E42FEFA39EF;
.loc 3 389 10
fma.rn.f64 %fd139, %fd137, %fd138, %fd113;
neg.s32 %r50, %r68;
cvt.rn.f64.s32 %fd140, %r50;
fma.rn.f64 %fd141, %fd140, %fd138, %fd139;
sub.f64 %fd142, %fd141, %fd113;
sub.f64 %fd143, %fd136, %fd142;
mov.f64 %fd144, 0d3C7ABC9E3B39803F;
.loc 3 389 10
fma.rn.f64 %fd145, %fd137, %fd144, %fd143;
// %fd258 = log(t)
add.f64 %fd258, %fd139, %fd145;
BB13_18:
// %fd147 = approximate reciprocal square root of -log(t); then a ratio of
// two polynomials in %fd147.
neg.f64 %fd146, %fd258;
rsqrt.approx.f64 %fd147, %fd146;
// Numerator polynomial (result %fd156).
mov.f64 %fd148, 0d3FFA2013964E259C;
mov.f64 %fd149, 0d3FE8E2101C71B0BF;
.loc 3 389 10
fma.rn.f64 %fd150, %fd149, %fd147, %fd148;
mov.f64 %fd151, 0d3FDABFE90921BE68;
.loc 3 389 10
fma.rn.f64 %fd152, %fd150, %fd147, %fd151;
mov.f64 %fd153, 0d3F97E41314DE00D4;
.loc 3 389 10
fma.rn.f64 %fd154, %fd152, %fd147, %fd153;
mov.f64 %fd155, 0d3F311BD487102E94;
.loc 3 389 10
fma.rn.f64 %fd156, %fd154, %fd147, %fd155;
// Denominator polynomial (leading coefficient 1), times %fd147.
add.f64 %fd157, %fd147, 0d3FF59895C30BAA54;
mov.f64 %fd158, 0d3FFAE8E5956A143F;
.loc 3 389 10
fma.rn.f64 %fd159, %fd157, %fd147, %fd158;
mov.f64 %fd160, 0d3FDACCE85FF7383D;
.loc 3 389 10
fma.rn.f64 %fd161, %fd159, %fd147, %fd160;
mov.f64 %fd162, 0d3F97E43B6CAC34FE;
.loc 3 389 10
fma.rn.f64 %fd163, %fd161, %fd147, %fd162;
mov.f64 %fd164, 0d3F311BD08289EB12;
.loc 3 389 10
fma.rn.f64 %fd165, %fd163, %fd147, %fd164;
mul.f64 %fd166, %fd165, %fd147;
div.rn.f64 %fd259, %fd156, %fd166;
BB13_19:
// Both tail paths join here. %p6 (v > 1.0, set before the tail split) selects
// the negated result.
neg.f64 %fd167, %fd259;
selp.f64 %fd260, %fd167, %fd259, %p6;
bra.uni BB13_21;
// --- Central-interval path ---
BB13_20:
.loc 3 389 10
// %fd170 = (2 - v) * v; the same cheap log approximation as in the tail path
// is applied to it.
mul.rn.f64 %fd170, %fd3, %fd1;
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd170;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd170;
}
shr.u32 %r53, %r51, 20;
and.b32 %r54, %r53, 2046;
add.s32 %r55, %r54, -1022;
cvt.rn.f64.s32 %fd171, %r55;
and.b32 %r56, %r51, -2145386497;
add.s32 %r57, %r56, 1071644672;
mov.b64 %fd172, {%r52, %r57};
add.f64 %fd173, %fd172, 0dBFF0000000000000;
add.f64 %fd168, %fd172, 0d3FF0000000000000;
mov.f64 %fd174, 0d3FF0000000000000;
.loc 3 389 10
// inline asm
cvt.rn.f32.f64 %f13,%fd168;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f14,%f13;
// inline asm
// inline asm
cvt.f64.f32 %fd169,%f14;
// inline asm
neg.f64 %fd175, %fd168;
fma.rn.f64 %fd176, %fd175, %fd169, %fd174;
fma.rn.f64 %fd177, %fd176, %fd176, %fd176;
fma.rn.f64 %fd178, %fd177, %fd169, %fd169;
mul.f64 %fd179, %fd173, %fd178;
mov.f64 %fd180, 0dC000000000000000;
.loc 3 389 10
fma.rn.f64 %fd181, %fd180, %fd179, %fd173;
neg.f64 %fd182, %fd179;
fma.rn.f64 %fd183, %fd182, %fd173, %fd181;
fma.rn.f64 %fd184, %fd183, %fd178, %fd179;
mul.f64 %fd185, %fd184, %fd184;
mov.f64 %fd186, 0d3FA55CF59CDC5D89;
mov.f64 %fd187, 0d3FB5C5C218C775C9;
.loc 3 389 10
fma.rn.f64 %fd188, %fd187, %fd185, %fd186;
mov.f64 %fd189, 0d3FAEFD18CF6EBB9C;
.loc 3 389 10
fma.rn.f64 %fd190, %fd188, %fd185, %fd189;
mov.f64 %fd191, 0d3FB10682EDCB8D1B;
.loc 3 389 10
fma.rn.f64 %fd192, %fd190, %fd185, %fd191;
mov.f64 %fd193, 0d3FB3B1DD3AC7FC96;
.loc 3 389 10
fma.rn.f64 %fd194, %fd192, %fd185, %fd193;
mov.f64 %fd195, 0d3FB745CB459B54A6;
.loc 3 389 10
fma.rn.f64 %fd196, %fd194, %fd185, %fd195;
mov.f64 %fd197, 0d3FBC71C741A0669F;
.loc 3 389 10
fma.rn.f64 %fd198, %fd196, %fd185, %fd197;
mov.f64 %fd199, 0d3FC249249209112E;
.loc 3 389 10
fma.rn.f64 %fd200, %fd198, %fd185, %fd199;
mov.f64 %fd201, 0d3FC99999999A06C1;
.loc 3 389 10
fma.rn.f64 %fd202, %fd200, %fd185, %fd201;
mov.f64 %fd203, 0d3FD5555555555535;
.loc 3 389 10
fma.rn.f64 %fd204, %fd202, %fd185, %fd203;
mul.f64 %fd205, %fd204, %fd185;
fma.rn.f64 %fd206, %fd205, %fd184, %fd184;
add.f64 %fd207, %fd206, %fd206;
mov.f64 %fd208, 0d3FE62E42FEFA39EF;
.loc 3 389 10
// %fd209 = log approximation of (2 - v) * v
fma.rn.f64 %fd209, %fd171, %fd208, %fd207;
mov.f64 %fd210, 0dC009000000000000;
.loc 3 389 10
// Polynomial argument: %fd211 = -3.125 - log((2 - v) * v)
sub.f64 %fd211, %fd210, %fd209;
// Horner-form polynomial in %fd211 with literal coefficients (result %fd256).
mov.f64 %fd212, 0dBC08DDF93324D327;
mov.f64 %fd213, 0dBBB135D2E746E627;
.loc 3 389 10
fma.rn.f64 %fd214, %fd213, %fd211, %fd212;
mov.f64 %fd215, 0d3C37B83EEF0B7C9F;
.loc 3 389 10
fma.rn.f64 %fd216, %fd214, %fd211, %fd215;
mov.f64 %fd217, 0d3C69BA72CD589B91;
.loc 3 389 10
fma.rn.f64 %fd218, %fd216, %fd211, %fd217;
mov.f64 %fd219, 0dBCA33689090A6B96;
.loc 3 389 10
fma.rn.f64 %fd220, %fd218, %fd211, %fd219;
mov.f64 %fd221, 0d3C782E11898132E0;
.loc 3 389 10
fma.rn.f64 %fd222, %fd220, %fd211, %fd221;
mov.f64 %fd223, 0d3CFDE4ACFD9E26BA;
.loc 3 389 10
fma.rn.f64 %fd224, %fd222, %fd211, %fd223;
mov.f64 %fd225, 0dBD26D33EED66C487;
.loc 3 389 10
fma.rn.f64 %fd226, %fd224, %fd211, %fd225;
mov.f64 %fd227, 0dBD36F2167040D8E2;
.loc 3 389 10
fma.rn.f64 %fd228, %fd226, %fd211, %fd227;
mov.f64 %fd229, 0d3D872A22C2D77E20;
.loc 3 389 10
fma.rn.f64 %fd230, %fd228, %fd211, %fd229;
mov.f64 %fd231, 0dBDAC8859C4E5C0AF;
.loc 3 389 10
fma.rn.f64 %fd232, %fd230, %fd211, %fd231;
mov.f64 %fd233, 0dBDCDC583D118A561;
.loc 3 389 10
fma.rn.f64 %fd234, %fd232, %fd211, %fd233;
mov.f64 %fd235, 0d3E120F47CCF46B3C;
.loc 3 389 10
fma.rn.f64 %fd236, %fd234, %fd211, %fd235;
mov.f64 %fd237, 0dBE31A9E38DC84D60;
.loc 3 389 10
fma.rn.f64 %fd238, %fd236, %fd211, %fd237;
mov.f64 %fd239, 0dBE5F36CD6D3D46A9;
.loc 3 389 10
fma.rn.f64 %fd240, %fd238, %fd211, %fd239;
mov.f64 %fd241, 0d3E9C6B4F5D03B787;
.loc 3 389 10
fma.rn.f64 %fd242, %fd240, %fd211, %fd241;
mov.f64 %fd243, 0dBEB6E8A5434AE8A2;
.loc 3 389 10
fma.rn.f64 %fd244, %fd242, %fd211, %fd243;
mov.f64 %fd245, 0dBEED1D1F7B8736F6;
.loc 3 389 10
fma.rn.f64 %fd246, %fd244, %fd211, %fd245;
mov.f64 %fd247, 0d3F2879C2A212F024;
.loc 3 389 10
fma.rn.f64 %fd248, %fd246, %fd211, %fd247;
mov.f64 %fd249, 0dBF4845769484FCA8;
.loc 3 389 10
fma.rn.f64 %fd250, %fd248, %fd211, %fd249;
mov.f64 %fd251, 0dBF78B6C33114F909;
.loc 3 389 10
fma.rn.f64 %fd252, %fd250, %fd211, %fd251;
mov.f64 %fd253, 0d3FCEBD80D9B13E28;
.loc 3 389 10
fma.rn.f64 %fd254, %fd252, %fd211, %fd253;
mov.f64 %fd255, 0d3FFA755E7C99AE86;
.loc 3 389 10
fma.rn.f64 %fd256, %fd254, %fd211, %fd255;
// result = p * (-v) + p = p * (1 - v)
fma.rn.f64 %fd260, %fd256, %fd2, %fd256;
BB13_21:
.loc 2 20 42
// Output element address: out + 8 * (x * param_3 + y)
mul.lo.s32 %r61, %r63, %r22;
add.s32 %r58, %r64, %r61;
mul.wide.s32 %rd7, %r58, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 2 20 42
st.global.f64 [%rd8], %fd260;
.loc 2 20 22
// Advance y by nctaid.y * ntid.y and loop while y < param_0.
add.s32 %r64, %r2, %r64;
.loc 2 20 1
setp.lt.s32 %p17, %r64, %r20;
@%p17 bra BB13_3;
BB13_22:
.loc 2 20 22
// Advance x by nctaid.x * ntid.x and loop while x < param_1.
mov.u32 %r59, %nctaid.x;
mad.lo.s32 %r63, %r59, %r24, %r63;
.loc 2 20 1
setp.lt.s32 %p18, %r63, %r21;
@%p18 bra BB13_2;
BB13_23:
.loc 2 20 2
ret;
}
// map_erfcx_double: element-wise double-precision map kernel.
// Reads an f64 from the buffer in param_4, applies the inlined math routine
// referenced by the ".loc 3 404" directives (presumably erfcx(), the scaled
// complementary error function, judging by the entry name -- confirm against
// the .cu source), and stores the f64 result to the buffer in param_2.
//
// Parameters, as used by the code below:
//   param_0 (u32): exclusive upper bound for the y-derived index
//   param_1 (u32): exclusive upper bound for the x-derived index
//   param_2 (u64): output pointer (converted to a global address)
//   param_3 (u32): multiplier applied to the x index when addressing the output
//   param_4 (u64): input pointer (converted to a global address)
//   param_5 (u32): multiplier applied to the x index when addressing the input
// NOTE(review): param_0/param_1 look like rows/cols and param_3/param_5 like
// leading dimensions of column-major matrices -- confirm with the host caller.
.visible .entry map_erfcx_double(
.param .u32 map_erfcx_double_param_0,
.param .u32 map_erfcx_double_param_1,
.param .u64 map_erfcx_double_param_2,
.param .u32 map_erfcx_double_param_3,
.param .u64 map_erfcx_double_param_4,
.param .u32 map_erfcx_double_param_5
)
{
.reg .pred %p<14>;
.reg .f32 %f<9>;
.reg .s32 %r<48>;
.reg .s64 %rd<9>;
.reg .f64 %fd<141>;
ld.param.u32 %r15, [map_erfcx_double_param_0];
ld.param.u32 %r16, [map_erfcx_double_param_1];
ld.param.u64 %rd2, [map_erfcx_double_param_2];
ld.param.u32 %r17, [map_erfcx_double_param_3];
ld.param.u64 %rd3, [map_erfcx_double_param_4];
ld.param.u32 %r18, [map_erfcx_double_param_5];
// %rd1 = input base (global address space)
cvta.to.global.u64 %rd1, %rd3;
.loc 2 21 1
// %r45 = starting x index = ntid.x * ctaid.x + tid.x
mov.u32 %r19, %ntid.x;
mov.u32 %r20, %ctaid.x;
mov.u32 %r21, %tid.x;
mad.lo.s32 %r45, %r19, %r20, %r21;
.loc 2 21 1
// Exit immediately when the starting x index is already out of range.
setp.ge.s32 %p1, %r45, %r16;
@%p1 bra BB14_17;
.loc 2 21 1
mov.u32 %r22, %ntid.y;
.loc 2 21 22
// %r2 = y step = nctaid.y * ntid.y
mov.u32 %r23, %nctaid.y;
mul.lo.s32 %r2, %r23, %r22;
// %rd6 = output base (global address space)
cvta.to.global.u64 %rd6, %rd2;
// Outer loop over the x index (%r45).
BB14_2:
.loc 2 21 1
// %r46 = starting y index = ntid.y * ctaid.y + tid.y (recomputed per x index)
mov.u32 %r24, %ctaid.y;
mov.u32 %r26, %tid.y;
mad.lo.s32 %r46, %r22, %r24, %r26;
.loc 2 21 1
setp.ge.s32 %p2, %r46, %r15;
@%p2 bra BB14_16;
.loc 2 21 1
// Per-x offsets hoisted out of the inner loop:
// %r4 = x * param_5 (input), %r5 = x * param_3 (output)
mul.lo.s32 %r4, %r45, %r18;
.loc 2 21 42
mul.lo.s32 %r5, %r45, %r17;
// Inner loop over the y index (%r46).
BB14_4:
.loc 2 21 1
// Input element address: in + 8 * (x * param_5 + y)
add.s32 %r31, %r46, %r4;
mul.wide.s32 %rd4, %r31, 8;
add.s64 %rd5, %rd1, %rd4;
.loc 2 21 1
// %fd1 = input value v, %fd2 = a = |v|
ld.global.f64 %fd1, [%rd5];
.loc 3 404 10
abs.f64 %fd2, %fd1;
// %r32 = high word of a. Below 1077936128 (0x40400000, the high word of 32.0)
// the rational/polynomial path at BB14_6 is used; otherwise the series in 1/a
// that follows.
{
.reg .b32 %temp;
mov.b64 {%temp, %r32}, %fd2;
}
setp.lt.u32 %p3, %r32, 1077936128;
@%p3 bra BB14_6;
// --- Large-|v| path: polynomial in 1/a^2, scaled by (1/a) * 0d3FE20DD750429B6D ---
// Coefficients, highest order first: -29.53125, 6.5625, -1.875, 0.75, -0.5, 1.0.
// 0d3FE20DD750429B6D is approximately 0.5641896 (1/sqrt(pi)).
rcp.rn.f64 %fd16, %fd2;
mov.f64 %fd17, 0d3FF0000000000000;
.loc 3 404 10
mul.f64 %fd18, %fd16, %fd16;
mov.f64 %fd19, 0d401A400000000000;
mov.f64 %fd20, 0dC03D880000000000;
.loc 3 404 10
fma.rn.f64 %fd21, %fd20, %fd18, %fd19;
mov.f64 %fd22, 0dBFFE000000000000;
.loc 3 404 10
fma.rn.f64 %fd23, %fd21, %fd18, %fd22;
mov.f64 %fd24, 0d3FE8000000000000;
.loc 3 404 10
fma.rn.f64 %fd25, %fd23, %fd18, %fd24;
mov.f64 %fd26, 0dBFE0000000000000;
.loc 3 404 10
fma.rn.f64 %fd27, %fd25, %fd18, %fd26;
fma.rn.f64 %fd28, %fd27, %fd18, %fd17;
mul.f64 %fd29, %fd16, 0d3FE20DD750429B6D;
mul.f64 %fd140, %fd28, %fd29;
bra.uni BB14_7;
// --- |v| < 32 path: polynomial in q = (a - 4)/(a + 4), divided by (2a + 1) ---
BB14_6:
.loc 3 404 10
// %fd34 = a - 4.0, %fd30 = a + 4.0
add.f64 %fd34, %fd2, 0dC010000000000000;
mov.f64 %fd35, 0dC010000000000000;
.loc 3 404 10
add.f64 %fd30, %fd2, 0d4010000000000000;
// Reciprocal of (a + 4): f32 approximate seed, refined in f64 below.
// inline asm
cvt.rn.f32.f64 %f1,%fd30;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd31,%f2;
// inline asm
neg.f64 %fd36, %fd30;
mov.f64 %fd37, 0d3FF0000000000000;
.loc 3 404 10
fma.rn.f64 %fd38, %fd36, %fd31, %fd37;
fma.rn.f64 %fd39, %fd38, %fd38, %fd38;
fma.rn.f64 %fd40, %fd39, %fd31, %fd31;
// %fd46 ~= (a - 4) / (a + 4), with a residual correction step.
mul.f64 %fd41, %fd34, %fd40;
add.rn.f64 %fd42, %fd41, %fd37;
fma.rn.f64 %fd43, %fd35, %fd42, %fd2;
neg.f64 %fd44, %fd41;
fma.rn.f64 %fd45, %fd44, %fd2, %fd43;
fma.rn.f64 %fd46, %fd40, %fd45, %fd41;
// Horner-form polynomial in %fd46 with literal coefficients (result %fd91).
mov.f64 %fd47, 0dBE44E1C6FD03D328;
mov.f64 %fd48, 0dBDF8774AD4E0BFD7;
.loc 3 404 10
fma.rn.f64 %fd49, %fd48, %fd46, %fd47;
mov.f64 %fd50, 0dBE4330149F7A56B6;
.loc 3 404 10
fma.rn.f64 %fd51, %fd49, %fd46, %fd50;
mov.f64 %fd52, 0d3E7BEDDED8376273;
.loc 3 404 10
fma.rn.f64 %fd53, %fd51, %fd46, %fd52;
mov.f64 %fd54, 0d3E6F9254C3ABF22B;
.loc 3 404 10
fma.rn.f64 %fd55, %fd53, %fd46, %fd54;
mov.f64 %fd56, 0dBEAB9068C2148CF0;
.loc 3 404 10
fma.rn.f64 %fd57, %fd55, %fd46, %fd56;
mov.f64 %fd58, 0d3E94C6454DB34009;
.loc 3 404 10
fma.rn.f64 %fd59, %fd57, %fd46, %fd58;
mov.f64 %fd60, 0d3ED7F1C378F2311D;
.loc 3 404 10
fma.rn.f64 %fd61, %fd59, %fd46, %fd60;
mov.f64 %fd62, 0dBEE78E051C6D5C58;
.loc 3 404 10
fma.rn.f64 %fd63, %fd61, %fd46, %fd62;
mov.f64 %fd64, 0dBEF995B4EAD14A90;
.loc 3 404 10
fma.rn.f64 %fd65, %fd63, %fd46, %fd64;
mov.f64 %fd66, 0d3F23BE27CF0A29B2;
.loc 3 404 10
fma.rn.f64 %fd67, %fd65, %fd46, %fd66;
mov.f64 %fd68, 0dBF2A1DEF3E81672E;
.loc 3 404 10
fma.rn.f64 %fd69, %fd67, %fd46, %fd68;
mov.f64 %fd70, 0dBF48D4ABE68C1713;
.loc 3 404 10
fma.rn.f64 %fd71, %fd69, %fd46, %fd70;
mov.f64 %fd72, 0d3F749C67210DD6B4;
.loc 3 404 10
fma.rn.f64 %fd73, %fd71, %fd46, %fd72;
mov.f64 %fd74, 0dBF9096238568E357;
.loc 3 404 10
fma.rn.f64 %fd75, %fd73, %fd46, %fd74;
mov.f64 %fd76, 0d3FA3079EDF8C2DC9;
.loc 3 404 10
fma.rn.f64 %fd77, %fd75, %fd46, %fd76;
mov.f64 %fd78, 0dBFB0FB06DFF601FC;
.loc 3 404 10
fma.rn.f64 %fd79, %fd77, %fd46, %fd78;
mov.f64 %fd80, 0d3FB7FEE004DFBCDC;
.loc 3 404 10
fma.rn.f64 %fd81, %fd79, %fd46, %fd80;
mov.f64 %fd82, 0dBFB9DDB23C3DB8C6;
.loc 3 404 10
fma.rn.f64 %fd83, %fd81, %fd46, %fd82;
mov.f64 %fd84, 0d3FB16ECEFCFA5FDA;
.loc 3 404 10
fma.rn.f64 %fd85, %fd83, %fd46, %fd84;
mov.f64 %fd86, 0d3F8F7F5DF66FB6D6;
.loc 3 404 10
fma.rn.f64 %fd87, %fd85, %fd46, %fd86;
mov.f64 %fd88, 0dBFC1DF1AD154A29D;
.loc 3 404 10
fma.rn.f64 %fd89, %fd87, %fd46, %fd88;
mov.f64 %fd90, 0d3FF3BA5916E9FD7F;
.loc 3 404 10
fma.rn.f64 %fd91, %fd89, %fd46, %fd90;
mov.f64 %fd92, 0d4000000000000000;
.loc 3 404 10
// %fd32 = 2*a + 1; reciprocal via f32 seed + f64 refinement.
fma.rn.f64 %fd32, %fd92, %fd2, %fd37;
// inline asm
cvt.rn.f32.f64 %f5,%fd32;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd33,%f6;
// inline asm
neg.f64 %fd93, %fd32;
fma.rn.f64 %fd94, %fd93, %fd33, %fd37;
fma.rn.f64 %fd95, %fd94, %fd94, %fd94;
fma.rn.f64 %fd96, %fd95, %fd33, %fd33;
// %fd140 ~= polynomial / (2*a + 1), with a residual correction step.
mul.f64 %fd97, %fd91, %fd96;
mul.f64 %fd98, %fd97, 0dC000000000000000;
fma.rn.f64 %fd99, %fd2, %fd98, %fd91;
neg.f64 %fd100, %fd97;
add.rn.f64 %fd101, %fd99, %fd100;
fma.rn.f64 %fd140, %fd101, %fd96, %fd97;
BB14_7:
// %r33 = high word of v. A value > -1 means the sign bit is clear, so the
// value computed from |v| in %fd140 is stored as-is.
{
.reg .b32 %temp;
mov.b64 {%temp, %r33}, %fd1;
}
setp.gt.s32 %p4, %r33, -1;
@%p4 bra BB14_15;
// --- Sign-bit-set inputs: result = 2*exp(a*a) - (value for |v|) ---
// %fd6 = a*a (rounded), %fd7 = fma(a, a, -%fd6) = rounding error of that product.
mul.f64 %fd6, %fd2, %fd2;
neg.f64 %fd102, %fd6;
fma.rn.f64 %fd7, %fd2, %fd2, %fd102;
// Range check on the high word of %fd6 before evaluating the exponential.
// NOTE(review): 1082535491 is 0x40862E43 and -1064875759 is 0xC0874911; these
// look like the overflow/underflow bounds of the toolkit's exp() -- confirm.
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd6;
}
setp.lt.u32 %p5, %r8, 1082535491;
setp.lt.s32 %p6, %r8, -1064875759;
or.pred %p7, %p5, %p6;
@%p7 bra BB14_10;
// Out of range: 0.0 when the sign bit is set, +inf otherwise; a NaN argument
// propagates as %fd6 + %fd6.
setp.lt.s32 %p8, %r8, 0;
selp.f64 %fd103, 0d0000000000000000, 0d7FF0000000000000, %p8;
abs.f64 %fd104, %fd6;
setp.gtu.f64 %p9, %fd104, 0d7FF0000000000000;
add.f64 %fd105, %fd6, %fd6;
selp.f64 %fd139, %fd105, %fd103, %p9;
bra.uni BB14_14;
BB14_10:
.loc 3 404 10
// In-range exponential of %fd6:
//   n = round-to-nearest-integer(%fd6 * 0d3FF71547652B82FE)   (~log2(e))
//   %fd111 = %fd6 - n*ln2, using a two-part constant for ln2
mul.f64 %fd106, %fd6, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd107, %fd106;
cvt.rzi.s32.f64 %r9, %fd107;
mov.f64 %fd108, 0dBFE62E42FEFA39EF;
.loc 3 404 10
fma.rn.f64 %fd109, %fd107, %fd108, %fd6;
mov.f64 %fd110, 0dBC7ABC9E3B39803F;
.loc 3 404 10
fma.rn.f64 %fd111, %fd107, %fd110, %fd109;
// Horner-form polynomial in the reduced argument %fd111 (result %fd138).
mov.f64 %fd112, 0d3E928A27E30F5561;
mov.f64 %fd113, 0d3E5AE6449C0686C0;
.loc 3 404 10
fma.rn.f64 %fd114, %fd113, %fd111, %fd112;
mov.f64 %fd115, 0d3EC71DE8E6486D6B;
.loc 3 404 10
fma.rn.f64 %fd116, %fd114, %fd111, %fd115;
mov.f64 %fd117, 0d3EFA019A6B2464C5;
.loc 3 404 10
fma.rn.f64 %fd118, %fd116, %fd111, %fd117;
mov.f64 %fd119, 0d3F2A01A0171064A5;
.loc 3 404 10
fma.rn.f64 %fd120, %fd118, %fd111, %fd119;
mov.f64 %fd121, 0d3F56C16C17F29C8D;
.loc 3 404 10
fma.rn.f64 %fd122, %fd120, %fd111, %fd121;
mov.f64 %fd123, 0d3F8111111111A24E;
.loc 3 404 10
fma.rn.f64 %fd124, %fd122, %fd111, %fd123;
mov.f64 %fd125, 0d3FA555555555211D;
.loc 3 404 10
fma.rn.f64 %fd126, %fd124, %fd111, %fd125;
mov.f64 %fd127, 0d3FC5555555555530;
.loc 3 404 10
fma.rn.f64 %fd128, %fd126, %fd111, %fd127;
mov.f64 %fd129, 0d3FE0000000000005;
.loc 3 404 10
fma.rn.f64 %fd130, %fd128, %fd111, %fd129;
mov.f64 %fd131, 0d3FF0000000000000;
.loc 3 404 10
fma.rn.f64 %fd132, %fd130, %fd111, %fd131;
fma.rn.f64 %fd138, %fd132, %fd111, %fd131;
// Scale by 2^n by constructing exponent bits. When |n| < 1023 one factor
// suffices (BB14_12); otherwise the scaling is split into two multiplications.
abs.s32 %r34, %r9;
setp.lt.s32 %p10, %r34, 1023;
@%p10 bra BB14_12;
add.s32 %r35, %r9, 2046;
shl.b32 %r36, %r35, 19;
and.b32 %r37, %r36, -1048576;
shl.b32 %r38, %r35, 20;
sub.s32 %r47, %r38, %r37;
mov.u32 %r39, 0;
.loc 3 404 10
// First partial scale factor: high word %r37, low word 0.
mov.b64 %fd133, {%r39, %r37};
mul.f64 %fd138, %fd138, %fd133;
bra.uni BB14_13;
BB14_12:
.loc 3 404 10
// High word of 2^n: (n << 20) + 0x3FF00000
shl.b32 %r40, %r9, 20;
add.s32 %r47, %r40, 1072693248;
BB14_13:
mov.u32 %r41, 0;
.loc 3 404 10
mov.b64 %fd134, {%r41, %r47};
mul.f64 %fd139, %fd138, %fd134;
BB14_14:
// %fd135 = 2 * exp(a*a); apply the product-rounding-error correction (%fd7),
// then subtract the value computed for |v|. If 2*exp(...) is already +inf,
// return it directly instead of the difference.
add.f64 %fd135, %fd139, %fd139;
fma.rn.f64 %fd136, %fd135, %fd7, %fd135;
sub.f64 %fd137, %fd136, %fd140;
setp.eq.f64 %p11, %fd135, 0d7FF0000000000000;
selp.f64 %fd140, %fd135, %fd137, %p11;
BB14_15:
.loc 2 21 42
// Output element address: out + 8 * (x * param_3 + y)
add.s32 %r42, %r46, %r5;
mul.wide.s32 %rd7, %r42, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 2 21 42
st.global.f64 [%rd8], %fd140;
.loc 2 21 22
// Advance y by nctaid.y * ntid.y and loop while y < param_0.
add.s32 %r46, %r2, %r46;
.loc 2 21 1
setp.lt.s32 %p12, %r46, %r15;
@%p12 bra BB14_4;
BB14_16:
.loc 2 21 22
// Advance x by nctaid.x * ntid.x and loop while x < param_1.
mov.u32 %r43, %nctaid.x;
mad.lo.s32 %r45, %r43, %r19, %r45;
.loc 2 21 1
setp.lt.s32 %p13, %r45, %r16;
@%p13 bra BB14_2;
BB14_17:
.loc 2 21 2
ret;
}
// map_erf_double: for every (x, y) index pair visited by this thread, loads one
// double from the param_4 buffer, runs the inlined math routine tagged
// `.loc 3 374` (math_functions_dbl_ptx3.h; presumably erf(), going by the kernel
// name) and stores the result into the param_2 buffer.
// Parameters, as they are used below:
//   param_0  u32  exclusive upper bound for the y index (tid.y / ctaid.y based)
//   param_1  u32  exclusive upper bound for the x index (tid.x / ctaid.x based)
//   param_2  u64  output base address
//   param_3  u32  output stride in elements, multiplied by the x index
//   param_4  u64  input base address
//   param_5  u32  input stride in elements, multiplied by the x index
// Element address = base + 8 * (y + x * stride), computed in 32-bit before widening.
// Both loops are grid-stride loops (step = nctaid * ntid in each dimension).
// NOTE(review): presumably param_0/param_1 are rows/cols of a column-major
// matrix and the strides are leading dimensions - confirm against the caller.
.visible .entry map_erf_double(
.param .u32 map_erf_double_param_0,
.param .u32 map_erf_double_param_1,
.param .u64 map_erf_double_param_2,
.param .u32 map_erf_double_param_3,
.param .u64 map_erf_double_param_4,
.param .u32 map_erf_double_param_5
)
{
.reg .pred %p<7>;
.reg .s32 %r<38>;
.reg .s64 %rd<9>;
.reg .f64 %fd<105>;
// %r10 = y bound, %r11 = x bound, %r12 = output stride, %r13 = input stride.
ld.param.u32 %r10, [map_erf_double_param_0];
ld.param.u32 %r11, [map_erf_double_param_1];
ld.param.u64 %rd3, [map_erf_double_param_2];
ld.param.u32 %r12, [map_erf_double_param_3];
ld.param.u64 %rd4, [map_erf_double_param_4];
ld.param.u32 %r13, [map_erf_double_param_5];
// %rd1 = output pointer, %rd2 = input pointer, both in the global window.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 22 1
// %r36 = starting x index = ntid.x * ctaid.x + tid.x.
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r36, %r14, %r15, %r16;
.loc 2 22 1
// Nothing to do when the first x index is already out of range.
setp.ge.s32 %p1, %r36, %r11;
@%p1 bra BB15_9;
.loc 2 22 1
// %r2 = starting y index = ntid.y * ctaid.y + tid.y.
mov.u32 %r17, %tid.y;
mov.u32 %r18, %ntid.y;
mov.u32 %r19, %ctaid.y;
mad.lo.s32 %r2, %r18, %r19, %r17;
.loc 2 22 22
// %r3 = y step = nctaid.y * ntid.y.
mov.u32 %r20, %nctaid.y;
mul.lo.s32 %r3, %r20, %r18;
// Outer loop over x.
BB15_2:
.loc 2 22 1
setp.ge.s32 %p2, %r2, %r10;
@%p2 bra BB15_8;
.loc 2 22 1
// %r5 / %r6 = x * stride for the input and the output respectively.
mul.lo.s32 %r5, %r36, %r13;
.loc 2 22 42
mul.lo.s32 %r6, %r36, %r12;
mov.u32 %r37, %r2;
// Inner loop over y; %r7 holds the current y index.
BB15_4:
.loc 2 22 1
mov.u32 %r7, %r37;
add.s32 %r21, %r7, %r5;
mul.wide.s32 %rd5, %r21, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 22 1
ld.global.f64 %fd1, [%rd6];
.loc 3 374 10
// %fd2 = |x|. `ltu` is also true for NaN, so |x| < 1.0 and NaN inputs both take
// the BB15_6 path below.
abs.f64 %fd2, %fd1;
setp.ltu.f64 %p3, %fd2, 0d3FF0000000000000;
@%p3 bra BB15_6;
// |x| >= 1.0: Horner evaluation of a polynomial in |x|; the result (%fd50) is
// then fed through an exponential reconstruction.
mov.f64 %fd6, 0dBCF1384CE38C616A;
mov.f64 %fd7, 0d3C8B9C2B870030E8;
.loc 3 374 10
fma.rn.f64 %fd8, %fd7, %fd2, %fd6;
mov.f64 %fd9, 0d3D4458AE9746C2FD;
.loc 3 374 10
fma.rn.f64 %fd10, %fd8, %fd2, %fd9;
mov.f64 %fd11, 0dBD8E4A44D4F1AB56;
.loc 3 374 10
fma.rn.f64 %fd12, %fd10, %fd2, %fd11;
mov.f64 %fd13, 0d3DCFDF15265C58EE;
.loc 3 374 10
fma.rn.f64 %fd14, %fd12, %fd2, %fd13;
mov.f64 %fd15, 0dBE0933832F358D51;
.loc 3 374 10
fma.rn.f64 %fd16, %fd14, %fd2, %fd15;
mov.f64 %fd17, 0d3E3F136D3F719446;
.loc 3 374 10
fma.rn.f64 %fd18, %fd16, %fd2, %fd17;
mov.f64 %fd19, 0dBE6E94C2FE151B3B;
.loc 3 374 10
fma.rn.f64 %fd20, %fd18, %fd2, %fd19;
mov.f64 %fd21, 0d3E985A70310EE0A8;
.loc 3 374 10
fma.rn.f64 %fd22, %fd20, %fd2, %fd21;
mov.f64 %fd23, 0dBEBF944DA1520B74;
.loc 3 374 10
fma.rn.f64 %fd24, %fd22, %fd2, %fd23;
mov.f64 %fd25, 0d3EE09F503825C543;
.loc 3 374 10
fma.rn.f64 %fd26, %fd24, %fd2, %fd25;
mov.f64 %fd27, 0dBEFBEEFE9F949E59;
.loc 3 374 10
fma.rn.f64 %fd28, %fd26, %fd2, %fd27;
mov.f64 %fd29, 0d3F11D785C6E28857;
.loc 3 374 10
fma.rn.f64 %fd30, %fd28, %fd2, %fd29;
mov.f64 %fd31, 0dBF1D866B223048C7;
.loc 3 374 10
fma.rn.f64 %fd32, %fd30, %fd2, %fd31;
mov.f64 %fd33, 0d3EF258F0847E8908;
.loc 3 374 10
fma.rn.f64 %fd34, %fd32, %fd2, %fd33;
mov.f64 %fd35, 0d3F429CFC58DBB776;
.loc 3 374 10
fma.rn.f64 %fd36, %fd34, %fd2, %fd35;
mov.f64 %fd37, 0dBF5BE16D3F71F3C5;
.loc 3 374 10
fma.rn.f64 %fd38, %fd36, %fd2, %fd37;
mov.f64 %fd39, 0d3F2E8BDA60326B1A;
.loc 3 374 10
fma.rn.f64 %fd40, %fd38, %fd2, %fd39;
mov.f64 %fd41, 0d3F938FB20B0988A6;
.loc 3 374 10
fma.rn.f64 %fd42, %fd40, %fd2, %fd41;
mov.f64 %fd43, 0dBFBA4E3A80F64E33;
.loc 3 374 10
fma.rn.f64 %fd44, %fd42, %fd2, %fd43;
mov.f64 %fd45, 0dBFE45F3E88093928;
.loc 3 374 10
fma.rn.f64 %fd46, %fd44, %fd2, %fd45;
mov.f64 %fd47, 0dBFF20DD599CAEEA0;
.loc 3 374 10
fma.rn.f64 %fd48, %fd46, %fd2, %fd47;
mov.f64 %fd49, 0dBE883BE1E31CE133;
.loc 3 374 10
fma.rn.f64 %fd50, %fd48, %fd2, %fd49;
// Exponential of %fd50: scale by ~1.4427 (log2 e), round to the nearest integer
// n (%fd52 / %r22), then remove n * ~0.6931 (ln 2) to get the reduced argument.
mul.f64 %fd51, %fd50, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd52, %fd51;
cvt.rzi.s32.f64 %r22, %fd52;
mov.f64 %fd53, 0dBFE62E42FEFA39EF;
.loc 3 374 10
fma.rn.f64 %fd54, %fd52, %fd53, %fd50;
// Horner polynomial in the reduced argument %fd54.
mov.f64 %fd55, 0d3E928A27E30F5561;
mov.f64 %fd56, 0d3E5AE6449C0686C0;
.loc 3 374 10
fma.rn.f64 %fd57, %fd56, %fd54, %fd55;
mov.f64 %fd58, 0d3EC71DE8E6486D6B;
.loc 3 374 10
fma.rn.f64 %fd59, %fd57, %fd54, %fd58;
mov.f64 %fd60, 0d3EFA019A6B2464C5;
.loc 3 374 10
fma.rn.f64 %fd61, %fd59, %fd54, %fd60;
mov.f64 %fd62, 0d3F2A01A0171064A5;
.loc 3 374 10
fma.rn.f64 %fd63, %fd61, %fd54, %fd62;
mov.f64 %fd64, 0d3F56C16C17F29C8D;
.loc 3 374 10
fma.rn.f64 %fd65, %fd63, %fd54, %fd64;
mov.f64 %fd66, 0d3F8111111111A24E;
.loc 3 374 10
fma.rn.f64 %fd67, %fd65, %fd54, %fd66;
mov.f64 %fd68, 0d3FA555555555211D;
.loc 3 374 10
fma.rn.f64 %fd69, %fd67, %fd54, %fd68;
mov.f64 %fd70, 0d3FC5555555555530;
.loc 3 374 10
fma.rn.f64 %fd71, %fd69, %fd54, %fd70;
mov.f64 %fd72, 0d3FE0000000000005;
.loc 3 374 10
fma.rn.f64 %fd73, %fd71, %fd54, %fd72;
mov.f64 %fd74, 0d3FF0000000000000;
.loc 3 374 10
fma.rn.f64 %fd75, %fd73, %fd54, %fd74;
fma.rn.f64 %fd76, %fd75, %fd54, %fd74;
// Multiply %fd76 by 2^n by adding n << 20 to its high 32 bits (the exponent
// field starts at bit 20 of the high word); the low word is kept unchanged.
{
.reg .b32 %temp;
mov.b64 {%temp, %r23}, %fd76;
}
shl.b32 %r24, %r22, 20;
add.s32 %r25, %r23, %r24;
{
.reg .b32 %temp;
mov.b64 {%r26, %temp}, %fd76;
}
mov.b64 %fd77, {%r26, %r25};
// %fd78 = 1.0 - (scaled exponential).
sub.f64 %fd78, %fd74, %fd77;
// If the high word of |x| exceeds 1075294207 (0x4017AFFF, |x| above roughly
// 5.92) the magnitude of the result is forced to exactly 1.0.
{
.reg .b32 %temp;
mov.b64 {%temp, %r27}, %fd2;
}
setp.gt.u32 %p4, %r27, 1075294207;
selp.f64 %fd79, 0d3FF0000000000000, %fd78, %p4;
// Copy the sign bit of the original x onto the magnitude in %fd79.
{
.reg .b32 %temp;
mov.b64 {%r28, %temp}, %fd79;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r29}, %fd79;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r30}, %fd1;
}
and.b32 %r31, %r30, -2147483648;
or.b32 %r32, %r29, %r31;
mov.b64 %fd104, {%r28, %r32};
bra.uni BB15_7;
// |x| < 1.0 (or NaN): Horner polynomial in x*x (%fd80), multiplied by x at the
// end, so the result keeps the sign of x.
BB15_6:
.loc 3 374 10
mul.f64 %fd80, %fd1, %fd1;
mov.f64 %fd81, 0d3E4D5F4BB7A316F6;
mov.f64 %fd82, 0dBE0A83AA3B08FBC2;
.loc 3 374 10
fma.rn.f64 %fd83, %fd82, %fd80, %fd81;
mov.f64 %fd84, 0dBE85BDCE301B3CDF;
.loc 3 374 10
fma.rn.f64 %fd85, %fd83, %fd80, %fd84;
mov.f64 %fd86, 0d3EBB978FADB81BC9;
.loc 3 374 10
fma.rn.f64 %fd87, %fd85, %fd80, %fd86;
mov.f64 %fd88, 0dBEEF4C99D6AE5FB8;
.loc 3 374 10
fma.rn.f64 %fd89, %fd87, %fd80, %fd88;
mov.f64 %fd90, 0d3F1F9A2AF549012E;
.loc 3 374 10
fma.rn.f64 %fd91, %fd89, %fd80, %fd90;
mov.f64 %fd92, 0dBF4C02DAFC636A47;
.loc 3 374 10
fma.rn.f64 %fd93, %fd91, %fd80, %fd92;
mov.f64 %fd94, 0d3F7565BCCF619AC0;
.loc 3 374 10
fma.rn.f64 %fd95, %fd93, %fd80, %fd94;
mov.f64 %fd96, 0dBF9B82CE311E321A;
.loc 3 374 10
fma.rn.f64 %fd97, %fd95, %fd80, %fd96;
mov.f64 %fd98, 0d3FBCE2F21A04075C;
.loc 3 374 10
fma.rn.f64 %fd99, %fd97, %fd80, %fd98;
mov.f64 %fd100, 0dBFD812746B0379B4;
.loc 3 374 10
fma.rn.f64 %fd101, %fd99, %fd80, %fd100;
mov.f64 %fd102, 0d3FF20DD750429B6D;
.loc 3 374 10
fma.rn.f64 %fd103, %fd101, %fd80, %fd102;
mul.f64 %fd104, %fd103, %fd1;
// Store the result (%fd104) at output[y + x * output stride].
BB15_7:
.loc 2 22 42
add.s32 %r33, %r7, %r6;
mul.wide.s32 %rd7, %r33, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 22 42
st.global.f64 [%rd8], %fd104;
.loc 2 22 22
// Advance y by the y step and loop while it is still below the y bound.
add.s32 %r8, %r3, %r7;
.loc 2 22 1
setp.lt.s32 %p5, %r8, %r10;
mov.u32 %r37, %r8;
@%p5 bra BB15_4;
BB15_8:
.loc 2 22 22
// Advance x by nctaid.x * ntid.x and loop while it is below the x bound.
mov.u32 %r34, %nctaid.x;
mad.lo.s32 %r36, %r34, %r14, %r36;
.loc 2 22 1
setp.lt.s32 %p6, %r36, %r11;
@%p6 bra BB15_2;
BB15_9:
.loc 2 22 2
ret;
}
// map_erfinv_double: for every (x, y) index pair visited by this thread, loads
// one double from the param_4 buffer, runs the inlined math routine tagged
// `.loc 3 384` (math_functions_dbl_ptx3.h; presumably erfinv(), going by the
// kernel name) and stores the result into the param_2 buffer.
// Parameters, as they are used below:
//   param_0  u32  exclusive upper bound for the y index (tid.y / ctaid.y based)
//   param_1  u32  exclusive upper bound for the x index (tid.x / ctaid.x based)
//   param_2  u64  output base address
//   param_3  u32  output stride in elements, multiplied by the x index
//   param_4  u64  input base address
//   param_5  u32  input stride in elements, multiplied by the x index
// Element address = base + 8 * (y + x * stride), computed in 32-bit before widening.
// Both loops are grid-stride loops (step = nctaid * ntid in each dimension).
// NOTE(review): presumably param_0/param_1 are rows/cols of a column-major
// matrix and the strides are leading dimensions - confirm against the caller.
.visible .entry map_erfinv_double(
.param .u32 map_erfinv_double_param_0,
.param .u32 map_erfinv_double_param_1,
.param .u64 map_erfinv_double_param_2,
.param .u32 map_erfinv_double_param_3,
.param .u64 map_erfinv_double_param_4,
.param .u32 map_erfinv_double_param_5
)
{
.reg .pred %p<10>;
.reg .f32 %f<5>;
.reg .s32 %r<34>;
.reg .s64 %rd<9>;
.reg .f64 %fd<175>;
// %r10 = y bound, %r11 = x bound, %r12 = output stride, %r13 = input stride.
ld.param.u32 %r10, [map_erfinv_double_param_0];
ld.param.u32 %r11, [map_erfinv_double_param_1];
ld.param.u64 %rd3, [map_erfinv_double_param_2];
ld.param.u32 %r12, [map_erfinv_double_param_3];
ld.param.u64 %rd4, [map_erfinv_double_param_4];
ld.param.u32 %r13, [map_erfinv_double_param_5];
// %rd1 = output pointer, %rd2 = input pointer, both in the global window.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 23 1
// %r32 = starting x index = ntid.x * ctaid.x + tid.x.
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r32, %r14, %r15, %r16;
.loc 2 23 1
setp.ge.s32 %p1, %r32, %r11;
@%p1 bra BB16_16;
.loc 2 23 1
// %r2 = starting y index = ntid.y * ctaid.y + tid.y.
mov.u32 %r17, %tid.y;
mov.u32 %r18, %ntid.y;
mov.u32 %r19, %ctaid.y;
mad.lo.s32 %r2, %r18, %r19, %r17;
.loc 2 23 22
// %r3 = y step = nctaid.y * ntid.y.
mov.u32 %r20, %nctaid.y;
mul.lo.s32 %r3, %r20, %r18;
// Outer loop over x.
BB16_2:
.loc 2 23 1
setp.ge.s32 %p2, %r2, %r10;
@%p2 bra BB16_15;
.loc 2 23 1
// %r5 / %r6 = x * stride for the input and the output respectively.
mul.lo.s32 %r5, %r32, %r13;
.loc 2 23 42
mul.lo.s32 %r6, %r32, %r12;
mov.u32 %r33, %r2;
// Inner loop over y; %r7 holds the current y index.
BB16_4:
.loc 2 23 1
mov.u32 %r7, %r33;
add.s32 %r21, %r7, %r5;
mul.wide.s32 %rd5, %r21, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 23 1
ld.global.f64 %fd1, [%rd6];
.loc 3 384 10
// %fd2 = |x|. Only |x| < 1.0 takes the numerical path at BB16_8.
abs.f64 %fd2, %fd1;
setp.lt.f64 %p3, %fd2, 0d3FF0000000000000;
@%p3 bra BB16_8;
// `gtu` against +inf can only be true for an unordered compare, i.e. NaN input.
setp.gtu.f64 %p4, %fd2, 0d7FF0000000000000;
@%p4 bra BB16_7;
// |x| >= 1.0 and not NaN: |x| == 1.0 yields x * inf (signed infinity), anything
// larger yields the NaN pattern 0xFFF8000000000000.
setp.eq.f64 %p5, %fd2, 0d3FF0000000000000;
mul.f64 %fd14, %fd1, 0d7FF0000000000000;
selp.f64 %fd174, %fd14, 0dFFF8000000000000, %p5;
bra.uni BB16_14;
// NaN input: x + x propagates the NaN.
BB16_7:
.loc 3 384 10
add.f64 %fd174, %fd1, %fd1;
bra.uni BB16_14;
// |x| < 1.0: %fd19 = 1 - x*x computed with a single fma.
BB16_8:
.loc 3 384 10
neg.f64 %fd17, %fd1;
mov.f64 %fd18, 0d3FF0000000000000;
.loc 3 384 10
fma.rn.f64 %fd19, %fd1, %fd17, %fd18;
// Split %fd19 into an integer exponent term (%fd20) and a mantissa-like value
// (%fd21) by editing the exponent bits of the high word. NOTE(review): this and
// the series below look like an inlined natural logarithm of %fd19 - confirm
// against the CUDA math header.
{
.reg .b32 %temp;
mov.b64 {%temp, %r22}, %fd19;
}
{
.reg .b32 %temp;
mov.b64 {%r23, %temp}, %fd19;
}
shr.u32 %r24, %r22, 20;
and.b32 %r25, %r24, 2046;
add.s32 %r26, %r25, -1022;
cvt.rn.f64.s32 %fd20, %r26;
and.b32 %r27, %r22, -2145386497;
add.s32 %r28, %r27, 1071644672;
mov.b64 %fd21, {%r23, %r28};
// %fd22 = m - 1, %fd15 = m + 1 where m = %fd21.
add.f64 %fd22, %fd21, 0dBFF0000000000000;
add.f64 %fd15, %fd21, 0d3FF0000000000000;
// Approximate 1 / (m + 1) in single precision, then widen back to double.
// inline asm
cvt.rn.f32.f64 %f1,%fd15;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd16,%f2;
// inline asm
// Refine the reciprocal estimate (%fd26) with fma-based correction steps, then
// form the quotient (m - 1) / (m + 1) in %fd27 and a corrected version in %fd32.
neg.f64 %fd23, %fd15;
fma.rn.f64 %fd24, %fd23, %fd16, %fd18;
fma.rn.f64 %fd25, %fd24, %fd24, %fd24;
fma.rn.f64 %fd26, %fd25, %fd16, %fd16;
mul.f64 %fd27, %fd22, %fd26;
mov.f64 %fd28, 0dC000000000000000;
.loc 3 384 10
fma.rn.f64 %fd29, %fd28, %fd27, %fd22;
neg.f64 %fd30, %fd27;
fma.rn.f64 %fd31, %fd30, %fd22, %fd29;
fma.rn.f64 %fd32, %fd31, %fd26, %fd27;
// Horner polynomial in q*q (%fd33), where q = %fd32.
mul.f64 %fd33, %fd32, %fd32;
mov.f64 %fd34, 0d3FA55CF59CDC5D89;
mov.f64 %fd35, 0d3FB5C5C218C775C9;
.loc 3 384 10
fma.rn.f64 %fd36, %fd35, %fd33, %fd34;
mov.f64 %fd37, 0d3FAEFD18CF6EBB9C;
.loc 3 384 10
fma.rn.f64 %fd38, %fd36, %fd33, %fd37;
mov.f64 %fd39, 0d3FB10682EDCB8D1B;
.loc 3 384 10
fma.rn.f64 %fd40, %fd38, %fd33, %fd39;
mov.f64 %fd41, 0d3FB3B1DD3AC7FC96;
.loc 3 384 10
fma.rn.f64 %fd42, %fd40, %fd33, %fd41;
mov.f64 %fd43, 0d3FB745CB459B54A6;
.loc 3 384 10
fma.rn.f64 %fd44, %fd42, %fd33, %fd43;
mov.f64 %fd45, 0d3FBC71C741A0669F;
.loc 3 384 10
fma.rn.f64 %fd46, %fd44, %fd33, %fd45;
mov.f64 %fd47, 0d3FC249249209112E;
.loc 3 384 10
fma.rn.f64 %fd48, %fd46, %fd33, %fd47;
mov.f64 %fd49, 0d3FC99999999A06C1;
.loc 3 384 10
fma.rn.f64 %fd50, %fd48, %fd33, %fd49;
mov.f64 %fd51, 0d3FD5555555555535;
.loc 3 384 10
fma.rn.f64 %fd52, %fd50, %fd33, %fd51;
mul.f64 %fd53, %fd52, %fd33;
fma.rn.f64 %fd54, %fd53, %fd32, %fd32;
add.f64 %fd55, %fd54, %fd54;
// %fd5 = exponent term * ~0.6931 (ln 2) + 2 * series; %fd6 = -%fd5.
mov.f64 %fd56, 0d3FE62E42FEFA39EF;
.loc 3 384 10
fma.rn.f64 %fd5, %fd20, %fd56, %fd55;
neg.f64 %fd6, %fd5;
// Three polynomial ranges: %fd5 > -6.25 uses BB16_12; otherwise the choice is
// made on s = sqrt(-%fd5): s < 4.0 uses BB16_11, else the block right below.
setp.gt.f64 %p6, %fd5, 0dC019000000000000;
@%p6 bra BB16_12;
sqrt.rn.f64 %fd7, %fd6;
setp.lt.f64 %p7, %fd7, 0d4010000000000000;
@%p7 bra BB16_11;
// s >= 4.0: Horner polynomial in (s - 5.0).
add.f64 %fd57, %fd7, 0dC014000000000000;
mov.f64 %fd58, 0dBDF18FEEC0E38727;
mov.f64 %fd59, 0dBDBDCEC3A7785389;
.loc 3 384 10
fma.rn.f64 %fd60, %fd59, %fd57, %fd58;
mov.f64 %fd61, 0d3E19E6BF2DDA45E3;
.loc 3 384 10
fma.rn.f64 %fd62, %fd60, %fd57, %fd61;
mov.f64 %fd63, 0dBE30468FB24E2F5F;
.loc 3 384 10
fma.rn.f64 %fd64, %fd62, %fd57, %fd63;
mov.f64 %fd65, 0d3E405AC6A8FBA182;
.loc 3 384 10
fma.rn.f64 %fd66, %fd64, %fd57, %fd65;
mov.f64 %fd67, 0dBE50102E495FB9C0;
.loc 3 384 10
fma.rn.f64 %fd68, %fd66, %fd57, %fd67;
mov.f64 %fd69, 0d3E5F4C20E1334AF8;
.loc 3 384 10
fma.rn.f64 %fd70, %fd68, %fd57, %fd69;
mov.f64 %fd71, 0dBE722D220FDF9C3E;
.loc 3 384 10
fma.rn.f64 %fd72, %fd70, %fd57, %fd71;
mov.f64 %fd73, 0d3E8EBC8BB824CB54;
.loc 3 384 10
fma.rn.f64 %fd74, %fd72, %fd57, %fd73;
mov.f64 %fd75, 0dBEB0A8D40EA372CC;
.loc 3 384 10
fma.rn.f64 %fd76, %fd74, %fd57, %fd75;
mov.f64 %fd77, 0d3ED2FBD29D093D2B;
.loc 3 384 10
fma.rn.f64 %fd78, %fd76, %fd57, %fd77;
mov.f64 %fd79, 0dBEF4A3497E1E0FAC;
.loc 3 384 10
fma.rn.f64 %fd80, %fd78, %fd57, %fd79;
mov.f64 %fd81, 0d3F13EBF4EB00938F;
.loc 3 384 10
fma.rn.f64 %fd82, %fd80, %fd57, %fd81;
mov.f64 %fd83, 0dBF2C2F36A8FC5D53;
.loc 3 384 10
fma.rn.f64 %fd84, %fd82, %fd57, %fd83;
mov.f64 %fd85, 0dBF222EA5DF04047C;
.loc 3 384 10
fma.rn.f64 %fd86, %fd84, %fd57, %fd85;
mov.f64 %fd87, 0d3FF02A30D1FBA0DC;
.loc 3 384 10
fma.rn.f64 %fd88, %fd86, %fd57, %fd87;
mov.f64 %fd89, 0d4013664DDD1AD7FB;
.loc 3 384 10
fma.rn.f64 %fd173, %fd88, %fd57, %fd89;
bra.uni BB16_13;
// s < 4.0: Horner polynomial in (s - 3.25).
BB16_11:
.loc 3 384 10
add.f64 %fd90, %fd7, 0dC00A000000000000;
mov.f64 %fd91, 0d3E785CBE52878635;
mov.f64 %fd92, 0d3E23040F87DBD932;
.loc 3 384 10
fma.rn.f64 %fd93, %fd92, %fd90, %fd91;
mov.f64 %fd94, 0dBE92777453DD3955;
.loc 3 384 10
fma.rn.f64 %fd95, %fd93, %fd90, %fd94;
mov.f64 %fd96, 0d3E5395ABCD554C6C;
.loc 3 384 10
fma.rn.f64 %fd97, %fd95, %fd90, %fd96;
mov.f64 %fd98, 0d3EB936388A3790AD;
.loc 3 384 10
fma.rn.f64 %fd99, %fd97, %fd90, %fd98;
mov.f64 %fd100, 0dBED0D5DB812B5083;
.loc 3 384 10
fma.rn.f64 %fd101, %fd99, %fd90, %fd100;
mov.f64 %fd102, 0d3EC8860CD5D652F6;
.loc 3 384 10
fma.rn.f64 %fd103, %fd101, %fd90, %fd102;
mov.f64 %fd104, 0d3EEA29A0CACDFB23;
.loc 3 384 10
fma.rn.f64 %fd105, %fd103, %fd90, %fd104;
mov.f64 %fd106, 0dBF08CEF1F80281F2;
.loc 3 384 10
fma.rn.f64 %fd107, %fd105, %fd90, %fd106;
mov.f64 %fd108, 0d3F11E684D0B9188A;
.loc 3 384 10
fma.rn.f64 %fd109, %fd107, %fd90, %fd108;
mov.f64 %fd110, 0d3EF932CD54C8A222;
.loc 3 384 10
fma.rn.f64 %fd111, %fd109, %fd90, %fd110;
mov.f64 %fd112, 0dBF37448A89EF8AA3;
.loc 3 384 10
fma.rn.f64 %fd113, %fd111, %fd90, %fd112;
mov.f64 %fd114, 0d3F4F3CC55AD40C25;
.loc 3 384 10
fma.rn.f64 %fd115, %fd113, %fd90, %fd114;
mov.f64 %fd116, 0dBF5BA924132F38B1;
.loc 3 384 10
fma.rn.f64 %fd117, %fd115, %fd90, %fd116;
mov.f64 %fd118, 0d3F6468EECA533CF8;
.loc 3 384 10
fma.rn.f64 %fd119, %fd117, %fd90, %fd118;
mov.f64 %fd120, 0dBF6EBADABB891BBD;
.loc 3 384 10
fma.rn.f64 %fd121, %fd119, %fd90, %fd120;
mov.f64 %fd122, 0d3F75FFCFE5B76AFC;
.loc 3 384 10
fma.rn.f64 %fd123, %fd121, %fd90, %fd122;
mov.f64 %fd124, 0d3FF0158A6D641D39;
.loc 3 384 10
fma.rn.f64 %fd125, %fd123, %fd90, %fd124;
mov.f64 %fd126, 0d4008ABCC380D5A48;
.loc 3 384 10
fma.rn.f64 %fd173, %fd125, %fd90, %fd126;
bra.uni BB16_13;
// %fd5 > -6.25: Horner polynomial in (-3.125 - %fd5).
BB16_12:
mov.f64 %fd127, 0dC009000000000000;
.loc 3 384 10
sub.f64 %fd128, %fd127, %fd5;
mov.f64 %fd129, 0dBC08DDF93324D327;
mov.f64 %fd130, 0dBBB135D2E746E627;
.loc 3 384 10
fma.rn.f64 %fd131, %fd130, %fd128, %fd129;
mov.f64 %fd132, 0d3C37B83EEF0B7C9F;
.loc 3 384 10
fma.rn.f64 %fd133, %fd131, %fd128, %fd132;
mov.f64 %fd134, 0d3C69BA72CD589B91;
.loc 3 384 10
fma.rn.f64 %fd135, %fd133, %fd128, %fd134;
mov.f64 %fd136, 0dBCA33689090A6B96;
.loc 3 384 10
fma.rn.f64 %fd137, %fd135, %fd128, %fd136;
mov.f64 %fd138, 0d3C782E11898132E0;
.loc 3 384 10
fma.rn.f64 %fd139, %fd137, %fd128, %fd138;
mov.f64 %fd140, 0d3CFDE4ACFD9E26BA;
.loc 3 384 10
fma.rn.f64 %fd141, %fd139, %fd128, %fd140;
mov.f64 %fd142, 0dBD26D33EED66C487;
.loc 3 384 10
fma.rn.f64 %fd143, %fd141, %fd128, %fd142;
mov.f64 %fd144, 0dBD36F2167040D8E2;
.loc 3 384 10
fma.rn.f64 %fd145, %fd143, %fd128, %fd144;
mov.f64 %fd146, 0d3D872A22C2D77E20;
.loc 3 384 10
fma.rn.f64 %fd147, %fd145, %fd128, %fd146;
mov.f64 %fd148, 0dBDAC8859C4E5C0AF;
.loc 3 384 10
fma.rn.f64 %fd149, %fd147, %fd128, %fd148;
mov.f64 %fd150, 0dBDCDC583D118A561;
.loc 3 384 10
fma.rn.f64 %fd151, %fd149, %fd128, %fd150;
mov.f64 %fd152, 0d3E120F47CCF46B3C;
.loc 3 384 10
fma.rn.f64 %fd153, %fd151, %fd128, %fd152;
mov.f64 %fd154, 0dBE31A9E38DC84D60;
.loc 3 384 10
fma.rn.f64 %fd155, %fd153, %fd128, %fd154;
mov.f64 %fd156, 0dBE5F36CD6D3D46A9;
.loc 3 384 10
fma.rn.f64 %fd157, %fd155, %fd128, %fd156;
mov.f64 %fd158, 0d3E9C6B4F5D03B787;
.loc 3 384 10
fma.rn.f64 %fd159, %fd157, %fd128, %fd158;
mov.f64 %fd160, 0dBEB6E8A5434AE8A2;
.loc 3 384 10
fma.rn.f64 %fd161, %fd159, %fd128, %fd160;
mov.f64 %fd162, 0dBEED1D1F7B8736F6;
.loc 3 384 10
fma.rn.f64 %fd163, %fd161, %fd128, %fd162;
mov.f64 %fd164, 0d3F2879C2A212F024;
.loc 3 384 10
fma.rn.f64 %fd165, %fd163, %fd128, %fd164;
mov.f64 %fd166, 0dBF4845769484FCA8;
.loc 3 384 10
fma.rn.f64 %fd167, %fd165, %fd128, %fd166;
mov.f64 %fd168, 0dBF78B6C33114F909;
.loc 3 384 10
fma.rn.f64 %fd169, %fd167, %fd128, %fd168;
mov.f64 %fd170, 0d3FCEBD80D9B13E28;
.loc 3 384 10
fma.rn.f64 %fd171, %fd169, %fd128, %fd170;
mov.f64 %fd172, 0d3FFA755E7C99AE86;
.loc 3 384 10
fma.rn.f64 %fd173, %fd171, %fd128, %fd172;
// All three ranges: final value = polynomial * x.
BB16_13:
mul.f64 %fd174, %fd173, %fd1;
// Store the result (%fd174) at output[y + x * output stride].
BB16_14:
.loc 2 23 42
add.s32 %r29, %r7, %r6;
mul.wide.s32 %rd7, %r29, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 23 42
st.global.f64 [%rd8], %fd174;
.loc 2 23 22
// Advance y by the y step and loop while it is still below the y bound.
add.s32 %r8, %r3, %r7;
.loc 2 23 1
setp.lt.s32 %p8, %r8, %r10;
mov.u32 %r33, %r8;
@%p8 bra BB16_4;
BB16_15:
.loc 2 23 22
// Advance x by nctaid.x * ntid.x and loop while it is below the x bound.
mov.u32 %r30, %nctaid.x;
mad.lo.s32 %r32, %r30, %r14, %r32;
.loc 2 23 1
setp.lt.s32 %p9, %r32, %r11;
@%p9 bra BB16_2;
BB16_16:
.loc 2 23 2
ret;
}
// map_exp10_double: for every (x, y) index pair visited by this thread, loads
// one double from the param_4 buffer, runs the inlined math routine tagged
// `.loc 3 253` (math_functions_dbl_ptx3.h; presumably exp10(), going by the
// kernel name) and stores the result into the param_2 buffer.
// Parameters, as they are used below:
//   param_0  u32  exclusive upper bound for the y index (tid.y / ctaid.y based)
//   param_1  u32  exclusive upper bound for the x index (tid.x / ctaid.x based)
//   param_2  u64  output base address
//   param_3  u32  output stride in elements, multiplied by the x index
//   param_4  u64  input base address
//   param_5  u32  input stride in elements, multiplied by the x index
// Element address = base + 8 * (y + x * stride), computed in 32-bit before widening.
// Both loops are grid-stride loops (step = nctaid * ntid in each dimension).
// NOTE(review): presumably param_0/param_1 are rows/cols of a column-major
// matrix and the strides are leading dimensions - confirm against the caller.
.visible .entry map_exp10_double(
.param .u32 map_exp10_double_param_0,
.param .u32 map_exp10_double_param_1,
.param .u64 map_exp10_double_param_2,
.param .u32 map_exp10_double_param_3,
.param .u64 map_exp10_double_param_4,
.param .u32 map_exp10_double_param_5
)
{
.reg .pred %p<11>;
.reg .s32 %r<41>;
.reg .s64 %rd<9>;
.reg .f64 %fd<45>;
// %r15 = y bound, %r16 = x bound, %r17 = output stride, %r18 = input stride.
ld.param.u32 %r15, [map_exp10_double_param_0];
ld.param.u32 %r16, [map_exp10_double_param_1];
ld.param.u64 %rd3, [map_exp10_double_param_2];
ld.param.u32 %r17, [map_exp10_double_param_3];
ld.param.u64 %rd4, [map_exp10_double_param_4];
ld.param.u32 %r18, [map_exp10_double_param_5];
// %rd1 = output pointer, %rd2 = input pointer, both in the global window.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 24 1
// %r38 = starting x index = ntid.x * ctaid.x + tid.x.
mov.u32 %r19, %ntid.x;
mov.u32 %r20, %ctaid.x;
mov.u32 %r21, %tid.x;
mad.lo.s32 %r38, %r19, %r20, %r21;
.loc 2 24 1
setp.ge.s32 %p1, %r38, %r16;
@%p1 bra BB17_12;
.loc 2 24 1
// %r2 = starting y index = ntid.y * ctaid.y + tid.y.
mov.u32 %r22, %tid.y;
mov.u32 %r23, %ntid.y;
mov.u32 %r24, %ctaid.y;
mad.lo.s32 %r2, %r23, %r24, %r22;
.loc 2 24 22
// %r3 = y step = nctaid.y * ntid.y.
mov.u32 %r25, %nctaid.y;
mul.lo.s32 %r3, %r25, %r23;
// Outer loop over x.
BB17_2:
.loc 2 24 1
setp.ge.s32 %p2, %r2, %r15;
@%p2 bra BB17_11;
.loc 2 24 1
// %r5 / %r6 = x * stride for the input and the output respectively.
mul.lo.s32 %r5, %r38, %r18;
.loc 2 24 42
mul.lo.s32 %r6, %r38, %r17;
mov.u32 %r39, %r2;
// Inner loop over y; %r7 holds the current y index.
BB17_4:
.loc 2 24 1
mov.u32 %r7, %r39;
add.s32 %r26, %r7, %r5;
mul.wide.s32 %rd5, %r26, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 24 1
ld.global.f64 %fd1, [%rd6];
.loc 3 253 10
// %r8 = high 32 bits of x. The range test works on that word only: %p3 covers
// non-negative x below the upper cut-off (unsigned compare), %p4 covers negative
// x whose magnitude is below the lower cut-off (signed compare).
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd1;
}
setp.lt.u32 %p3, %r8, 1081295892;
setp.lt.s32 %p4, %r8, -1066124872;
or.pred %p5, %p3, %p4;
@%p5 bra BB17_6;
// Out of range: negative x gives 0.0, positive x gives +inf, and NaN (the only
// case where `gtu` against +inf holds) gives x + x.
setp.lt.s32 %p6, %r8, 0;
selp.f64 %fd8, 0d0000000000000000, 0d7FF0000000000000, %p6;
abs.f64 %fd9, %fd1;
setp.gtu.f64 %p7, %fd9, 0d7FF0000000000000;
add.f64 %fd10, %fd1, %fd1;
selp.f64 %fd44, %fd10, %fd8, %p7;
bra.uni BB17_10;
// In range: n = round-to-nearest(x * ~3.3219 (log2 10)), kept as %fd12 and %r9.
BB17_6:
.loc 3 253 10
mul.f64 %fd11, %fd1, 0d400A934F0979A371;
cvt.rni.f64.f64 %fd12, %fd11;
cvt.rzi.s32.f64 %r9, %fd12;
// Reduced argument: x - n * ~0.30103 (log10 2), subtracted in two parts
// (main constant, then a tiny correction constant).
mov.f64 %fd13, 0dBFD34413509F79FF;
.loc 3 253 10
fma.rn.f64 %fd14, %fd12, %fd13, %fd1;
mov.f64 %fd15, 0d3C49DC1DA994FD21;
.loc 3 253 10
fma.rn.f64 %fd16, %fd12, %fd15, %fd14;
// Multiply the reduced argument by ~2.302585 (ln 10), again as main constant
// plus a small correction term, giving %fd19.
mul.f64 %fd17, %fd16, 0dBCAF48AD494EA3E9;
mov.f64 %fd18, 0d40026BB1BBB55516;
.loc 3 253 10
fma.rn.f64 %fd19, %fd16, %fd18, %fd17;
// Horner polynomial in %fd19; result in %fd43.
mov.f64 %fd20, 0d3E928A27E30F5561;
mov.f64 %fd21, 0d3E5AE6449C0686C0;
.loc 3 253 10
fma.rn.f64 %fd22, %fd21, %fd19, %fd20;
mov.f64 %fd23, 0d3EC71DE8E6486D6B;
.loc 3 253 10
fma.rn.f64 %fd24, %fd22, %fd19, %fd23;
mov.f64 %fd25, 0d3EFA019A6B2464C5;
.loc 3 253 10
fma.rn.f64 %fd26, %fd24, %fd19, %fd25;
mov.f64 %fd27, 0d3F2A01A0171064A5;
.loc 3 253 10
fma.rn.f64 %fd28, %fd26, %fd19, %fd27;
mov.f64 %fd29, 0d3F56C16C17F29C8D;
.loc 3 253 10
fma.rn.f64 %fd30, %fd28, %fd19, %fd29;
mov.f64 %fd31, 0d3F8111111111A24E;
.loc 3 253 10
fma.rn.f64 %fd32, %fd30, %fd19, %fd31;
mov.f64 %fd33, 0d3FA555555555211D;
.loc 3 253 10
fma.rn.f64 %fd34, %fd32, %fd19, %fd33;
mov.f64 %fd35, 0d3FC5555555555530;
.loc 3 253 10
fma.rn.f64 %fd36, %fd34, %fd19, %fd35;
mov.f64 %fd37, 0d3FE0000000000005;
.loc 3 253 10
fma.rn.f64 %fd38, %fd36, %fd19, %fd37;
mov.f64 %fd39, 0d3FF0000000000000;
.loc 3 253 10
fma.rn.f64 %fd40, %fd38, %fd19, %fd39;
fma.rn.f64 %fd43, %fd40, %fd19, %fd39;
// Scale by 2^n. When |n| < 1023 a single power-of-two factor is built directly
// (BB17_8); otherwise the scaling is split into two factors.
abs.s32 %r27, %r9;
setp.lt.s32 %p8, %r27, 1023;
@%p8 bra BB17_8;
// Split scaling: the first factor has high word ((n + 2046) << 19) masked to
// the exponent bits and is applied immediately; the second factor's high word
// (%r40) is the remainder ((n + 2046) << 20) minus that first high word.
add.s32 %r28, %r9, 2046;
shl.b32 %r29, %r28, 19;
and.b32 %r30, %r29, -1048576;
shl.b32 %r31, %r28, 20;
sub.s32 %r40, %r31, %r30;
mov.u32 %r32, 0;
.loc 3 253 10
mov.b64 %fd41, {%r32, %r30};
mul.f64 %fd43, %fd43, %fd41;
bra.uni BB17_9;
// Single factor: high word = (n << 20) + 0x3FF00000, i.e. the bit pattern of 2^n.
BB17_8:
.loc 3 253 10
shl.b32 %r33, %r9, 20;
add.s32 %r40, %r33, 1072693248;
// Apply the (remaining) power-of-two factor whose high word is %r40.
BB17_9:
mov.u32 %r34, 0;
.loc 3 253 10
mov.b64 %fd42, {%r34, %r40};
mul.f64 %fd44, %fd43, %fd42;
// Store the result (%fd44) at output[y + x * output stride].
BB17_10:
.loc 2 24 42
add.s32 %r35, %r7, %r6;
mul.wide.s32 %rd7, %r35, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 24 42
st.global.f64 [%rd8], %fd44;
.loc 2 24 22
// Advance y by the y step and loop while it is still below the y bound.
add.s32 %r13, %r3, %r7;
.loc 2 24 1
setp.lt.s32 %p9, %r13, %r15;
mov.u32 %r39, %r13;
@%p9 bra BB17_4;
BB17_11:
.loc 2 24 22
// Advance x by nctaid.x * ntid.x and loop while it is below the x bound.
mov.u32 %r36, %nctaid.x;
mad.lo.s32 %r38, %r36, %r19, %r38;
.loc 2 24 1
setp.lt.s32 %p10, %r38, %r16;
@%p10 bra BB17_2;
BB17_12:
.loc 2 24 2
ret;
}
// map_exp2_double: for every (x, y) index pair visited by this thread, loads
// one double from the param_4 buffer, runs the inlined math routine tagged
// `.loc 3 248` (math_functions_dbl_ptx3.h; presumably exp2(), going by the
// kernel name) and stores the result into the param_2 buffer.
// Parameters, as they are used below:
//   param_0  u32  exclusive upper bound for the y index (tid.y / ctaid.y based)
//   param_1  u32  exclusive upper bound for the x index (tid.x / ctaid.x based)
//   param_2  u64  output base address
//   param_3  u32  output stride in elements, multiplied by the x index
//   param_4  u64  input base address
//   param_5  u32  input stride in elements, multiplied by the x index
// Element address = base + 8 * (y + x * stride), computed in 32-bit before widening.
// Both loops are grid-stride loops (step = nctaid * ntid in each dimension).
// NOTE(review): presumably param_0/param_1 are rows/cols of a column-major
// matrix and the strides are leading dimensions - confirm against the caller.
.visible .entry map_exp2_double(
.param .u32 map_exp2_double_param_0,
.param .u32 map_exp2_double_param_1,
.param .u64 map_exp2_double_param_2,
.param .u32 map_exp2_double_param_3,
.param .u64 map_exp2_double_param_4,
.param .u32 map_exp2_double_param_5
)
{
.reg .pred %p<11>;
.reg .s32 %r<41>;
.reg .s64 %rd<9>;
.reg .f64 %fd<41>;
// %r15 = y bound, %r16 = x bound, %r17 = output stride, %r18 = input stride.
ld.param.u32 %r15, [map_exp2_double_param_0];
ld.param.u32 %r16, [map_exp2_double_param_1];
ld.param.u64 %rd3, [map_exp2_double_param_2];
ld.param.u32 %r17, [map_exp2_double_param_3];
ld.param.u64 %rd4, [map_exp2_double_param_4];
ld.param.u32 %r18, [map_exp2_double_param_5];
// %rd1 = output pointer, %rd2 = input pointer, both in the global window.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 25 1
// %r38 = starting x index = ntid.x * ctaid.x + tid.x.
mov.u32 %r19, %ntid.x;
mov.u32 %r20, %ctaid.x;
mov.u32 %r21, %tid.x;
mad.lo.s32 %r38, %r19, %r20, %r21;
.loc 2 25 1
setp.ge.s32 %p1, %r38, %r16;
@%p1 bra BB18_12;
.loc 2 25 1
// %r2 = starting y index = ntid.y * ctaid.y + tid.y.
mov.u32 %r22, %tid.y;
mov.u32 %r23, %ntid.y;
mov.u32 %r24, %ctaid.y;
mad.lo.s32 %r2, %r23, %r24, %r22;
.loc 2 25 22
// %r3 = y step = nctaid.y * ntid.y.
mov.u32 %r25, %nctaid.y;
mul.lo.s32 %r3, %r25, %r23;
// Outer loop over x.
BB18_2:
.loc 2 25 1
setp.ge.s32 %p2, %r2, %r15;
@%p2 bra BB18_11;
.loc 2 25 1
// %r5 / %r6 = x * stride for the input and the output respectively.
mul.lo.s32 %r5, %r38, %r18;
.loc 2 25 42
mul.lo.s32 %r6, %r38, %r17;
mov.u32 %r39, %r2;
// Inner loop over y; %r7 holds the current y index.
BB18_4:
.loc 2 25 1
mov.u32 %r7, %r39;
add.s32 %r26, %r7, %r5;
mul.wide.s32 %rd5, %r26, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 25 1
ld.global.f64 %fd1, [%rd6];
.loc 3 248 10
// %r8 = high 32 bits of x. %p3 covers non-negative x below the upper cut-off
// (1083179008 = 0x40900000, the high word of 1024.0); %p4 covers negative x
// whose magnitude is below the lower cut-off (signed compare).
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd1;
}
setp.lt.u32 %p3, %r8, 1083179008;
setp.lt.s32 %p4, %r8, -1064252416;
or.pred %p5, %p3, %p4;
@%p5 bra BB18_6;
// Out of range: negative x gives 0.0, positive x gives +inf, and NaN (the only
// case where `gtu` against +inf holds) gives x + x.
setp.lt.s32 %p6, %r8, 0;
selp.f64 %fd8, 0d0000000000000000, 0d7FF0000000000000, %p6;
abs.f64 %fd9, %fd1;
setp.gtu.f64 %p7, %fd9, 0d7FF0000000000000;
add.f64 %fd10, %fd1, %fd1;
selp.f64 %fd40, %fd10, %fd8, %p7;
bra.uni BB18_10;
// In range: n = round-to-nearest(x) (%fd11 / %r9), fractional part f = x - n.
BB18_6:
.loc 3 248 10
cvt.rni.f64.f64 %fd11, %fd1;
sub.f64 %fd12, %fd1, %fd11;
cvt.rzi.s32.f64 %r9, %fd11;
// %fd15 = f * ~0.6931 (ln 2), formed as main constant plus a small correction.
mul.f64 %fd13, %fd12, 0d3C7ABC9E3B39803F;
mov.f64 %fd14, 0d3FE62E42FEFA39EF;
.loc 3 248 10
fma.rn.f64 %fd15, %fd12, %fd14, %fd13;
// Horner polynomial in %fd15; result in %fd39.
mov.f64 %fd16, 0d3E928A27E30F5561;
mov.f64 %fd17, 0d3E5AE6449C0686C0;
.loc 3 248 10
fma.rn.f64 %fd18, %fd17, %fd15, %fd16;
mov.f64 %fd19, 0d3EC71DE8E6486D6B;
.loc 3 248 10
fma.rn.f64 %fd20, %fd18, %fd15, %fd19;
mov.f64 %fd21, 0d3EFA019A6B2464C5;
.loc 3 248 10
fma.rn.f64 %fd22, %fd20, %fd15, %fd21;
mov.f64 %fd23, 0d3F2A01A0171064A5;
.loc 3 248 10
fma.rn.f64 %fd24, %fd22, %fd15, %fd23;
mov.f64 %fd25, 0d3F56C16C17F29C8D;
.loc 3 248 10
fma.rn.f64 %fd26, %fd24, %fd15, %fd25;
mov.f64 %fd27, 0d3F8111111111A24E;
.loc 3 248 10
fma.rn.f64 %fd28, %fd26, %fd15, %fd27;
mov.f64 %fd29, 0d3FA555555555211D;
.loc 3 248 10
fma.rn.f64 %fd30, %fd28, %fd15, %fd29;
mov.f64 %fd31, 0d3FC5555555555530;
.loc 3 248 10
fma.rn.f64 %fd32, %fd30, %fd15, %fd31;
mov.f64 %fd33, 0d3FE0000000000005;
.loc 3 248 10
fma.rn.f64 %fd34, %fd32, %fd15, %fd33;
mov.f64 %fd35, 0d3FF0000000000000;
.loc 3 248 10
fma.rn.f64 %fd36, %fd34, %fd15, %fd35;
fma.rn.f64 %fd39, %fd36, %fd15, %fd35;
// Scale by 2^n. When |n| < 1023 a single power-of-two factor is built directly
// (BB18_8); otherwise the scaling is split into two factors.
abs.s32 %r27, %r9;
setp.lt.s32 %p8, %r27, 1023;
@%p8 bra BB18_8;
// Split scaling: the first factor has high word ((n + 2046) << 19) masked to
// the exponent bits and is applied immediately; the second factor's high word
// (%r40) is the remainder ((n + 2046) << 20) minus that first high word.
add.s32 %r28, %r9, 2046;
shl.b32 %r29, %r28, 19;
and.b32 %r30, %r29, -1048576;
shl.b32 %r31, %r28, 20;
sub.s32 %r40, %r31, %r30;
mov.u32 %r32, 0;
.loc 3 248 10
mov.b64 %fd37, {%r32, %r30};
mul.f64 %fd39, %fd39, %fd37;
bra.uni BB18_9;
// Single factor: high word = (n << 20) + 0x3FF00000, i.e. the bit pattern of 2^n.
BB18_8:
.loc 3 248 10
shl.b32 %r33, %r9, 20;
add.s32 %r40, %r33, 1072693248;
// Apply the (remaining) power-of-two factor whose high word is %r40.
BB18_9:
mov.u32 %r34, 0;
.loc 3 248 10
mov.b64 %fd38, {%r34, %r40};
mul.f64 %fd40, %fd39, %fd38;
// Store the result (%fd40) at output[y + x * output stride].
BB18_10:
.loc 2 25 42
add.s32 %r35, %r7, %r6;
mul.wide.s32 %rd7, %r35, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 25 42
st.global.f64 [%rd8], %fd40;
.loc 2 25 22
// Advance y by the y step and loop while it is still below the y bound.
add.s32 %r13, %r3, %r7;
.loc 2 25 1
setp.lt.s32 %p9, %r13, %r15;
mov.u32 %r39, %r13;
@%p9 bra BB18_4;
BB18_11:
.loc 2 25 22
// Advance x by nctaid.x * ntid.x and loop while it is below the x bound.
mov.u32 %r36, %nctaid.x;
mad.lo.s32 %r38, %r36, %r19, %r38;
.loc 2 25 1
setp.lt.s32 %p10, %r38, %r16;
@%p10 bra BB18_2;
BB18_12:
.loc 2 25 2
ret;
}
// map_exp_double: for every (x, y) index pair visited by this thread, loads
// one double from the param_4 buffer, runs the inlined math routine tagged
// `.loc 3 243` (math_functions_dbl_ptx3.h; presumably exp(), going by the
// kernel name) and stores the result into the param_2 buffer.
// Parameters, as they are used below:
//   param_0  u32  exclusive upper bound for the y index (tid.y / ctaid.y based)
//   param_1  u32  exclusive upper bound for the x index (tid.x / ctaid.x based)
//   param_2  u64  output base address
//   param_3  u32  output stride in elements, multiplied by the x index
//   param_4  u64  input base address
//   param_5  u32  input stride in elements, multiplied by the x index
// Element address = base + 8 * (y + x * stride), computed in 32-bit before widening.
// Both loops are grid-stride loops (step = nctaid * ntid in each dimension).
// NOTE(review): presumably param_0/param_1 are rows/cols of a column-major
// matrix and the strides are leading dimensions - confirm against the caller.
.visible .entry map_exp_double(
.param .u32 map_exp_double_param_0,
.param .u32 map_exp_double_param_1,
.param .u64 map_exp_double_param_2,
.param .u32 map_exp_double_param_3,
.param .u64 map_exp_double_param_4,
.param .u32 map_exp_double_param_5
)
{
.reg .pred %p<11>;
.reg .s32 %r<41>;
.reg .s64 %rd<9>;
.reg .f64 %fd<42>;
// %r15 = y bound, %r16 = x bound, %r17 = output stride, %r18 = input stride.
ld.param.u32 %r15, [map_exp_double_param_0];
ld.param.u32 %r16, [map_exp_double_param_1];
ld.param.u64 %rd3, [map_exp_double_param_2];
ld.param.u32 %r17, [map_exp_double_param_3];
ld.param.u64 %rd4, [map_exp_double_param_4];
ld.param.u32 %r18, [map_exp_double_param_5];
// %rd1 = output pointer, %rd2 = input pointer, both in the global window.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 26 1
// %r38 = starting x index = ntid.x * ctaid.x + tid.x.
mov.u32 %r19, %ntid.x;
mov.u32 %r20, %ctaid.x;
mov.u32 %r21, %tid.x;
mad.lo.s32 %r38, %r19, %r20, %r21;
.loc 2 26 1
setp.ge.s32 %p1, %r38, %r16;
@%p1 bra BB19_12;
.loc 2 26 1
// %r2 = starting y index = ntid.y * ctaid.y + tid.y.
mov.u32 %r22, %tid.y;
mov.u32 %r23, %ntid.y;
mov.u32 %r24, %ctaid.y;
mad.lo.s32 %r2, %r23, %r24, %r22;
.loc 2 26 22
// %r3 = y step = nctaid.y * ntid.y.
mov.u32 %r25, %nctaid.y;
mul.lo.s32 %r3, %r25, %r23;
// Outer loop over x.
BB19_2:
.loc 2 26 1
setp.ge.s32 %p2, %r2, %r15;
@%p2 bra BB19_11;
.loc 2 26 1
// %r5 / %r6 = x * stride for the input and the output respectively.
mul.lo.s32 %r5, %r38, %r18;
.loc 2 26 42
mul.lo.s32 %r6, %r38, %r17;
mov.u32 %r39, %r2;
// Inner loop over y; %r7 holds the current y index.
BB19_4:
.loc 2 26 1
mov.u32 %r7, %r39;
add.s32 %r26, %r7, %r5;
mul.wide.s32 %rd5, %r26, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 26 1
ld.global.f64 %fd1, [%rd6];
.loc 3 243 10
// %r8 = high 32 bits of x. %p3 covers non-negative x below the upper cut-off
// (unsigned compare); %p4 covers negative x whose magnitude is below the lower
// cut-off (signed compare).
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd1;
}
setp.lt.u32 %p3, %r8, 1082535491;
setp.lt.s32 %p4, %r8, -1064875759;
or.pred %p5, %p3, %p4;
@%p5 bra BB19_6;
// Out of range: negative x gives 0.0, positive x gives +inf, and NaN (the only
// case where `gtu` against +inf holds) gives x + x.
setp.lt.s32 %p6, %r8, 0;
selp.f64 %fd8, 0d0000000000000000, 0d7FF0000000000000, %p6;
abs.f64 %fd9, %fd1;
setp.gtu.f64 %p7, %fd9, 0d7FF0000000000000;
add.f64 %fd10, %fd1, %fd1;
selp.f64 %fd41, %fd10, %fd8, %p7;
bra.uni BB19_10;
// In range: n = round-to-nearest(x * ~1.4427 (log2 e)), kept as %fd12 and %r9.
BB19_6:
.loc 3 243 10
mul.f64 %fd11, %fd1, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd12, %fd11;
cvt.rzi.s32.f64 %r9, %fd12;
// Reduced argument %fd16 = x - n * ~0.6931 (ln 2), subtracted in two parts
// (main constant, then a tiny correction constant).
mov.f64 %fd13, 0dBFE62E42FEFA39EF;
.loc 3 243 10
fma.rn.f64 %fd14, %fd12, %fd13, %fd1;
mov.f64 %fd15, 0dBC7ABC9E3B39803F;
.loc 3 243 10
fma.rn.f64 %fd16, %fd12, %fd15, %fd14;
// Horner polynomial in %fd16; result in %fd40.
mov.f64 %fd17, 0d3E928A27E30F5561;
mov.f64 %fd18, 0d3E5AE6449C0686C0;
.loc 3 243 10
fma.rn.f64 %fd19, %fd18, %fd16, %fd17;
mov.f64 %fd20, 0d3EC71DE8E6486D6B;
.loc 3 243 10
fma.rn.f64 %fd21, %fd19, %fd16, %fd20;
mov.f64 %fd22, 0d3EFA019A6B2464C5;
.loc 3 243 10
fma.rn.f64 %fd23, %fd21, %fd16, %fd22;
mov.f64 %fd24, 0d3F2A01A0171064A5;
.loc 3 243 10
fma.rn.f64 %fd25, %fd23, %fd16, %fd24;
mov.f64 %fd26, 0d3F56C16C17F29C8D;
.loc 3 243 10
fma.rn.f64 %fd27, %fd25, %fd16, %fd26;
mov.f64 %fd28, 0d3F8111111111A24E;
.loc 3 243 10
fma.rn.f64 %fd29, %fd27, %fd16, %fd28;
mov.f64 %fd30, 0d3FA555555555211D;
.loc 3 243 10
fma.rn.f64 %fd31, %fd29, %fd16, %fd30;
mov.f64 %fd32, 0d3FC5555555555530;
.loc 3 243 10
fma.rn.f64 %fd33, %fd31, %fd16, %fd32;
mov.f64 %fd34, 0d3FE0000000000005;
.loc 3 243 10
fma.rn.f64 %fd35, %fd33, %fd16, %fd34;
mov.f64 %fd36, 0d3FF0000000000000;
.loc 3 243 10
fma.rn.f64 %fd37, %fd35, %fd16, %fd36;
fma.rn.f64 %fd40, %fd37, %fd16, %fd36;
// Scale by 2^n. When |n| < 1023 a single power-of-two factor is built directly
// (BB19_8); otherwise the scaling is split into two factors.
abs.s32 %r27, %r9;
setp.lt.s32 %p8, %r27, 1023;
@%p8 bra BB19_8;
// Split scaling: the first factor has high word ((n + 2046) << 19) masked to
// the exponent bits and is applied immediately; the second factor's high word
// (%r40) is the remainder ((n + 2046) << 20) minus that first high word.
add.s32 %r28, %r9, 2046;
shl.b32 %r29, %r28, 19;
and.b32 %r30, %r29, -1048576;
shl.b32 %r31, %r28, 20;
sub.s32 %r40, %r31, %r30;
mov.u32 %r32, 0;
.loc 3 243 10
mov.b64 %fd38, {%r32, %r30};
mul.f64 %fd40, %fd40, %fd38;
bra.uni BB19_9;
// Single factor: high word = (n << 20) + 0x3FF00000, i.e. the bit pattern of 2^n.
BB19_8:
.loc 3 243 10
shl.b32 %r33, %r9, 20;
add.s32 %r40, %r33, 1072693248;
// Apply the (remaining) power-of-two factor whose high word is %r40.
BB19_9:
mov.u32 %r34, 0;
.loc 3 243 10
mov.b64 %fd39, {%r34, %r40};
mul.f64 %fd41, %fd40, %fd39;
// Store the result (%fd41) at output[y + x * output stride].
BB19_10:
.loc 2 26 42
add.s32 %r35, %r7, %r6;
mul.wide.s32 %rd7, %r35, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 26 42
st.global.f64 [%rd8], %fd41;
.loc 2 26 22
// Advance y by the y step and loop while it is still below the y bound.
add.s32 %r13, %r3, %r7;
.loc 2 26 1
setp.lt.s32 %p9, %r13, %r15;
mov.u32 %r39, %r13;
@%p9 bra BB19_4;
BB19_11:
.loc 2 26 22
// Advance x by nctaid.x * ntid.x and loop while it is below the x bound.
mov.u32 %r36, %nctaid.x;
mad.lo.s32 %r38, %r36, %r19, %r38;
.loc 2 26 1
setp.lt.s32 %p10, %r38, %r16;
@%p10 bra BB19_2;
BB19_12:
.loc 2 26 2
ret;
}
// map_expm1_double: for every (x, y) index pair visited by this thread, loads
// one double from the param_4 buffer, runs the inlined math routine tagged
// `.loc 3 258` (math_functions_dbl_ptx3.h; presumably expm1(), going by the
// kernel name) and stores the result into the param_2 buffer.
// Parameters, as they are used below:
//   param_0  u32  exclusive upper bound for the y index (tid.y / ctaid.y based)
//   param_1  u32  exclusive upper bound for the x index (tid.x / ctaid.x based)
//   param_2  u64  output base address
//   param_3  u32  output stride in elements, multiplied by the x index
//   param_4  u64  input base address
//   param_5  u32  input stride in elements, multiplied by the x index
// Element address = base + 8 * (y + x * stride), computed in 32-bit before widening.
// Both loops are grid-stride loops (step = nctaid * ntid in each dimension).
// NOTE(review): presumably param_0/param_1 are rows/cols of a column-major
// matrix and the strides are leading dimensions - confirm against the caller.
.visible .entry map_expm1_double(
.param .u32 map_expm1_double_param_0,
.param .u32 map_expm1_double_param_1,
.param .u64 map_expm1_double_param_2,
.param .u32 map_expm1_double_param_3,
.param .u64 map_expm1_double_param_4,
.param .u32 map_expm1_double_param_5
)
{
.reg .pred %p<13>;
.reg .s32 %r<37>;
.reg .s64 %rd<9>;
.reg .f64 %fd<45>;
// %r11 = y bound, %r12 = x bound, %r13 = output stride, %r14 = input stride.
ld.param.u32 %r11, [map_expm1_double_param_0];
ld.param.u32 %r12, [map_expm1_double_param_1];
ld.param.u64 %rd3, [map_expm1_double_param_2];
ld.param.u32 %r13, [map_expm1_double_param_3];
ld.param.u64 %rd4, [map_expm1_double_param_4];
ld.param.u32 %r14, [map_expm1_double_param_5];
// %rd1 = output pointer, %rd2 = input pointer, both in the global window.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 27 1
// %r35 = starting x index = ntid.x * ctaid.x + tid.x.
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r35, %r15, %r16, %r17;
.loc 2 27 1
setp.ge.s32 %p1, %r35, %r12;
@%p1 bra BB20_9;
.loc 2 27 1
// %r2 = starting y index = ntid.y * ctaid.y + tid.y.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r2, %r19, %r20, %r18;
.loc 2 27 22
// %r3 = y step = nctaid.y * ntid.y.
mov.u32 %r21, %nctaid.y;
mul.lo.s32 %r3, %r21, %r19;
// Outer loop over x.
BB20_2:
.loc 2 27 1
setp.ge.s32 %p2, %r2, %r11;
@%p2 bra BB20_8;
.loc 2 27 1
// %r5 / %r6 = x * stride for the input and the output respectively.
mul.lo.s32 %r5, %r35, %r14;
.loc 2 27 42
mul.lo.s32 %r6, %r35, %r13;
mov.u32 %r36, %r2;
// Inner loop over y; %r7 holds the current y index.
BB20_4:
.loc 2 27 1
mov.u32 %r7, %r36;
add.s32 %r22, %r7, %r5;
mul.wide.s32 %rd5, %r22, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 27 1
ld.global.f64 %fd1, [%rd6];
.loc 3 258 10
// %r8 = high 32 bits of x. %p3 covers non-negative x below the upper cut-off
// (unsigned compare); %p4 covers negative x whose magnitude is below the lower
// cut-off (signed compare).
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd1;
}
setp.lt.u32 %p3, %r8, 1082535491;
setp.lt.s32 %p4, %r8, -1068859392;
or.pred %p5, %p3, %p4;
@%p5 bra BB20_6;
// Out of range: negative x gives -1.0, positive x gives +inf, and NaN (the
// only case where `gtu` against +inf holds) gives x + x.
setp.lt.s32 %p6, %r8, 0;
selp.f64 %fd5, 0dBFF0000000000000, 0d7FF0000000000000, %p6;
abs.f64 %fd6, %fd1;
setp.gtu.f64 %p7, %fd6, 0d7FF0000000000000;
add.f64 %fd7, %fd1, %fd1;
selp.f64 %fd44, %fd7, %fd5, %p7;
bra.uni BB20_7;
// In range: n = round-to-nearest(x * ~1.4427 (log2 e)), kept as %fd9 and %r23;
// reduced argument %fd13 = x - n * ~0.6931 (ln 2), subtracted in two parts.
BB20_6:
.loc 3 258 10
mul.f64 %fd8, %fd1, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd9, %fd8;
cvt.rzi.s32.f64 %r23, %fd9;
mov.f64 %fd10, 0dBFE62E42FEFA39EF;
.loc 3 258 10
fma.rn.f64 %fd11, %fd9, %fd10, %fd1;
mov.f64 %fd12, 0dBC7ABC9E3B39803F;
.loc 3 258 10
fma.rn.f64 %fd13, %fd9, %fd12, %fd11;
// %r24 = high word shifted left by one, which discards the sign bit. When it is
// below 2142496327 (small |x|) the reduction is skipped: n is forced to 0
// (%r25) and x itself is used as the polynomial argument (%fd14).
shl.b32 %r24, %r8, 1;
setp.lt.u32 %p8, %r24, 2142496327;
selp.b32 %r25, 0, %r23, %p8;
mov.u32 %r26, 0;
.loc 3 258 10
selp.f64 %fd14, %fd1, %fd13, %p8;
// Horner polynomial in %fd14.
mov.f64 %fd15, 0d3E5AF86D8EBD13CD;
mov.f64 %fd16, 0d3E21F4076ACD15B6;
.loc 3 258 10
fma.rn.f64 %fd17, %fd16, %fd14, %fd15;
mov.f64 %fd18, 0d3E927E5092BA033D;
.loc 3 258 10
fma.rn.f64 %fd19, %fd17, %fd14, %fd18;
mov.f64 %fd20, 0d3EC71DDE6C5F9DA1;
.loc 3 258 10
fma.rn.f64 %fd21, %fd19, %fd14, %fd20;
mov.f64 %fd22, 0d3EFA01A018D034E6;
.loc 3 258 10
fma.rn.f64 %fd23, %fd21, %fd14, %fd22;
mov.f64 %fd24, 0d3F2A01A01B3B6940;
.loc 3 258 10
fma.rn.f64 %fd25, %fd23, %fd14, %fd24;
mov.f64 %fd26, 0d3F56C16C16C1B5DD;
.loc 3 258 10
fma.rn.f64 %fd27, %fd25, %fd14, %fd26;
mov.f64 %fd28, 0d3F8111111110F74D;
.loc 3 258 10
fma.rn.f64 %fd29, %fd27, %fd14, %fd28;
mov.f64 %fd30, 0d3FA555555555554D;
.loc 3 258 10
fma.rn.f64 %fd31, %fd29, %fd14, %fd30;
mov.f64 %fd32, 0d3FC5555555555557;
.loc 3 258 10
fma.rn.f64 %fd33, %fd31, %fd14, %fd32;
mov.f64 %fd34, 0d3FE0000000000000;
.loc 3 258 10
fma.rn.f64 %fd35, %fd33, %fd14, %fd34;
// %fd37 = r + r*r*P(r) with r = %fd14 (no constant term, so no "+ 1").
mul.f64 %fd36, %fd35, %fd14;
fma.rn.f64 %fd37, %fd36, %fd14, %fd14;
// n == 1024 is special-cased: the scale factor is built from n - 1 and the
// final value is doubled afterwards (%fd42).
setp.eq.s32 %p9, %r25, 1024;
selp.b32 %r27, -1, 0, %p9;
add.s32 %r28, %r27, %r25;
// %fd38 = power of two with high word (n' << 20) + 0x3FF00000; %fd39 = 1.0.
shl.b32 %r29, %r28, 20;
add.s32 %r30, %r29, 1072693248;
mov.u32 %r31, 1072693248;
.loc 3 258 10
mov.b64 %fd38, {%r26, %r30};
mov.b64 %fd39, {%r26, %r31};
// %fd41 = %fd37 * 2^n' + (2^n' - 1.0).
sub.f64 %fd40, %fd38, %fd39;
fma.rn.f64 %fd41, %fd37, %fd38, %fd40;
add.f64 %fd42, %fd41, %fd41;
selp.f64 %fd43, %fd42, %fd41, %p9;
// When the sign-stripped high word of x is zero the result is %fd14, which is
// x itself in that case because %p8 is then also true.
setp.eq.s32 %p10, %r24, 0;
selp.f64 %fd44, %fd14, %fd43, %p10;
// Store the result (%fd44) at output[y + x * output stride].
BB20_7:
.loc 2 27 42
add.s32 %r32, %r7, %r6;
mul.wide.s32 %rd7, %r32, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 27 42
st.global.f64 [%rd8], %fd44;
.loc 2 27 22
// Advance y by the y step and loop while it is still below the y bound.
add.s32 %r9, %r3, %r7;
.loc 2 27 1
setp.lt.s32 %p11, %r9, %r11;
mov.u32 %r36, %r9;
@%p11 bra BB20_4;
BB20_8:
.loc 2 27 22
// Advance x by nctaid.x * ntid.x and loop while it is below the x bound.
mov.u32 %r33, %nctaid.x;
mad.lo.s32 %r35, %r33, %r15, %r35;
.loc 2 27 1
setp.lt.s32 %p12, %r35, %r12;
@%p12 bra BB20_2;
BB20_9:
.loc 2 27 2
ret;
}
// map_fabs_double: writes |in[y + x * in_stride]| to out[y + x * out_stride]
// for every (x, y) index pair visited by this thread.
//   param_0  u32  exclusive upper bound for the y index (tid.y / ctaid.y based)
//   param_1  u32  exclusive upper bound for the x index (tid.x / ctaid.x based)
//   param_2  u64  output base address
//   param_3  u32  output stride in elements, multiplied by the x index
//   param_4  u64  input base address
//   param_5  u32  input stride in elements, multiplied by the x index
// Both loops are grid-stride loops, stepping by nctaid * ntid in their dimension.
// Element offsets are formed in 32-bit and only then widened to a byte offset.
.visible .entry map_fabs_double(
.param .u32 map_fabs_double_param_0,
.param .u32 map_fabs_double_param_1,
.param .u64 map_fabs_double_param_2,
.param .u32 map_fabs_double_param_3,
.param .u64 map_fabs_double_param_4,
.param .u32 map_fabs_double_param_5
)
{
.reg .pred %x_done, %y_done, %y_more, %x_more;
.reg .s32 %y_end, %x_end, %dst_ld, %src_ld;
.reg .s32 %bdim_x, %bidx_x, %tidx_x, %gdim_x;
.reg .s32 %bdim_y, %bidx_y, %tidx_y, %gdim_y;
.reg .s32 %x_cur, %y_cur, %y_start, %x_step, %y_step;
.reg .s32 %src_xoff, %dst_xoff, %src_elem, %dst_elem;
.reg .s64 %dst_generic, %src_generic, %dst_base, %src_base;
.reg .s64 %src_bytes, %dst_bytes, %src_addr, %dst_addr;
.reg .f64 %v_in, %v_abs;
// Buffer addresses first, converted to global-space pointers.
ld.param.u64 %dst_generic, [map_fabs_double_param_2];
ld.param.u64 %src_generic, [map_fabs_double_param_4];
cvta.to.global.u64 %dst_base, %dst_generic;
cvta.to.global.u64 %src_base, %src_generic;
// Scalar bounds and strides.
ld.param.u32 %y_end, [map_fabs_double_param_0];
ld.param.u32 %x_end, [map_fabs_double_param_1];
ld.param.u32 %dst_ld, [map_fabs_double_param_3];
ld.param.u32 %src_ld, [map_fabs_double_param_5];
.loc 2 28 1
// First x index owned by this thread.
mov.u32 %tidx_x, %tid.x;
mov.u32 %bidx_x, %ctaid.x;
mov.u32 %bdim_x, %ntid.x;
mad.lo.s32 %x_cur, %bdim_x, %bidx_x, %tidx_x;
.loc 2 28 1
setp.ge.s32 %x_done, %x_cur, %x_end;
@%x_done bra BB21_EXIT;
.loc 2 28 1
// First y index owned by this thread.
mov.u32 %tidx_y, %tid.y;
mov.u32 %bidx_y, %ctaid.y;
mov.u32 %bdim_y, %ntid.y;
mad.lo.s32 %y_start, %bdim_y, %bidx_y, %tidx_y;
.loc 2 28 22
// Loop steps: whole-grid extent in each dimension.
mov.u32 %gdim_x, %nctaid.x;
mul.lo.s32 %x_step, %gdim_x, %bdim_x;
.loc 2 28 22
mov.u32 %gdim_y, %nctaid.y;
mul.lo.s32 %y_step, %gdim_y, %bdim_y;
BB21_X_LOOP:
.loc 2 28 1
// Skip the inner loop entirely when this thread has no y index in range.
setp.ge.s32 %y_done, %y_start, %y_end;
@%y_done bra BB21_X_NEXT;
.loc 2 28 1
mul.lo.s32 %src_xoff, %x_cur, %src_ld;
.loc 2 28 42
mul.lo.s32 %dst_xoff, %x_cur, %dst_ld;
mov.u32 %y_cur, %y_start;
BB21_Y_LOOP:
.loc 2 28 1
add.s32 %src_elem, %y_cur, %src_xoff;
mul.wide.s32 %src_bytes, %src_elem, 8;
add.s64 %src_addr, %src_base, %src_bytes;
.loc 2 28 1
ld.global.f64 %v_in, [%src_addr];
.loc 4 2755 10
abs.f64 %v_abs, %v_in;
.loc 2 28 42
add.s32 %dst_elem, %y_cur, %dst_xoff;
mul.wide.s32 %dst_bytes, %dst_elem, 8;
add.s64 %dst_addr, %dst_base, %dst_bytes;
.loc 2 28 42
st.global.f64 [%dst_addr], %v_abs;
.loc 2 28 22
add.s32 %y_cur, %y_cur, %y_step;
.loc 2 28 1
setp.lt.s32 %y_more, %y_cur, %y_end;
@%y_more bra BB21_Y_LOOP;
BB21_X_NEXT:
.loc 2 28 22
add.s32 %x_cur, %x_cur, %x_step;
.loc 2 28 1
setp.lt.s32 %x_more, %x_cur, %x_end;
@%x_more bra BB21_X_LOOP;
BB21_EXIT:
.loc 2 28 2
ret;
}
// map_floor_double: element-wise round-toward-negative-infinity (cvt.rmi.f64.f64)
// of a strided buffer of doubles.
//
// Parameters (as used by the body):
//   param_0 (u32): exclusive bound for the inner index (seeded from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (seeded from the x thread/block ids)
//   param_2 (u64): destination pointer (generic address, converted to global)
//   param_3 (u32): destination stride, in elements, applied to the outer index
//   param_4 (u64): source pointer (generic address, converted to global)
//   param_5 (u32): source stride, in elements, applied to the outer index
//
// Each thread walks outer = ntid.x*ctaid.x + tid.x in steps of nctaid.x*ntid.x and,
// for every outer value, inner = ntid.y*ctaid.y + tid.y in steps of nctaid.y*ntid.y.
// Element offsets are (outer*stride + inner) * 8 bytes; all index math is 32-bit.
.visible .entry map_floor_double(
    .param .u32 map_floor_double_param_0,
    .param .u32 map_floor_double_param_1,
    .param .u64 map_floor_double_param_2,
    .param .u32 map_floor_double_param_3,
    .param .u64 map_floor_double_param_4,
    .param .u32 map_floor_double_param_5
)
{
    .reg .pred %p_outer_oob, %p_inner_oob, %p_inner_more, %p_outer_more;
    .reg .s32 %inner_n, %outer_n, %dst_ld, %src_ld;
    .reg .s32 %bd_x, %b_x, %t_x, %bd_y, %b_y, %t_y, %g_x, %g_y;
    .reg .s32 %outer, %inner_first, %inner, %outer_step, %inner_step;
    .reg .s32 %src_base, %dst_base, %src_idx, %dst_idx;
    .reg .s64 %dst_gen, %src_gen, %dst, %src;
    .reg .s64 %src_off, %src_addr, %dst_off, %dst_addr;
    .reg .f64 %val_in, %val_out;

    // Fetch kernel arguments.
    ld.param.u32 %inner_n, [map_floor_double_param_0];
    ld.param.u32 %outer_n, [map_floor_double_param_1];
    ld.param.u64 %dst_gen, [map_floor_double_param_2];
    ld.param.u32 %dst_ld, [map_floor_double_param_3];
    ld.param.u64 %src_gen, [map_floor_double_param_4];
    ld.param.u32 %src_ld, [map_floor_double_param_5];
    cvta.to.global.u64 %dst, %dst_gen;
    cvta.to.global.u64 %src, %src_gen;

    // Starting outer index for this thread; nothing to do if it is already out of range.
    .loc 2 29 1
    mov.u32 %bd_x, %ntid.x;
    mov.u32 %b_x, %ctaid.x;
    mov.u32 %t_x, %tid.x;
    mad.lo.s32 %outer, %bd_x, %b_x, %t_x;
    setp.ge.s32 %p_outer_oob, %outer, %outer_n;
    @%p_outer_oob bra FLOOR_EXIT;

    // Starting inner index and the two loop steps (whole-grid extents in x and y).
    .loc 2 29 1
    mov.u32 %t_y, %tid.y;
    mov.u32 %bd_y, %ntid.y;
    mov.u32 %b_y, %ctaid.y;
    mad.lo.s32 %inner_first, %bd_y, %b_y, %t_y;
    .loc 2 29 22
    mov.u32 %g_x, %nctaid.x;
    mul.lo.s32 %outer_step, %g_x, %bd_x;
    mov.u32 %g_y, %nctaid.y;
    mul.lo.s32 %inner_step, %g_y, %bd_y;

FLOOR_OUTER:
    // Skip the inner loop entirely when the first inner index is out of range.
    .loc 2 29 1
    setp.ge.s32 %p_inner_oob, %inner_first, %inner_n;
    @%p_inner_oob bra FLOOR_OUTER_NEXT;
    // Per-outer base element indices for source and destination.
    mul.lo.s32 %src_base, %outer, %src_ld;
    .loc 2 29 42
    mul.lo.s32 %dst_base, %outer, %dst_ld;
    mov.u32 %inner, %inner_first;

FLOOR_INNER:
    // Load src[src_base + inner].
    .loc 2 29 1
    add.s32 %src_idx, %inner, %src_base;
    mul.wide.s32 %src_off, %src_idx, 8;
    add.s64 %src_addr, %src, %src_off;
    ld.global.f64 %val_in, [%src_addr];
    .loc 4 2745 10
    cvt.rmi.f64.f64 %val_out, %val_in;
    // Store to dst[dst_base + inner].
    .loc 2 29 42
    add.s32 %dst_idx, %inner, %dst_base;
    mul.wide.s32 %dst_off, %dst_idx, 8;
    add.s64 %dst_addr, %dst, %dst_off;
    st.global.f64 [%dst_addr], %val_out;
    // Advance the inner index.
    .loc 2 29 22
    add.s32 %inner, %inner_step, %inner;
    .loc 2 29 1
    setp.lt.s32 %p_inner_more, %inner, %inner_n;
    @%p_inner_more bra FLOOR_INNER;

FLOOR_OUTER_NEXT:
    // Advance the outer index.
    .loc 2 29 22
    add.s32 %outer, %outer_step, %outer;
    .loc 2 29 1
    setp.lt.s32 %p_outer_more, %outer, %outer_n;
    @%p_outer_more bra FLOOR_OUTER;

FLOOR_EXIT:
    .loc 2 29 2
    ret;
}
// map_j0_double: for every (outer, inner) index pair handled by this thread, loads
// a double from the source buffer, evaluates the inlined math routine tagged
// ".loc 3 333" (presumably j0() from math_functions_dbl_ptx3.h -- confirm), and
// stores the result to the destination buffer.
//
// Parameters (as used by the body):
//   param_0 (u32): exclusive bound for the inner index (seeded from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (seeded from the x thread/block ids)
//   param_2 (u64): destination pointer (converted to a global address, written with st.global.f64)
//   param_3 (u32): destination stride, in elements, multiplied by the outer index
//   param_4 (u64): source pointer (converted to a global address, read with ld.global.f64)
//   param_5 (u32): source stride, in elements, multiplied by the outer index
.visible .entry map_j0_double(
.param .u32 map_j0_double_param_0,
.param .u32 map_j0_double_param_1,
.param .u64 map_j0_double_param_2,
.param .u32 map_j0_double_param_3,
.param .u64 map_j0_double_param_4,
.param .u32 map_j0_double_param_5
)
{
// 8 bytes of per-thread local storage: two 32-bit slots (offsets 0 and 4) that
// receive the integer quadrant from the two argument reductions below.
.local .align 4 .b8 __local_depot23[8];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<15>;
.reg .f32 %f<5>;
.reg .s32 %r<44>;
.reg .s64 %rd<20>;
.reg .f64 %fd<217>;
mov.u64 %SPL, __local_depot23;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r15, [map_j0_double_param_0];
ld.param.u32 %r16, [map_j0_double_param_1];
ld.param.u64 %rd1, [map_j0_double_param_2];
ld.param.u32 %r17, [map_j0_double_param_3];
ld.param.u64 %rd2, [map_j0_double_param_4];
ld.param.u32 %r18, [map_j0_double_param_5];
.loc 2 30 1
// Outer index %r40 = ntid.x * ctaid.x + tid.x; exit if it is already >= param_1.
mov.u32 %r19, %ntid.x;
mov.u32 %r20, %ctaid.x;
mov.u32 %r21, %tid.x;
mad.lo.s32 %r40, %r19, %r20, %r21;
.loc 2 30 1
setp.ge.s32 %p1, %r40, %r16;
@%p1 bra BB23_24;
.loc 2 30 1
mov.u32 %r22, %ntid.y;
.loc 2 30 22
// Inner-loop step %r2 = nctaid.y * ntid.y.
mov.u32 %r23, %nctaid.y;
mul.lo.s32 %r2, %r23, %r22;
// %rd3 = source base, %rd17 = destination base (global addresses).
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd17, %rd1;
// Outer loop head: restart the inner index %r41 = ntid.y * ctaid.y + tid.y.
BB23_2:
.loc 2 30 1
mov.u32 %r24, %ctaid.y;
mov.u32 %r26, %tid.y;
mad.lo.s32 %r41, %r22, %r24, %r26;
.loc 2 30 1
setp.ge.s32 %p2, %r41, %r15;
@%p2 bra BB23_23;
// Inner loop head: load the input element at index outer*param_5 + inner (8-byte elements).
BB23_3:
.loc 2 30 1
mad.lo.s32 %r31, %r40, %r18, %r41;
mul.wide.s32 %rd4, %r31, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd25, [%rd5];
.loc 3 333 10
// %fd1 = |x|. Everything below works on the magnitude only.
abs.f64 %fd1, %fd25;
// Interval 1: taken when |x| is not greater than 0d400FB319F277BBE5
// (setp.gtu is also true for NaN, so NaN skips to the next test).
setp.gtu.f64 %p3, %fd1, 0d400FB319F277BBE5;
@%p3 bra BB23_5;
// Shift |x| by a constant given as two addends (presumably the high/low parts
// of a zero of the function -- confirm), then evaluate a polynomial in the
// shifted argument %fd27 as a chain of fma.rn.f64 (Horner form).
add.f64 %fd26, %fd1, 0dC0033D152E971B40;
add.f64 %fd27, %fd26, 0d3CA0F539D7DA258E;
mov.f64 %fd28, 0dBCFCF8F9A8C294BC;
mov.f64 %fd29, 0dBCC0D18564C48C61;
.loc 3 333 10
fma.rn.f64 %fd30, %fd29, %fd27, %fd28;
mov.f64 %fd31, 0d3D3FAB983CAE498B;
.loc 3 333 10
fma.rn.f64 %fd32, %fd30, %fd27, %fd31;
mov.f64 %fd33, 0d3D7CD7C018579B88;
.loc 3 333 10
fma.rn.f64 %fd34, %fd32, %fd27, %fd33;
mov.f64 %fd35, 0dBDBBDD2342D64FDD;
.loc 3 333 10
fma.rn.f64 %fd36, %fd34, %fd27, %fd35;
mov.f64 %fd37, 0dBDF5C2D9416B1E2B;
.loc 3 333 10
fma.rn.f64 %fd38, %fd36, %fd27, %fd37;
mov.f64 %fd39, 0d3E32951D73174DD5;
.loc 3 333 10
fma.rn.f64 %fd40, %fd38, %fd27, %fd39;
mov.f64 %fd41, 0d3E67FF99802CAEB5;
.loc 3 333 10
fma.rn.f64 %fd42, %fd40, %fd27, %fd41;
mov.f64 %fd43, 0dBEA1CCE305C4C9F7;
.loc 3 333 10
fma.rn.f64 %fd44, %fd42, %fd27, %fd43;
mov.f64 %fd45, 0dBED232C77E29E1BB;
.loc 3 333 10
fma.rn.f64 %fd46, %fd44, %fd27, %fd45;
mov.f64 %fd47, 0d3F06ED3B9F0EF757;
.loc 3 333 10
fma.rn.f64 %fd48, %fd46, %fd27, %fd47;
mov.f64 %fd49, 0d3F315382BA096A62;
.loc 3 333 10
fma.rn.f64 %fd50, %fd48, %fd27, %fd49;
mov.f64 %fd51, 0dBF61F992590D1AE4;
.loc 3 333 10
fma.rn.f64 %fd52, %fd50, %fd27, %fd51;
mov.f64 %fd53, 0dBF81BB1CBE1A465F;
.loc 3 333 10
fma.rn.f64 %fd54, %fd52, %fd27, %fd53;
mov.f64 %fd55, 0d3FACFAE864368D84;
.loc 3 333 10
fma.rn.f64 %fd56, %fd54, %fd27, %fd55;
mov.f64 %fd57, 0d3FBBA1DEEA0294A3;
.loc 3 333 10
fma.rn.f64 %fd58, %fd56, %fd27, %fd57;
mov.f64 %fd59, 0dBFE09CDB36551280;
.loc 3 333 10
fma.rn.f64 %fd60, %fd58, %fd27, %fd59;
// Result for this interval: polynomial * shifted argument.
mul.f64 %fd216, %fd60, %fd27;
bra.uni BB23_22;
// Interval 2: |x| not greater than 0d401C58FD1A62F5EC; same scheme with a
// different shift constant and coefficient set.
BB23_5:
.loc 3 333 10
setp.gtu.f64 %p4, %fd1, 0d401C58FD1A62F5EC;
@%p4 bra BB23_7;
add.f64 %fd61, %fd1, 0dC016148F5B2C2E45;
add.f64 %fd62, %fd61, 0dBC975054CD60A517;
mov.f64 %fd63, 0d3CF83FD1F333EB61;
mov.f64 %fd64, 0d3CBCB0A8F126B343;
.loc 3 333 10
fma.rn.f64 %fd65, %fd64, %fd62, %fd63;
mov.f64 %fd66, 0dBD4100E33E3FB413;
.loc 3 333 10
fma.rn.f64 %fd67, %fd65, %fd62, %fd66;
mov.f64 %fd68, 0dBD7846076D004627;
.loc 3 333 10
fma.rn.f64 %fd69, %fd67, %fd62, %fd68;
mov.f64 %fd70, 0d3DBE2F1D4F90720D;
.loc 3 333 10
fma.rn.f64 %fd71, %fd69, %fd62, %fd70;
mov.f64 %fd72, 0d3DF1D03B1E4A119B;
.loc 3 333 10
fma.rn.f64 %fd73, %fd71, %fd62, %fd72;
mov.f64 %fd74, 0dBE341D72B1B3BCE9;
.loc 3 333 10
fma.rn.f64 %fd75, %fd73, %fd62, %fd74;
mov.f64 %fd76, 0dBE62DA37CE2A9EF8;
.loc 3 333 10
fma.rn.f64 %fd77, %fd75, %fd62, %fd76;
mov.f64 %fd78, 0d3EA32E6D9974F763;
.loc 3 333 10
fma.rn.f64 %fd79, %fd77, %fd62, %fd78;
mov.f64 %fd80, 0d3ECAD77D744A1879;
.loc 3 333 10
fma.rn.f64 %fd81, %fd79, %fd62, %fd80;
mov.f64 %fd82, 0dBF0863F481A37337;
.loc 3 333 10
fma.rn.f64 %fd83, %fd81, %fd62, %fd82;
mov.f64 %fd84, 0dBF26F641F418F0F4;
.loc 3 333 10
fma.rn.f64 %fd85, %fd83, %fd62, %fd84;
mov.f64 %fd86, 0d3F627E31FE9A969E;
.loc 3 333 10
fma.rn.f64 %fd87, %fd85, %fd62, %fd86;
mov.f64 %fd88, 0d3F72F7FFE9025628;
.loc 3 333 10
fma.rn.f64 %fd89, %fd87, %fd62, %fd88;
mov.f64 %fd90, 0dBFAB2150CB41E8BF;
.loc 3 333 10
fma.rn.f64 %fd91, %fd89, %fd62, %fd90;
mov.f64 %fd92, 0dBF9F8F72E7A848DE;
.loc 3 333 10
fma.rn.f64 %fd93, %fd91, %fd62, %fd92;
mov.f64 %fd94, 0d3FD5C6E60A097823;
.loc 3 333 10
fma.rn.f64 %fd95, %fd93, %fd62, %fd94;
mul.f64 %fd216, %fd95, %fd62;
bra.uni BB23_22;
// Interval 3: |x| not greater than 0d402471FCB6A7A8C0; same scheme again.
BB23_7:
.loc 3 333 10
setp.gtu.f64 %p5, %fd1, 0d402471FCB6A7A8C0;
@%p5 bra BB23_9;
add.f64 %fd96, %fd1, 0dC0214EB56CCCDECA;
add.f64 %fd97, %fd96, 0d3CB51970714C7C25;
mov.f64 %fd98, 0dBCF4B3A71AAAC629;
mov.f64 %fd99, 0dBCBDB7FFCF659E24;
.loc 3 333 10
fma.rn.f64 %fd100, %fd99, %fd97, %fd98;
mov.f64 %fd101, 0d3D417EC150ECDCE7;
.loc 3 333 10
fma.rn.f64 %fd102, %fd100, %fd97, %fd101;
mov.f64 %fd103, 0d3D7438F5EA1D10B2;
.loc 3 333 10
fma.rn.f64 %fd104, %fd102, %fd97, %fd103;
mov.f64 %fd105, 0dBDBEDAE7EC2C9E87;
.loc 3 333 10
fma.rn.f64 %fd106, %fd104, %fd97, %fd105;
mov.f64 %fd107, 0dBDECADD2C4B91F58;
.loc 3 333 10
fma.rn.f64 %fd108, %fd106, %fd97, %fd107;
mov.f64 %fd109, 0d3E34582C8EE12204;
.loc 3 333 10
fma.rn.f64 %fd110, %fd108, %fd97, %fd109;
mov.f64 %fd111, 0d3E5CEDA451DD20F8;
.loc 3 333 10
fma.rn.f64 %fd112, %fd110, %fd97, %fd111;
mov.f64 %fd113, 0dBEA30E8CC3165E2F;
.loc 3 333 10
fma.rn.f64 %fd114, %fd112, %fd97, %fd113;
mov.f64 %fd115, 0dBEC3324842BB1A2E;
.loc 3 333 10
fma.rn.f64 %fd116, %fd114, %fd97, %fd115;
mov.f64 %fd117, 0d3F07800BC54FBDDB;
.loc 3 333 10
fma.rn.f64 %fd118, %fd116, %fd97, %fd117;
mov.f64 %fd119, 0d3F1D79605276949A;
.loc 3 333 10
fma.rn.f64 %fd120, %fd118, %fd97, %fd119;
mov.f64 %fd121, 0dBF60E0D60385A629;
.loc 3 333 10
fma.rn.f64 %fd122, %fd120, %fd97, %fd121;
mov.f64 %fd123, 0dBF648E63600D82F3;
.loc 3 333 10
fma.rn.f64 %fd124, %fd122, %fd97, %fd123;
mov.f64 %fd125, 0d3FA68B984EC6493A;
.loc 3 333 10
fma.rn.f64 %fd126, %fd124, %fd97, %fd125;
mov.f64 %fd127, 0d3F900F7FCF183E0B;
.loc 3 333 10
fma.rn.f64 %fd128, %fd126, %fd97, %fd127;
mov.f64 %fd129, 0dBFD15F7977A772D4;
.loc 3 333 10
fma.rn.f64 %fd130, %fd128, %fd97, %fd129;
mul.f64 %fd216, %fd130, %fd97;
bra.uni BB23_22;
// Large-argument path. First special-case |x| == +infinity (0d7FF0000000000000):
// the result is 0. setp.neu is true for NaN, so NaN continues to BB23_11.
BB23_9:
.loc 3 333 10
abs.f64 %fd131, %fd1;
setp.neu.f64 %p6, %fd131, 0d7FF0000000000000;
@%p6 bra BB23_11;
mov.f64 %fd216, 0d0000000000000000;
bra.uni BB23_22;
BB23_11:
// %rd6 = generic address of the local slot at offset 4 (quadrant of the first reduction).
add.u64 %rd6, %SP, 4;
.loc 3 333 10
// Reciprocal of |x|: single-precision approximation widened to double, then
// refined with three fma steps into %fd139. %fd140 = (1/|x|)^2.
// inline asm
cvt.rn.f32.f64 %f1,%fd1;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd134,%f2;
// inline asm
neg.f64 %fd135, %fd1;
mov.f64 %fd136, 0d3FF0000000000000;
.loc 3 333 10
fma.rn.f64 %fd137, %fd135, %fd134, %fd136;
fma.rn.f64 %fd138, %fd137, %fd137, %fd137;
fma.rn.f64 %fd139, %fd138, %fd134, %fd134;
mul.f64 %fd140, %fd139, %fd139;
// First polynomial in %fd140 -> %fd155 (used below as a multiplicative factor of the result).
mov.f64 %fd141, 0d409927467A655012;
mov.f64 %fd142, 0dC0D115CB8C11A9DC;
.loc 3 333 10
fma.rn.f64 %fd143, %fd142, %fd140, %fd141;
mov.f64 %fd144, 0dC05751787E247BD4;
.loc 3 333 10
fma.rn.f64 %fd145, %fd143, %fd140, %fd144;
mov.f64 %fd146, 0d401704C4E5FC36B2;
.loc 3 333 10
fma.rn.f64 %fd147, %fd145, %fd140, %fd146;
mov.f64 %fd148, 0dBFE15B747A2FD531;
.loc 3 333 10
fma.rn.f64 %fd149, %fd147, %fd140, %fd148;
mov.f64 %fd150, 0d3FBA7FEACF6CB79B;
.loc 3 333 10
fma.rn.f64 %fd151, %fd149, %fd140, %fd150;
mov.f64 %fd152, 0dBFAFFFFFEDDCF548;
.loc 3 333 10
fma.rn.f64 %fd153, %fd151, %fd140, %fd152;
mov.f64 %fd154, 0d3FEFFFFFFFFFC9E5;
.loc 3 333 10
fma.rn.f64 %fd155, %fd153, %fd140, %fd154;
// Second polynomial in %fd140 -> %fd172 (used below to build the trig argument).
mov.f64 %fd156, 0d410ECD4523B12B84;
mov.f64 %fd157, 0dC14602FE1C34685E;
.loc 3 333 10
fma.rn.f64 %fd158, %fd157, %fd140, %fd156;
mov.f64 %fd159, 0dC0C7A2FC1972F05A;
.loc 3 333 10
fma.rn.f64 %fd160, %fd158, %fd140, %fd159;
mov.f64 %fd161, 0d407EBA131F7E5BEB;
.loc 3 333 10
fma.rn.f64 %fd162, %fd160, %fd140, %fd161;
mov.f64 %fd163, 0dC0373B92E6E7CC7D;
.loc 3 333 10
fma.rn.f64 %fd164, %fd162, %fd140, %fd163;
mov.f64 %fd165, 0d3FFA31BEE63A2F08;
.loc 3 333 10
fma.rn.f64 %fd166, %fd164, %fd140, %fd165;
mov.f64 %fd167, 0dBFCAD320104D5D05;
.loc 3 333 10
fma.rn.f64 %fd168, %fd166, %fd140, %fd167;
mov.f64 %fd169, 0d3FB0AAAA9C76D07E;
.loc 3 333 10
fma.rn.f64 %fd170, %fd168, %fd140, %fd169;
mov.f64 %fd171, 0dBFBFFFFFFFFDACEC;
.loc 3 333 10
fma.rn.f64 %fd172, %fd170, %fd140, %fd171;
// %fd5 = %fd172 * (1/|x|) + |x|  -- the argument that is range-reduced next.
fma.rn.f64 %fd5, %fd172, %fd139, %fd1;
// %fd6 = rsqrt(|x|) * 0d3FE9884533D43651 * %fd155  -- final scale factor
// (the constant is presumably sqrt(2/pi) -- confirm).
rsqrt.approx.f64 %fd173, %fd1;
mul.f64 %fd174, %fd173, 0d3FE9884533D43651;
mul.f64 %fd6, %fd174, %fd155;
// First reduction (fast path): quadrant %r42 = round-to-nearest-int(%fd5 * 0d3FE45F306DC9C883),
// saved to the local slot, then %fd212 = %fd5 - quadrant * (three-part constant).
// The multiplier is presumably 2/pi and the three parts pi/2 -- confirm.
mul.f64 %fd175, %fd5, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r42, %fd175;
cvta.to.local.u64 %rd7, %rd6;
.loc 3 333 10
st.local.u32 [%rd7], %r42;
cvt.rn.f64.s32 %fd176, %r42;
neg.f64 %fd177, %fd176;
mov.f64 %fd178, 0d3FF921FB54442D18;
.loc 3 333 10
fma.rn.f64 %fd179, %fd177, %fd178, %fd5;
mov.f64 %fd180, 0d3C91A62633145C00;
.loc 3 333 10
fma.rn.f64 %fd181, %fd177, %fd180, %fd179;
mov.f64 %fd182, 0d397B839A252049C0;
.loc 3 333 10
fma.rn.f64 %fd212, %fd177, %fd182, %fd181;
// Keep the fast-path result only when |%fd5| <= 0d41E0000000000000; otherwise
// call the slow-path helper, which returns the reduced value and (given that
// %r42 is reloaded from the slot afterwards) is expected to write the quadrant
// through the pointer passed as param1.
abs.f64 %fd183, %fd5;
setp.leu.f64 %p7, %fd183, 0d41E0000000000000;
@%p7 bra BB23_13;
// Callseq Start 1
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd5;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 3 333 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd212, [retval0+0];
}
// Callseq End 1
ld.local.u32 %r42, [%rd7];
BB23_13:
// Rebuild a small argument: %fd213 = (quadrant & 3) * 0d3FF921FB54442D18
// + (%fd212 + 0dBFE921FB54442D18). The added constant is presumably -pi/4 -- confirm.
and.b32 %r32, %r42, 3;
cvt.rn.f64.s32 %fd184, %r32;
add.f64 %fd185, %fd212, 0dBFE921FB54442D18;
fma.rn.f64 %fd213, %fd184, 0d3FF921FB54442D18, %fd185;
// If |%fd213| is infinite, multiply it by zero before the second reduction.
abs.f64 %fd186, %fd213;
setp.neu.f64 %p8, %fd186, 0d7FF0000000000000;
@%p8 bra BB23_15;
mov.f64 %fd187, 0d0000000000000000;
.loc 3 333 10
mul.rn.f64 %fd213, %fd213, %fd187;
BB23_15:
// Second reduction of %fd213, same scheme as above, using the local slot at
// offset 0 for the quadrant %r43 and producing %fd214.
add.u64 %rd10, %SP, 0;
.loc 3 333 10
mul.f64 %fd188, %fd213, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r43, %fd188;
cvta.to.local.u64 %rd11, %rd10;
.loc 3 333 10
st.local.u32 [%rd11], %r43;
cvt.rn.f64.s32 %fd189, %r43;
neg.f64 %fd190, %fd189;
fma.rn.f64 %fd192, %fd190, %fd178, %fd213;
fma.rn.f64 %fd194, %fd190, %fd180, %fd192;
fma.rn.f64 %fd214, %fd190, %fd182, %fd194;
abs.f64 %fd196, %fd213;
setp.leu.f64 %p9, %fd196, 0d41E0000000000000;
@%p9 bra BB23_17;
// Callseq Start 2
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd213;
.param .b64 param1;
st.param.b64 [param1+0], %rd10;
.param .b64 retval0;
.loc 3 333 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd214, [retval0+0];
}
// Callseq End 2
ld.local.u32 %r43, [%rd11];
BB23_17:
// %r12 = quadrant + 1. Its low bit selects which half of the constant table
// __cudart_sin_cos_coeffs is used (byte offset 0 or 64) and which leading
// coefficient starts the polynomial; %p10 is true when the low bit is set.
// The table itself is not visible in this chunk.
add.s32 %r12, %r43, 1;
shl.b32 %r33, %r12, 3;
and.b32 %r34, %r33, 8;
and.b32 %r35, %r12, 1;
setp.eq.b32 %p10, %r35, 1;
not.pred %p11, %p10;
selp.f64 %fd197, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p11;
mul.wide.u32 %rd14, %r34, 8;
mov.u64 %rd15, __cudart_sin_cos_coeffs;
add.s64 %rd16, %rd15, %rd14;
.loc 3 333 10
// Polynomial in %fd16 = %fd214^2 with six coefficients loaded from the table.
ld.const.f64 %fd198, [%rd16+8];
mul.rn.f64 %fd16, %fd214, %fd214;
fma.rn.f64 %fd199, %fd197, %fd16, %fd198;
ld.const.f64 %fd200, [%rd16+16];
fma.rn.f64 %fd201, %fd199, %fd16, %fd200;
ld.const.f64 %fd202, [%rd16+24];
fma.rn.f64 %fd203, %fd201, %fd16, %fd202;
ld.const.f64 %fd204, [%rd16+32];
fma.rn.f64 %fd205, %fd203, %fd16, %fd204;
ld.const.f64 %fd206, [%rd16+40];
fma.rn.f64 %fd207, %fd205, %fd16, %fd206;
ld.const.f64 %fd208, [%rd16+48];
fma.rn.f64 %fd17, %fd207, %fd16, %fd208;
// Low bit clear: %fd215 = poly * r + r. Low bit set: %fd215 = poly * r^2 + 1.0.
fma.rn.f64 %fd215, %fd17, %fd214, %fd214;
@%p11 bra BB23_19;
fma.rn.f64 %fd215, %fd17, %fd16, %fd136;
BB23_19:
// Bit 1 of %r12 set: negate the value (multiply by -1.0, add 0.0).
and.b32 %r36, %r12, 2;
setp.eq.s32 %p12, %r36, 0;
@%p12 bra BB23_21;
mov.f64 %fd210, 0d0000000000000000;
mov.f64 %fd211, 0dBFF0000000000000;
.loc 3 333 10
fma.rn.f64 %fd215, %fd215, %fd211, %fd210;
BB23_21:
// Large-argument result = scale factor * trig value.
mul.f64 %fd216, %fd6, %fd215;
// Common tail: store %fd216 at index outer*param_3 + inner, then advance the inner index.
BB23_22:
.loc 2 30 42
mad.lo.s32 %r37, %r40, %r17, %r41;
mul.wide.s32 %rd18, %r37, 8;
add.s64 %rd19, %rd17, %rd18;
st.global.f64 [%rd19], %fd216;
.loc 2 30 22
add.s32 %r41, %r2, %r41;
.loc 2 30 1
setp.lt.s32 %p13, %r41, %r15;
@%p13 bra BB23_3;
// Advance the outer index by nctaid.x * ntid.x and loop while it is < param_1.
BB23_23:
.loc 2 30 22
mov.u32 %r38, %nctaid.x;
mad.lo.s32 %r40, %r38, %r19, %r40;
.loc 2 30 1
setp.lt.s32 %p14, %r40, %r16;
@%p14 bra BB23_2;
BB23_24:
.loc 2 30 2
ret;
}
// map_j1_double: for every (outer, inner) index pair handled by this thread, loads
// a double from the source buffer, evaluates the inlined math routine tagged
// ".loc 3 338" (presumably j1() from math_functions_dbl_ptx3.h -- confirm), and
// stores the result to the destination buffer.
//
// Parameters (as used by the body):
//   param_0 (u32): exclusive bound for the inner index (seeded from the y thread/block ids)
//   param_1 (u32): exclusive bound for the outer index (seeded from the x thread/block ids)
//   param_2 (u64): destination pointer (converted to a global address, written with st.global.f64)
//   param_3 (u32): destination stride, in elements, multiplied by the outer index
//   param_4 (u64): source pointer (converted to a global address, read with ld.global.f64)
//   param_5 (u32): source stride, in elements, multiplied by the outer index
.visible .entry map_j1_double(
.param .u32 map_j1_double_param_0,
.param .u32 map_j1_double_param_1,
.param .u64 map_j1_double_param_2,
.param .u32 map_j1_double_param_3,
.param .u64 map_j1_double_param_4,
.param .u32 map_j1_double_param_5
)
{
// 8 bytes of per-thread local storage: two 32-bit slots (offsets 0 and 4) that
// receive the integer quadrant from the two argument reductions below.
.local .align 4 .b8 __local_depot24[8];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<17>;
.reg .f32 %f<5>;
.reg .s32 %r<43>;
.reg .s64 %rd<20>;
.reg .f64 %fd<215>;
mov.u64 %SPL, __local_depot24;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r14, [map_j1_double_param_0];
ld.param.u32 %r15, [map_j1_double_param_1];
ld.param.u64 %rd1, [map_j1_double_param_2];
ld.param.u32 %r16, [map_j1_double_param_3];
ld.param.u64 %rd2, [map_j1_double_param_4];
ld.param.u32 %r17, [map_j1_double_param_5];
.loc 2 31 1
// Outer index %r39 = ntid.x * ctaid.x + tid.x; exit if it is already >= param_1.
mov.u32 %r18, %ntid.x;
mov.u32 %r19, %ctaid.x;
mov.u32 %r20, %tid.x;
mad.lo.s32 %r39, %r18, %r19, %r20;
.loc 2 31 1
setp.ge.s32 %p1, %r39, %r15;
@%p1 bra BB24_24;
// %rd3 = source base, %rd17 = destination base (global addresses).
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd17, %rd1;
// Outer loop head: restart the inner index %r40 = ntid.y * ctaid.y + tid.y.
BB24_2:
.loc 2 31 1
mov.u32 %r21, %ctaid.y;
mov.u32 %r22, %ntid.y;
mov.u32 %r23, %tid.y;
mad.lo.s32 %r40, %r22, %r21, %r23;
.loc 2 31 1
setp.ge.s32 %p2, %r40, %r14;
@%p2 bra BB24_23;
// Inner loop head: load the input element x (%fd1) at index outer*param_5 + inner.
BB24_3:
.loc 2 31 1
mad.lo.s32 %r28, %r39, %r17, %r40;
mul.wide.s32 %rd4, %r28, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd1, [%rd5];
.loc 3 338 10
// %fd2 = |x|. The magnitude is evaluated first; the sign of x is applied at BB24_22.
abs.f64 %fd2, %fd1;
// Interval 1: taken when |x| is not greater than 0d400353AABAD7B784
// (setp.gtu is also true for NaN, so NaN skips to the next test).
// Polynomial directly in |x| (no shift), as a chain of fma.rn.f64 (Horner form).
setp.gtu.f64 %p3, %fd2, 0d400353AABAD7B784;
@%p3 bra BB24_5;
mov.f64 %fd26, 0dBD4DD167A0DC3F55;
mov.f64 %fd27, 0d3D020E4ADCDE2AD3;
.loc 3 338 10
fma.rn.f64 %fd28, %fd27, %fd2, %fd26;
mov.f64 %fd29, 0d3D5503F5A491E487;
.loc 3 338 10
fma.rn.f64 %fd30, %fd28, %fd2, %fd29;
mov.f64 %fd31, 0d3DC1F29940C2403A;
.loc 3 338 10
fma.rn.f64 %fd32, %fd30, %fd2, %fd31;
mov.f64 %fd33, 0d3D84CF9302EACDEF;
.loc 3 338 10
fma.rn.f64 %fd34, %fd32, %fd2, %fd33;
mov.f64 %fd35, 0dBE384A53DBBCA436;
.loc 3 338 10
fma.rn.f64 %fd36, %fd34, %fd2, %fd35;
mov.f64 %fd37, 0d3D9779BEE4F63BCC;
.loc 3 338 10
fma.rn.f64 %fd38, %fd36, %fd2, %fd37;
mov.f64 %fd39, 0d3EA6C160E414F3F0;
.loc 3 338 10
fma.rn.f64 %fd40, %fd38, %fd2, %fd39;
mov.f64 %fd41, 0d3D8F3D2F12430699;
.loc 3 338 10
fma.rn.f64 %fd42, %fd40, %fd2, %fd41;
mov.f64 %fd43, 0dBF0C71C72C0CED04;
.loc 3 338 10
fma.rn.f64 %fd44, %fd42, %fd2, %fd43;
mov.f64 %fd45, 0d3D659BCA506F1128;
.loc 3 338 10
fma.rn.f64 %fd46, %fd44, %fd2, %fd45;
mov.f64 %fd47, 0d3F65555555506982;
.loc 3 338 10
fma.rn.f64 %fd48, %fd46, %fd2, %fd47;
mov.f64 %fd49, 0d3D15BA0B425F1BFB;
.loc 3 338 10
fma.rn.f64 %fd50, %fd48, %fd2, %fd49;
mov.f64 %fd51, 0dBFB0000000000065;
.loc 3 338 10
fma.rn.f64 %fd52, %fd50, %fd2, %fd51;
mov.f64 %fd53, 0d3C8729A7253FB679;
.loc 3 338 10
fma.rn.f64 %fd54, %fd52, %fd2, %fd53;
mov.f64 %fd55, 0d3FE0000000000000;
.loc 3 338 10
fma.rn.f64 %fd56, %fd54, %fd2, %fd55;
// Result for this interval: polynomial * |x|.
mul.f64 %fd214, %fd56, %fd2;
bra.uni BB24_22;
// Interval 2: |x| not greater than 0d4015B1D0574614EA. |x| is shifted by a
// constant given as two addends (presumably the high/low parts of a zero of
// the function -- confirm) and a polynomial in the shifted argument %fd58 is evaluated.
BB24_5:
.loc 3 338 10
setp.gtu.f64 %p4, %fd2, 0d4015B1D0574614EA;
@%p4 bra BB24_7;
add.f64 %fd57, %fd2, 0dC00EA75575AF6F09;
add.f64 %fd58, %fd57, 0d3CA60155A9D1B256;
mov.f64 %fd59, 0d3D41011A1DF02DAD;
mov.f64 %fd60, 0dBCF8D3CDBB60175E;
.loc 3 338 10
fma.rn.f64 %fd61, %fd60, %fd58, %fd59;
mov.f64 %fd62, 0d3D76013AC1E5E222;
.loc 3 338 10
fma.rn.f64 %fd63, %fd61, %fd58, %fd62;
mov.f64 %fd64, 0dBDBEC315D96D5F03;
.loc 3 338 10
fma.rn.f64 %fd65, %fd63, %fd58, %fd64;
mov.f64 %fd66, 0dBDF03BE1B4B57207;
.loc 3 338 10
fma.rn.f64 %fd67, %fd65, %fd58, %fd66;
mov.f64 %fd68, 0d3E345695F8B660F7;
.loc 3 338 10
fma.rn.f64 %fd69, %fd67, %fd58, %fd68;
mov.f64 %fd70, 0d3E617069FCFCFFF4;
.loc 3 338 10
fma.rn.f64 %fd71, %fd69, %fd58, %fd70;
mov.f64 %fd72, 0dBEA33825C36745EB;
.loc 3 338 10
fma.rn.f64 %fd73, %fd71, %fd58, %fd72;
mov.f64 %fd74, 0dBEC9799D4F90931B;
.loc 3 338 10
fma.rn.f64 %fd75, %fd73, %fd58, %fd74;
mov.f64 %fd76, 0d3F083A06E2F7DF13;
.loc 3 338 10
fma.rn.f64 %fd77, %fd75, %fd58, %fd76;
mov.f64 %fd78, 0d3F26E4C2D53A7CF6;
.loc 3 338 10
fma.rn.f64 %fd79, %fd77, %fd58, %fd78;
mov.f64 %fd80, 0dBF624B3409957B1C;
.loc 3 338 10
fma.rn.f64 %fd81, %fd79, %fd58, %fd80;
mov.f64 %fd82, 0dBF7537544C3325DF;
.loc 3 338 10
fma.rn.f64 %fd83, %fd81, %fd58, %fd82;
mov.f64 %fd84, 0d3FAB589D1DA138E2;
.loc 3 338 10
fma.rn.f64 %fd85, %fd83, %fd58, %fd84;
mov.f64 %fd86, 0d3FAAE8A39F51AD13;
.loc 3 338 10
fma.rn.f64 %fd87, %fd85, %fd58, %fd86;
mov.f64 %fd88, 0dBFD9C6CF582CBF7F;
.loc 3 338 10
fma.rn.f64 %fd89, %fd87, %fd58, %fd88;
mul.f64 %fd214, %fd89, %fd58;
bra.uni BB24_22;
// Interval 3: |x| not greater than 0d40213065E54C1AA9; same scheme with a
// different shift constant and coefficient set.
BB24_7:
.loc 3 338 10
setp.gtu.f64 %p5, %fd2, 0d40213065E54C1AA9;
@%p5 bra BB24_9;
add.f64 %fd90, %fd2, 0dC01C0FF5F3B47250;
add.f64 %fd91, %fd90, 0d3C9B226D9D243827;
mov.f64 %fd92, 0dBD40E8363DB649A9;
mov.f64 %fd93, 0d3CF3EB867515FAD6;
.loc 3 338 10
fma.rn.f64 %fd94, %fd93, %fd91, %fd92;
mov.f64 %fd95, 0dBD73B7DD4A6608FB;
.loc 3 338 10
fma.rn.f64 %fd96, %fd94, %fd91, %fd95;
mov.f64 %fd97, 0d3DBEC5E01482C750;
.loc 3 338 10
fma.rn.f64 %fd98, %fd96, %fd91, %fd97;
mov.f64 %fd99, 0d3DEC62BB9E882103;
.loc 3 338 10
fma.rn.f64 %fd100, %fd98, %fd91, %fd99;
mov.f64 %fd101, 0dBE34462EED732A23;
.loc 3 338 10
fma.rn.f64 %fd102, %fd100, %fd91, %fd101;
mov.f64 %fd103, 0dBE5D48DCAD7DC59B;
.loc 3 338 10
fma.rn.f64 %fd104, %fd102, %fd91, %fd103;
mov.f64 %fd105, 0d3EA3026DF29167E9;
.loc 3 338 10
fma.rn.f64 %fd106, %fd104, %fd91, %fd105;
mov.f64 %fd107, 0d3EC4255B0119666C;
.loc 3 338 10
fma.rn.f64 %fd108, %fd106, %fd91, %fd107;
mov.f64 %fd109, 0dBF0796A751B32693;
.loc 3 338 10
fma.rn.f64 %fd110, %fd108, %fd91, %fd109;
mov.f64 %fd111, 0dBF207358BBDBA284;
.loc 3 338 10
fma.rn.f64 %fd112, %fd110, %fd91, %fd111;
mov.f64 %fd113, 0d3F613FBC7D6927B1;
.loc 3 338 10
fma.rn.f64 %fd114, %fd112, %fd91, %fd113;
mov.f64 %fd115, 0d3F69A4B292E3DD75;
.loc 3 338 10
fma.rn.f64 %fd116, %fd114, %fd91, %fd115;
mov.f64 %fd117, 0dBFA80C83BDEEE4FB;
.loc 3 338 10
fma.rn.f64 %fd118, %fd116, %fd91, %fd117;
mov.f64 %fd119, 0dBF95E70DC60362BF;
.loc 3 338 10
fma.rn.f64 %fd120, %fd118, %fd91, %fd119;
mov.f64 %fd121, 0d3FD33518B3874E8A;
.loc 3 338 10
fma.rn.f64 %fd122, %fd120, %fd91, %fd121;
mul.f64 %fd214, %fd122, %fd91;
bra.uni BB24_22;
// Large-argument path. First special-case |x| == +infinity (0d7FF0000000000000):
// the magnitude result is 0. setp.neu is true for NaN, so NaN continues to BB24_11.
BB24_9:
.loc 3 338 10
abs.f64 %fd123, %fd2;
setp.neu.f64 %p6, %fd123, 0d7FF0000000000000;
@%p6 bra BB24_11;
mov.f64 %fd214, 0d0000000000000000;
bra.uni BB24_22;
BB24_11:
// %rd6 = generic address of the local slot at offset 4 (quadrant of the first reduction).
add.u64 %rd6, %SP, 4;
.loc 3 338 10
// Reciprocal of |x|: single-precision approximation widened to double, then
// refined with three fma steps into %fd131. %fd132 = (1/|x|)^2.
// inline asm
cvt.rn.f32.f64 %f1,%fd2;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd126,%f2;
// inline asm
neg.f64 %fd127, %fd2;
mov.f64 %fd128, 0d3FF0000000000000;
.loc 3 338 10
fma.rn.f64 %fd129, %fd127, %fd126, %fd128;
fma.rn.f64 %fd130, %fd129, %fd129, %fd129;
fma.rn.f64 %fd131, %fd130, %fd126, %fd126;
mul.f64 %fd132, %fd131, %fd131;
// First polynomial in %fd132 -> %fd147 (used below as a multiplicative factor of the result).
mov.f64 %fd133, 0dC099C06322A3F8BE;
mov.f64 %fd134, 0d40CD02EA3F2F6751;
.loc 3 338 10
fma.rn.f64 %fd135, %fd134, %fd132, %fd133;
mov.f64 %fd136, 0d405B89354DA77324;
.loc 3 338 10
fma.rn.f64 %fd137, %fd135, %fd132, %fd136;
mov.f64 %fd138, 0dC01E352294653188;
.loc 3 338 10
fma.rn.f64 %fd139, %fd137, %fd132, %fd138;
mov.f64 %fd140, 0d3FE9BC7DB16BD7A7;
.loc 3 338 10
fma.rn.f64 %fd141, %fd139, %fd132, %fd140;
mov.f64 %fd142, 0dBFC8BFE1C3A4F741;
.loc 3 338 10
fma.rn.f64 %fd143, %fd141, %fd132, %fd142;
mov.f64 %fd144, 0d3FC7FFFFF0D00BE2;
.loc 3 338 10
fma.rn.f64 %fd145, %fd143, %fd132, %fd144;
mov.f64 %fd146, 0d3FF00000000068CC;
.loc 3 338 10
fma.rn.f64 %fd147, %fd145, %fd132, %fd146;
// Second polynomial in %fd132 -> %fd166 (used below to build the trig argument).
mov.f64 %fd148, 0d415A30AC6857BEE0;
mov.f64 %fd149, 0dC18DA26B212FDC9A;
.loc 3 338 10
fma.rn.f64 %fd150, %fd149, %fd132, %fd148;
mov.f64 %fd151, 0dC11764222AD7C910;
.loc 3 338 10
fma.rn.f64 %fd152, %fd150, %fd132, %fd151;
mov.f64 %fd153, 0d40CEB02E0C306857;
.loc 3 338 10
fma.rn.f64 %fd154, %fd152, %fd132, %fd153;
mov.f64 %fd155, 0dC08351859FA2B23B;
.loc 3 338 10
fma.rn.f64 %fd156, %fd154, %fd132, %fd155;
mov.f64 %fd157, 0d403E65A07AF51F42;
.loc 3 338 10
fma.rn.f64 %fd158, %fd156, %fd132, %fd157;
mov.f64 %fd159, 0dC002F2B817F77A57;
.loc 3 338 10
fma.rn.f64 %fd160, %fd158, %fd132, %fd159;
mov.f64 %fd161, 0d3FD7BCC34DA069FD;
.loc 3 338 10
fma.rn.f64 %fd162, %fd160, %fd132, %fd161;
mov.f64 %fd163, 0dBFC4FFFFF8A44463;
.loc 3 338 10
fma.rn.f64 %fd164, %fd162, %fd132, %fd163;
mov.f64 %fd165, 0d3FD7FFFFFFFF5CD7;
.loc 3 338 10
fma.rn.f64 %fd166, %fd164, %fd132, %fd165;
// %fd6 = %fd166 * (1/|x|) + |x|  -- the argument that is range-reduced next.
fma.rn.f64 %fd6, %fd166, %fd131, %fd2;
// %fd7 = rsqrt(|x|) * 0d3FE9884533D43651 * %fd147  -- final scale factor
// (the constant is presumably sqrt(2/pi) -- confirm).
rsqrt.approx.f64 %fd167, %fd2;
mul.f64 %fd168, %fd167, 0d3FE9884533D43651;
mul.f64 %fd7, %fd168, %fd147;
// First reduction (fast path): quadrant %r41 = round-to-nearest-int(%fd6 * 0d3FE45F306DC9C883),
// saved to the local slot, then %fd210 = %fd6 - quadrant * (three-part constant).
// The multiplier is presumably 2/pi and the three parts pi/2 -- confirm.
mul.f64 %fd169, %fd6, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r41, %fd169;
cvta.to.local.u64 %rd7, %rd6;
.loc 3 338 10
st.local.u32 [%rd7], %r41;
cvt.rn.f64.s32 %fd170, %r41;
neg.f64 %fd171, %fd170;
mov.f64 %fd172, 0d3FF921FB54442D18;
.loc 3 338 10
fma.rn.f64 %fd173, %fd171, %fd172, %fd6;
mov.f64 %fd174, 0d3C91A62633145C00;
.loc 3 338 10
fma.rn.f64 %fd175, %fd171, %fd174, %fd173;
mov.f64 %fd176, 0d397B839A252049C0;
.loc 3 338 10
fma.rn.f64 %fd210, %fd171, %fd176, %fd175;
// Keep the fast-path result only when |%fd6| <= 0d41E0000000000000; otherwise
// call the slow-path helper, which returns the reduced value and (given that
// %r41 is reloaded from the slot afterwards) is expected to write the quadrant
// through the pointer passed as param1.
abs.f64 %fd177, %fd6;
setp.leu.f64 %p7, %fd177, 0d41E0000000000000;
@%p7 bra BB24_13;
// Callseq Start 3
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd6;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 3 338 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd210, [retval0+0];
}
// Callseq End 3
ld.local.u32 %r41, [%rd7];
BB24_13:
// Rebuild a small argument: %fd211 = (quadrant & 3) * 0d3FF921FB54442D18
// + (%fd210 + 0dC002D97C7F3321D2). The added constant is presumably -3*pi/4 -- confirm.
and.b32 %r29, %r41, 3;
cvt.rn.f64.s32 %fd178, %r29;
add.f64 %fd179, %fd210, 0dC002D97C7F3321D2;
fma.rn.f64 %fd211, %fd178, 0d3FF921FB54442D18, %fd179;
// If |%fd211| is infinite, multiply it by zero before the second reduction.
abs.f64 %fd180, %fd211;
setp.neu.f64 %p8, %fd180, 0d7FF0000000000000;
@%p8 bra BB24_15;
mov.f64 %fd181, 0d0000000000000000;
.loc 3 338 10
mul.rn.f64 %fd211, %fd211, %fd181;
BB24_15:
// Second reduction of %fd211, same scheme as above, using the local slot at
// offset 0 for the quadrant %r42 and producing %fd212.
add.u64 %rd10, %SP, 0;
.loc 3 338 10
mul.f64 %fd182, %fd211, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r42, %fd182;
cvta.to.local.u64 %rd11, %rd10;
.loc 3 338 10
st.local.u32 [%rd11], %r42;
cvt.rn.f64.s32 %fd183, %r42;
neg.f64 %fd184, %fd183;
fma.rn.f64 %fd186, %fd184, %fd172, %fd211;
fma.rn.f64 %fd188, %fd184, %fd174, %fd186;
fma.rn.f64 %fd212, %fd184, %fd176, %fd188;
abs.f64 %fd190, %fd211;
setp.leu.f64 %p9, %fd190, 0d41E0000000000000;
@%p9 bra BB24_17;
// Callseq Start 4
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd211;
.param .b64 param1;
st.param.b64 [param1+0], %rd10;
.param .b64 retval0;
.loc 3 338 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd212, [retval0+0];
}
// Callseq End 4
ld.local.u32 %r42, [%rd11];
BB24_17:
// %r11 = quadrant + 1. Its low bit selects which half of the constant table
// __cudart_sin_cos_coeffs is used (byte offset 0 or 64) and which leading
// coefficient starts the polynomial; %p10 is true when the low bit is set.
// The table itself is not visible in this chunk.
add.s32 %r11, %r42, 1;
shl.b32 %r30, %r11, 3;
and.b32 %r31, %r30, 8;
and.b32 %r32, %r11, 1;
setp.eq.b32 %p10, %r32, 1;
not.pred %p11, %p10;
selp.f64 %fd191, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p11;
mul.wide.u32 %rd14, %r31, 8;
mov.u64 %rd15, __cudart_sin_cos_coeffs;
add.s64 %rd16, %rd15, %rd14;
.loc 3 338 10
// Polynomial in %fd17 = %fd212^2 with six coefficients loaded from the table.
ld.const.f64 %fd192, [%rd16+8];
mul.rn.f64 %fd17, %fd212, %fd212;
fma.rn.f64 %fd193, %fd191, %fd17, %fd192;
ld.const.f64 %fd194, [%rd16+16];
fma.rn.f64 %fd195, %fd193, %fd17, %fd194;
ld.const.f64 %fd196, [%rd16+24];
fma.rn.f64 %fd197, %fd195, %fd17, %fd196;
ld.const.f64 %fd198, [%rd16+32];
fma.rn.f64 %fd199, %fd197, %fd17, %fd198;
ld.const.f64 %fd200, [%rd16+40];
fma.rn.f64 %fd201, %fd199, %fd17, %fd200;
ld.const.f64 %fd202, [%rd16+48];
fma.rn.f64 %fd18, %fd201, %fd17, %fd202;
// Low bit clear: %fd213 = poly * r + r. Low bit set: %fd213 = poly * r^2 + 1.0.
fma.rn.f64 %fd213, %fd18, %fd212, %fd212;
@%p11 bra BB24_19;
fma.rn.f64 %fd213, %fd18, %fd17, %fd128;
BB24_19:
// Bit 1 of %r11 set: negate the value (multiply by -1.0, add 0.0).
and.b32 %r33, %r11, 2;
setp.eq.s32 %p12, %r33, 0;
@%p12 bra BB24_21;
mov.f64 %fd204, 0d0000000000000000;
mov.f64 %fd205, 0dBFF0000000000000;
.loc 3 338 10
fma.rn.f64 %fd213, %fd213, %fd205, %fd204;
BB24_21:
// Large-argument magnitude result = scale factor * trig value.
mul.f64 %fd214, %fd7, %fd213;
// Common tail. Apply the sign of the original input (negate when x < 0), and for
// tiny inputs (|x| < 0d39B4484BFEEBC2A0) return x * 0.5 instead of the computed value.
BB24_22:
neg.f64 %fd206, %fd214;
setp.lt.f64 %p13, %fd1, 0d0000000000000000;
selp.f64 %fd207, %fd206, %fd214, %p13;
mul.f64 %fd208, %fd1, 0d3FE0000000000000;
setp.lt.f64 %p14, %fd2, 0d39B4484BFEEBC2A0;
selp.f64 %fd209, %fd208, %fd207, %p14;
.loc 2 31 42
// Store the result at index outer*param_3 + inner.
mad.lo.s32 %r34, %r39, %r16, %r40;
mul.wide.s32 %rd18, %r34, 8;
add.s64 %rd19, %rd17, %rd18;
st.global.f64 [%rd19], %fd209;
.loc 2 31 22
// Advance the inner index by nctaid.y * ntid.y and loop while it is < param_0.
mov.u32 %r36, %nctaid.y;
mad.lo.s32 %r40, %r36, %r22, %r40;
.loc 2 31 1
setp.lt.s32 %p15, %r40, %r14;
@%p15 bra BB24_3;
// Advance the outer index by nctaid.x * ntid.x and loop while it is < param_1.
BB24_23:
.loc 2 31 22
mov.u32 %r37, %nctaid.x;
mad.lo.s32 %r39, %r37, %r18, %r39;
.loc 2 31 1
setp.lt.s32 %p16, %r39, %r15;
@%p16 bra BB24_2;
BB24_24:
.loc 2 31 2
ret;
}
.visible .entry map_lgamma_double(
.param .u32 map_lgamma_double_param_0,
.param .u32 map_lgamma_double_param_1,
.param .u64 map_lgamma_double_param_2,
.param .u32 map_lgamma_double_param_3,
.param .u64 map_lgamma_double_param_4,
.param .u32 map_lgamma_double_param_5
)
{
.reg .pred %p<49>;
.reg .f32 %f<21>;
.reg .s32 %r<130>;
.reg .s64 %rd<14>;
.reg .f64 %fd<452>;
ld.param.u32 %r49, [map_lgamma_double_param_0];
ld.param.u32 %r50, [map_lgamma_double_param_1];
ld.param.u64 %rd1, [map_lgamma_double_param_2];
ld.param.u32 %r51, [map_lgamma_double_param_3];
ld.param.u64 %rd2, [map_lgamma_double_param_4];
ld.param.u32 %r52, [map_lgamma_double_param_5];
.loc 2 32 1
mov.u32 %r53, %ntid.x;
mov.u32 %r54, %ctaid.x;
mov.u32 %r55, %tid.x;
mad.lo.s32 %r112, %r53, %r54, %r55;
.loc 2 32 1
setp.ge.s32 %p1, %r112, %r50;
@%p1 bra BB25_72;
.loc 2 32 1
mov.u32 %r56, %ntid.y;
.loc 2 32 22
mov.u32 %r57, %nctaid.y;
mul.lo.s32 %r2, %r57, %r56;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd11, %rd1;
BB25_2:
.loc 2 32 1
mov.u32 %r58, %ctaid.y;
mov.u32 %r60, %tid.y;
mad.lo.s32 %r113, %r56, %r58, %r60;
.loc 2 32 1
setp.ge.s32 %p2, %r113, %r49;
@%p2 bra BB25_71;
BB25_3:
.loc 2 32 1
mad.lo.s32 %r65, %r112, %r52, %r113;
mul.wide.s32 %rd4, %r65, 8;
add.s64 %rd5, %rd3, %rd4;
.loc 2 32 1
ld.global.f64 %fd1, [%rd5];
.loc 3 423 10
abs.f64 %fd2, %fd1;
setp.gtu.f64 %p3, %fd2, 0d7FF0000000000000;
@%p3 bra BB25_69;
setp.ltu.f64 %p4, %fd2, 0d4008000000000000;
@%p4 bra BB25_20;
setp.ltu.f64 %p5, %fd2, 0d4020000000000000;
@%p5 bra BB25_19;
// inline asm
cvt.rn.f32.f64 %f1,%fd2;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd52,%f2;
// inline asm
neg.f64 %fd53, %fd2;
mov.f64 %fd54, 0d3FF0000000000000;
.loc 3 423 10
fma.rn.f64 %fd55, %fd53, %fd52, %fd54;
fma.rn.f64 %fd56, %fd55, %fd55, %fd55;
fma.rn.f64 %fd57, %fd56, %fd52, %fd52;
mul.f64 %fd58, %fd57, %fd57;
mov.f64 %fd59, 0d3F4B68B992738FBF;
mov.f64 %fd60, 0dBF5AC321034783F9;
.loc 3 423 10
fma.rn.f64 %fd61, %fd60, %fd58, %fd59;
mov.f64 %fd62, 0dBF4380D01E4F7B8C;
.loc 3 423 10
fma.rn.f64 %fd63, %fd61, %fd58, %fd62;
mov.f64 %fd64, 0d3F4A019FA29F7264;
.loc 3 423 10
fma.rn.f64 %fd65, %fd63, %fd58, %fd64;
mov.f64 %fd66, 0dBF66C16C16B2ACEC;
.loc 3 423 10
fma.rn.f64 %fd67, %fd65, %fd58, %fd66;
mov.f64 %fd68, 0d3FB5555555555545;
.loc 3 423 10
fma.rn.f64 %fd69, %fd67, %fd58, %fd68;
mov.f64 %fd70, 0d3FED67F1C864BEAE;
.loc 3 423 10
fma.rn.f64 %fd3, %fd69, %fd57, %fd70;
{
.reg .b32 %temp;
mov.b64 {%temp, %r114}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%r115, %temp}, %fd2;
}
setp.lt.f64 %p6, %fd2, 0d7FF0000000000000;
setp.gt.f64 %p7, %fd2, 0d0000000000000000;
and.pred %p8, %p7, %p6;
@%p8 bra BB25_12;
abs.f64 %fd71, %fd2;
setp.gtu.f64 %p9, %fd71, 0d7FF0000000000000;
@%p9 bra BB25_11;
setp.neu.f64 %p10, %fd2, 0d0000000000000000;
@%p10 bra BB25_10;
mov.f64 %fd445, 0dFFF0000000000000;
bra.uni BB25_18;
BB25_10:
.loc 3 423 10
setp.eq.f64 %p11, %fd2, 0d7FF0000000000000;
selp.f64 %fd445, %fd2, 0dFFF8000000000000, %p11;
bra.uni BB25_18;
BB25_11:
.loc 3 423 10
add.f64 %fd445, %fd2, %fd2;
bra.uni BB25_18;
BB25_12:
.loc 3 423 10
setp.lt.u32 %p12, %r114, 1048576;
@%p12 bra BB25_14;
mov.u32 %r116, -1023;
bra.uni BB25_15;
BB25_14:
.loc 3 423 10
mul.f64 %fd73, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r114}, %fd73;
}
{
.reg .b32 %temp;
mov.b64 {%r115, %temp}, %fd73;
}
mov.u32 %r116, -1077;
BB25_15:
.loc 3 423 10
shr.s32 %r68, %r114, 20;
add.s32 %r117, %r116, %r68;
and.b32 %r69, %r114, -2146435073;
or.b32 %r70, %r69, 1072693248;
mov.b64 %fd444, {%r115, %r70};
setp.lt.u32 %p13, %r70, 1073127583;
@%p13 bra BB25_17;
{
.reg .b32 %temp;
mov.b64 {%r71, %temp}, %fd444;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r72}, %fd444;
}
add.s32 %r73, %r72, -1048576;
mov.b64 %fd444, {%r71, %r73};
add.s32 %r117, %r117, 1;
BB25_17:
add.f64 %fd74, %fd444, 0d3FF0000000000000;
// inline asm
cvt.rn.f32.f64 %f5,%fd74;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd75,%f6;
// inline asm
neg.f64 %fd77, %fd74;
fma.rn.f64 %fd78, %fd77, %fd75, %fd54;
fma.rn.f64 %fd79, %fd78, %fd78, %fd78;
fma.rn.f64 %fd80, %fd79, %fd75, %fd75;
add.f64 %fd81, %fd444, 0dBFF0000000000000;
mul.f64 %fd82, %fd81, %fd80;
fma.rn.f64 %fd83, %fd81, %fd80, %fd82;
mul.f64 %fd84, %fd83, %fd83;
mov.f64 %fd85, 0d3ED0EE258B7A8B04;
mov.f64 %fd86, 0d3EB1380B3AE80F1E;
.loc 3 423 10
fma.rn.f64 %fd87, %fd86, %fd84, %fd85;
mov.f64 %fd88, 0d3EF3B2669F02676F;
.loc 3 423 10
fma.rn.f64 %fd89, %fd87, %fd84, %fd88;
mov.f64 %fd90, 0d3F1745CBA9AB0956;
.loc 3 423 10
fma.rn.f64 %fd91, %fd89, %fd84, %fd90;
mov.f64 %fd92, 0d3F3C71C72D1B5154;
.loc 3 423 10
fma.rn.f64 %fd93, %fd91, %fd84, %fd92;
mov.f64 %fd94, 0d3F624924923BE72D;
.loc 3 423 10
fma.rn.f64 %fd95, %fd93, %fd84, %fd94;
mov.f64 %fd96, 0d3F8999999999A3C4;
.loc 3 423 10
fma.rn.f64 %fd97, %fd95, %fd84, %fd96;
mov.f64 %fd98, 0d3FB5555555555554;
.loc 3 423 10
fma.rn.f64 %fd99, %fd97, %fd84, %fd98;
sub.f64 %fd100, %fd81, %fd83;
add.f64 %fd101, %fd100, %fd100;
neg.f64 %fd102, %fd83;
fma.rn.f64 %fd103, %fd102, %fd81, %fd101;
mul.f64 %fd104, %fd80, %fd103;
mul.f64 %fd105, %fd99, %fd84;
fma.rn.f64 %fd106, %fd105, %fd83, %fd104;
cvt.rn.f64.s32 %fd107, %r117;
mov.f64 %fd108, 0d3FE62E42FEFA39EF;
.loc 3 423 10
fma.rn.f64 %fd109, %fd107, %fd108, %fd83;
neg.s32 %r74, %r117;
cvt.rn.f64.s32 %fd110, %r74;
fma.rn.f64 %fd111, %fd110, %fd108, %fd109;
sub.f64 %fd112, %fd111, %fd83;
sub.f64 %fd113, %fd106, %fd112;
mov.f64 %fd114, 0d3C7ABC9E3B39803F;
.loc 3 423 10
fma.rn.f64 %fd115, %fd107, %fd114, %fd113;
add.f64 %fd445, %fd109, %fd115;
BB25_18:
{
.reg .b32 %temp;
mov.b64 {%temp, %r75}, %fd445;
}
add.s32 %r76, %r75, -1048576;
{
.reg .b32 %temp;
mov.b64 {%r77, %temp}, %fd445;
}
mov.b64 %fd116, {%r77, %r76};
add.f64 %fd117, %fd2, 0dBFE0000000000000;
fma.rn.f64 %fd118, %fd116, %fd117, %fd3;
fma.rn.f64 %fd120, %fd116, %fd117, %fd53;
add.f64 %fd121, %fd120, %fd118;
setp.eq.f64 %p14, %fd2, 0d7FF0000000000000;
selp.f64 %fd451, %fd2, %fd121, %p14;
bra.uni BB25_37;
BB25_19:
.loc 3 423 10
add.f64 %fd122, %fd2, 0dC008000000000000;
mov.f64 %fd123, 0dC1122B7730207EF3;
mov.f64 %fd124, 0dC0AF7040BB18FB05;
.loc 3 423 10
fma.rn.f64 %fd125, %fd124, %fd122, %fd123;
mov.f64 %fd126, 0dC1585A0DB81DE7D0;
.loc 3 423 10
fma.rn.f64 %fd127, %fd125, %fd122, %fd126;
mov.f64 %fd128, 0dC18A992B8BA94677;
.loc 3 423 10
fma.rn.f64 %fd129, %fd127, %fd122, %fd128;
mov.f64 %fd130, 0dC1AAC5CB6957CC20;
.loc 3 423 10
fma.rn.f64 %fd131, %fd129, %fd122, %fd130;
mov.f64 %fd132, 0dC1BC0E2B308774BE;
.loc 3 423 10
fma.rn.f64 %fd133, %fd131, %fd122, %fd132;
mov.f64 %fd134, 0dC1C6BA13DCAE7F67;
.loc 3 423 10
fma.rn.f64 %fd135, %fd133, %fd122, %fd134;
mov.f64 %fd136, 0dC1CCF33B9C3D120C;
.loc 3 423 10
fma.rn.f64 %fd137, %fd135, %fd122, %fd136;
add.f64 %fd138, %fd122, 0dC08FF62E0BE189FE;
mov.f64 %fd139, 0dC10074FACE10C93F;
.loc 3 423 10
fma.rn.f64 %fd140, %fd138, %fd122, %fd139;
mov.f64 %fd141, 0dC151B662F8D75791;
.loc 3 423 10
fma.rn.f64 %fd142, %fd140, %fd122, %fd141;
mov.f64 %fd143, 0dC18EE64AB4D207F7;
.loc 3 423 10
fma.rn.f64 %fd144, %fd142, %fd122, %fd143;
mov.f64 %fd145, 0dC1B9051687C9951A;
.loc 3 423 10
fma.rn.f64 %fd146, %fd144, %fd122, %fd145;
mov.f64 %fd147, 0dC1D2B866BF0B853D;
.loc 3 423 10
fma.rn.f64 %fd148, %fd146, %fd122, %fd147;
mov.f64 %fd149, 0dC1D4E2130E9DC133;
.loc 3 423 10
fma.rn.f64 %fd150, %fd148, %fd122, %fd149;
div.rn.f64 %fd151, %fd137, %fd150;
add.f64 %fd451, %fd151, %fd122;
bra.uni BB25_37;
BB25_20:
.loc 3 423 10
setp.ltu.f64 %p15, %fd2, 0d3FF8000000000000;
@%p15 bra BB25_22;
add.f64 %fd152, %fd2, 0dC000000000000000;
mov.f64 %fd153, 0dBE71FA71D78C0EE2;
mov.f64 %fd154, 0d3E452636124338B3;
.loc 3 423 10
fma.rn.f64 %fd155, %fd154, %fd152, %fd153;
mov.f64 %fd156, 0d3E8D111F31E61306;
.loc 3 423 10
fma.rn.f64 %fd157, %fd155, %fd152, %fd156;
mov.f64 %fd158, 0dBEA0502BBE1B2706;
.loc 3 423 10
fma.rn.f64 %fd159, %fd157, %fd152, %fd158;
mov.f64 %fd160, 0d3EB06850B2970292;
.loc 3 423 10
fma.rn.f64 %fd161, %fd159, %fd152, %fd160;
mov.f64 %fd162, 0dBEC108474875033D;
.loc 3 423 10
fma.rn.f64 %fd163, %fd161, %fd152, %fd162;
mov.f64 %fd164, 0d3ED24ACCC62909DC;
.loc 3 423 10
fma.rn.f64 %fd165, %fd163, %fd152, %fd164;
mov.f64 %fd166, 0dBEE3CB25209E63BE;
.loc 3 423 10
fma.rn.f64 %fd167, %fd165, %fd152, %fd166;
mov.f64 %fd168, 0d3EF581CBBC8CDC7B;
.loc 3 423 10
fma.rn.f64 %fd169, %fd167, %fd152, %fd168;
mov.f64 %fd170, 0dBF078E04B85C7597;
.loc 3 423 10
fma.rn.f64 %fd171, %fd169, %fd152, %fd170;
mov.f64 %fd172, 0d3F1A12730CF45051;
.loc 3 423 10
fma.rn.f64 %fd173, %fd171, %fd152, %fd172;
mov.f64 %fd174, 0dBF2D3FD354062012;
.loc 3 423 10
fma.rn.f64 %fd175, %fd173, %fd152, %fd174;
mov.f64 %fd176, 0d3F40B36B0B4DE323;
.loc 3 423 10
fma.rn.f64 %fd177, %fd175, %fd152, %fd176;
mov.f64 %fd178, 0dBF538AC5C6D0317A;
.loc 3 423 10
fma.rn.f64 %fd179, %fd177, %fd152, %fd178;
mov.f64 %fd180, 0d3F67ADD6EAAB19FC;
.loc 3 423 10
fma.rn.f64 %fd181, %fd179, %fd152, %fd180;
mov.f64 %fd182, 0dBF7E404FC20E4D5B;
.loc 3 423 10
fma.rn.f64 %fd183, %fd181, %fd152, %fd182;
mov.f64 %fd184, 0d3F951322AC7DA390;
.loc 3 423 10
fma.rn.f64 %fd185, %fd183, %fd152, %fd184;
mov.f64 %fd186, 0dBFB13E001A5578A3;
.loc 3 423 10
fma.rn.f64 %fd187, %fd185, %fd152, %fd186;
mov.f64 %fd188, 0d3FD4A34CC4A60FA3;
.loc 3 423 10
fma.rn.f64 %fd189, %fd187, %fd152, %fd188;
mov.f64 %fd190, 0d3FDB0EE6072093CF;
.loc 3 423 10
fma.rn.f64 %fd191, %fd189, %fd152, %fd190;
mul.f64 %fd451, %fd191, %fd152;
bra.uni BB25_37;
BB25_22:
.loc 3 423 10
setp.ltu.f64 %p16, %fd2, 0d3FE6666666666666;
@%p16 bra BB25_24;
mov.f64 %fd192, 0d3FF0000000000000;
.loc 3 423 10
sub.f64 %fd193, %fd192, %fd2;
mov.f64 %fd194, 0d3FA3EB504359EB88;
mov.f64 %fd195, 0d3F881F6D2A4C4310;
.loc 3 423 10
fma.rn.f64 %fd196, %fd195, %fd193, %fd194;
mov.f64 %fd197, 0d3FAE35D8DEB06317;
.loc 3 423 10
fma.rn.f64 %fd198, %fd196, %fd193, %fd197;
mov.f64 %fd199, 0d3FAED469A8B6ECCE;
.loc 3 423 10
fma.rn.f64 %fd200, %fd198, %fd193, %fd199;
mov.f64 %fd201, 0d3FACC1B1C357BEFE;
.loc 3 423 10
fma.rn.f64 %fd202, %fd200, %fd193, %fd201;
mov.f64 %fd203, 0d3FAD7154DB67F79F;
.loc 3 423 10
fma.rn.f64 %fd204, %fd202, %fd193, %fd203;
mov.f64 %fd205, 0d3FAFCC622CF2F7BB;
.loc 3 423 10
fma.rn.f64 %fd206, %fd204, %fd193, %fd205;
mov.f64 %fd207, 0d3FB11747A4D1CC43;
.loc 3 423 10
fma.rn.f64 %fd208, %fd206, %fd193, %fd207;
mov.f64 %fd209, 0d3FB24CE16A21B8AC;
.loc 3 423 10
fma.rn.f64 %fd210, %fd208, %fd193, %fd209;
mov.f64 %fd211, 0d3FB3B1C21A7BCB00;
.loc 3 423 10
fma.rn.f64 %fd212, %fd210, %fd193, %fd211;
mov.f64 %fd213, 0d3FB556723452ED57;
.loc 3 423 10
fma.rn.f64 %fd214, %fd212, %fd193, %fd213;
mov.f64 %fd215, 0d3FB748C00891544F;
.loc 3 423 10
fma.rn.f64 %fd216, %fd214, %fd193, %fd215;
mov.f64 %fd217, 0d3FB9A0207808CF40;
.loc 3 423 10
fma.rn.f64 %fd218, %fd216, %fd193, %fd217;
mov.f64 %fd219, 0d3FBC80673B8AE26B;
.loc 3 423 10
fma.rn.f64 %fd220, %fd218, %fd193, %fd219;
mov.f64 %fd221, 0d3FC010B364B7E555;
.loc 3 423 10
fma.rn.f64 %fd222, %fd220, %fd193, %fd221;
mov.f64 %fd223, 0d3FC2703A1D239658;
.loc 3 423 10
fma.rn.f64 %fd224, %fd222, %fd193, %fd223;
mov.f64 %fd225, 0d3FC5B40CB1137E6E;
.loc 3 423 10
fma.rn.f64 %fd226, %fd224, %fd193, %fd225;
mov.f64 %fd227, 0d3FCA8B9C17AC4F03;
.loc 3 423 10
fma.rn.f64 %fd228, %fd226, %fd193, %fd227;
mov.f64 %fd229, 0d3FD151322AC7CB52;
.loc 3 423 10
fma.rn.f64 %fd230, %fd228, %fd193, %fd229;
mov.f64 %fd231, 0d3FD9A4D55BEAB1D4;
.loc 3 423 10
fma.rn.f64 %fd232, %fd230, %fd193, %fd231;
mov.f64 %fd233, 0d3FEA51A6625307D6;
.loc 3 423 10
fma.rn.f64 %fd234, %fd232, %fd193, %fd233;
mov.f64 %fd235, 0d3FE2788CFC6FB619;
.loc 3 423 10
fma.rn.f64 %fd236, %fd234, %fd193, %fd235;
mul.f64 %fd451, %fd236, %fd193;
bra.uni BB25_37;
BB25_24:
mov.f64 %fd237, 0d3EA7B77CEB0625E8;
mov.f64 %fd238, 0dBE7844988BFE6590;
.loc 3 423 10
fma.rn.f64 %fd239, %fd238, %fd2, %fd237;
mov.f64 %fd240, 0dBE998C69C8710CC4;
.loc 3 423 10
fma.rn.f64 %fd241, %fd239, %fd2, %fd240;
mov.f64 %fd242, 0dBEF6527A5A11CF6E;
.loc 3 423 10
fma.rn.f64 %fd243, %fd241, %fd2, %fd242;
mov.f64 %fd244, 0d3F20EC2950B1B5DE;
.loc 3 423 10
fma.rn.f64 %fd245, %fd243, %fd2, %fd244;
mov.f64 %fd246, 0dBF2C4D80C24BA278;
.loc 3 423 10
fma.rn.f64 %fd247, %fd245, %fd2, %fd246;
mov.f64 %fd248, 0dBF5315B4E8CC0D09;
.loc 3 423 10
fma.rn.f64 %fd249, %fd247, %fd2, %fd248;
mov.f64 %fd250, 0d3F7D917F15D50020;
.loc 3 423 10
fma.rn.f64 %fd251, %fd249, %fd2, %fd250;
mov.f64 %fd252, 0dBF83B4ABB41CB6FA;
.loc 3 423 10
fma.rn.f64 %fd253, %fd251, %fd2, %fd252;
mov.f64 %fd254, 0dBFA59AF1275B7120;
.loc 3 423 10
fma.rn.f64 %fd255, %fd253, %fd2, %fd254;
mov.f64 %fd256, 0d3FC5512321A168A0;
.loc 3 423 10
fma.rn.f64 %fd257, %fd255, %fd2, %fd256;
mov.f64 %fd258, 0dBFA5815E8FDCE74C;
.loc 3 423 10
fma.rn.f64 %fd259, %fd257, %fd2, %fd258;
mov.f64 %fd260, 0dBFE4FCF4026ADD1A;
.loc 3 423 10
fma.rn.f64 %fd261, %fd259, %fd2, %fd260;
mov.f64 %fd262, 0d3FE2788CFC6FB5C8;
.loc 3 423 10
fma.rn.f64 %fd263, %fd261, %fd2, %fd262;
mul.f64 %fd264, %fd263, %fd2;
fma.rn.f64 %fd15, %fd264, %fd2, %fd2;
{
.reg .b32 %temp;
mov.b64 {%temp, %r118}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%r119, %temp}, %fd15;
}
setp.gt.f64 %p17, %fd15, 0d0000000000000000;
setp.lt.f64 %p18, %fd15, 0d7FF0000000000000;
and.pred %p19, %p17, %p18;
@%p19 bra BB25_30;
abs.f64 %fd265, %fd15;
setp.gtu.f64 %p20, %fd265, 0d7FF0000000000000;
@%p20 bra BB25_29;
setp.neu.f64 %p21, %fd15, 0d0000000000000000;
@%p21 bra BB25_28;
mov.f64 %fd447, 0dFFF0000000000000;
bra.uni BB25_36;
BB25_28:
.loc 3 423 10
setp.eq.f64 %p22, %fd15, 0d7FF0000000000000;
selp.f64 %fd447, %fd15, 0dFFF8000000000000, %p22;
bra.uni BB25_36;
BB25_29:
.loc 3 423 10
add.f64 %fd447, %fd15, %fd15;
bra.uni BB25_36;
BB25_30:
.loc 3 423 10
setp.lt.u32 %p23, %r118, 1048576;
@%p23 bra BB25_32;
mov.u32 %r120, -1023;
bra.uni BB25_33;
BB25_32:
.loc 3 423 10
mul.f64 %fd267, %fd15, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r118}, %fd267;
}
{
.reg .b32 %temp;
mov.b64 {%r119, %temp}, %fd267;
}
mov.u32 %r120, -1077;
BB25_33:
.loc 3 423 10
shr.s32 %r80, %r118, 20;
add.s32 %r121, %r120, %r80;
and.b32 %r81, %r118, -2146435073;
or.b32 %r82, %r81, 1072693248;
mov.b64 %fd446, {%r119, %r82};
setp.lt.u32 %p24, %r82, 1073127583;
@%p24 bra BB25_35;
{
.reg .b32 %temp;
mov.b64 {%r83, %temp}, %fd446;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r84}, %fd446;
}
add.s32 %r85, %r84, -1048576;
mov.b64 %fd446, {%r83, %r85};
add.s32 %r121, %r121, 1;
BB25_35:
add.f64 %fd268, %fd446, 0d3FF0000000000000;
mov.f64 %fd270, 0d3FF0000000000000;
.loc 3 423 10
// inline asm
cvt.rn.f32.f64 %f9,%fd268;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f10,%f9;
// inline asm
// inline asm
cvt.f64.f32 %fd269,%f10;
// inline asm
neg.f64 %fd271, %fd268;
fma.rn.f64 %fd272, %fd271, %fd269, %fd270;
fma.rn.f64 %fd273, %fd272, %fd272, %fd272;
fma.rn.f64 %fd274, %fd273, %fd269, %fd269;
add.f64 %fd275, %fd446, 0dBFF0000000000000;
mul.f64 %fd276, %fd275, %fd274;
fma.rn.f64 %fd277, %fd275, %fd274, %fd276;
mul.f64 %fd278, %fd277, %fd277;
mov.f64 %fd279, 0d3ED0EE258B7A8B04;
mov.f64 %fd280, 0d3EB1380B3AE80F1E;
.loc 3 423 10
fma.rn.f64 %fd281, %fd280, %fd278, %fd279;
mov.f64 %fd282, 0d3EF3B2669F02676F;
.loc 3 423 10
fma.rn.f64 %fd283, %fd281, %fd278, %fd282;
mov.f64 %fd284, 0d3F1745CBA9AB0956;
.loc 3 423 10
fma.rn.f64 %fd285, %fd283, %fd278, %fd284;
mov.f64 %fd286, 0d3F3C71C72D1B5154;
.loc 3 423 10
fma.rn.f64 %fd287, %fd285, %fd278, %fd286;
mov.f64 %fd288, 0d3F624924923BE72D;
.loc 3 423 10
fma.rn.f64 %fd289, %fd287, %fd278, %fd288;
mov.f64 %fd290, 0d3F8999999999A3C4;
.loc 3 423 10
fma.rn.f64 %fd291, %fd289, %fd278, %fd290;
mov.f64 %fd292, 0d3FB5555555555554;
.loc 3 423 10
fma.rn.f64 %fd293, %fd291, %fd278, %fd292;
sub.f64 %fd294, %fd275, %fd277;
add.f64 %fd295, %fd294, %fd294;
neg.f64 %fd296, %fd277;
fma.rn.f64 %fd297, %fd296, %fd275, %fd295;
mul.f64 %fd298, %fd274, %fd297;
mul.f64 %fd299, %fd293, %fd278;
fma.rn.f64 %fd300, %fd299, %fd277, %fd298;
cvt.rn.f64.s32 %fd301, %r121;
mov.f64 %fd302, 0d3FE62E42FEFA39EF;
.loc 3 423 10
fma.rn.f64 %fd303, %fd301, %fd302, %fd277;
neg.s32 %r86, %r121;
cvt.rn.f64.s32 %fd304, %r86;
fma.rn.f64 %fd305, %fd304, %fd302, %fd303;
sub.f64 %fd306, %fd305, %fd277;
sub.f64 %fd307, %fd300, %fd306;
mov.f64 %fd308, 0d3C7ABC9E3B39803F;
.loc 3 423 10
fma.rn.f64 %fd309, %fd301, %fd308, %fd307;
add.f64 %fd447, %fd303, %fd309;
BB25_36:
neg.f64 %fd451, %fd447;
BB25_37:
setp.ge.f64 %p25, %fd1, 0d0000000000000000;
@%p25 bra BB25_70;
cvt.rzi.f64.f64 %fd310, %fd2;
setp.neu.f64 %p26, %fd2, %fd310;
@%p26 bra BB25_40;
mov.f64 %fd451, 0d7FF0000000000000;
bra.uni BB25_70;
BB25_40:
.loc 3 423 10
setp.lt.f64 %p27, %fd2, 0d3BFD83C94FB6D2AC;
{
.reg .b32 %temp;
mov.b64 {%temp, %r126}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%r127, %temp}, %fd2;
}
@%p27 bra BB25_57;
add.s32 %r87, %r126, 1048576;
mov.b64 %fd311, {%r127, %r87};
cvt.rni.f64.f64 %fd312, %fd311;
cvt.rzi.s64.f64 %rd6, %fd312;
cvt.u32.u64 %r28, %rd6;
neg.f64 %fd313, %fd312;
mov.f64 %fd314, 0d3FE0000000000000;
.loc 3 423 10
fma.rn.f64 %fd315, %fd313, %fd314, %fd2;
mul.f64 %fd316, %fd315, 0d3CA1A62633145C07;
mov.f64 %fd317, 0d400921FB54442D18;
.loc 3 423 10
fma.rn.f64 %fd318, %fd315, %fd317, %fd316;
shl.b32 %r88, %r28, 3;
and.b32 %r89, %r88, 8;
mul.rn.f64 %fd25, %fd318, %fd318;
and.b64 %rd7, %rd6, 1;
setp.eq.b64 %p28, %rd7, 1;
not.pred %p29, %p28;
selp.f64 %fd319, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p29;
mul.wide.u32 %rd8, %r89, 8;
mov.u64 %rd9, __cudart_sin_cos_coeffs;
add.s64 %rd10, %rd9, %rd8;
.loc 3 423 10
ld.const.f64 %fd320, [%rd10+8];
fma.rn.f64 %fd321, %fd319, %fd25, %fd320;
ld.const.f64 %fd322, [%rd10+16];
fma.rn.f64 %fd323, %fd321, %fd25, %fd322;
ld.const.f64 %fd324, [%rd10+24];
fma.rn.f64 %fd325, %fd323, %fd25, %fd324;
ld.const.f64 %fd326, [%rd10+32];
fma.rn.f64 %fd327, %fd325, %fd25, %fd326;
ld.const.f64 %fd328, [%rd10+40];
fma.rn.f64 %fd329, %fd327, %fd25, %fd328;
ld.const.f64 %fd330, [%rd10+48];
fma.rn.f64 %fd26, %fd329, %fd25, %fd330;
fma.rn.f64 %fd448, %fd26, %fd318, %fd318;
@%p29 bra BB25_43;
mov.f64 %fd331, 0d3FF0000000000000;
.loc 3 423 10
fma.rn.f64 %fd448, %fd26, %fd25, %fd331;
BB25_43:
and.b32 %r90, %r28, 2;
setp.eq.s32 %p30, %r90, 0;
@%p30 bra BB25_45;
mov.f64 %fd332, 0d0000000000000000;
mov.f64 %fd333, 0dBFF0000000000000;
.loc 3 423 10
fma.rn.f64 %fd448, %fd448, %fd333, %fd332;
BB25_45:
abs.f64 %fd334, %fd448;
mul.f64 %fd335, %fd334, %fd2;
div.rn.f64 %fd32, %fd317, %fd335;
{
.reg .b32 %temp;
mov.b64 {%temp, %r122}, %fd32;
}
{
.reg .b32 %temp;
mov.b64 {%r123, %temp}, %fd32;
}
setp.gt.f64 %p31, %fd32, 0d0000000000000000;
setp.lt.f64 %p32, %fd32, 0d7FF0000000000000;
and.pred %p33, %p31, %p32;
@%p33 bra BB25_51;
abs.f64 %fd337, %fd32;
setp.gtu.f64 %p34, %fd337, 0d7FF0000000000000;
@%p34 bra BB25_50;
setp.neu.f64 %p35, %fd32, 0d0000000000000000;
@%p35 bra BB25_49;
mov.f64 %fd338, 0dFFF0000000000000;
.loc 3 423 10
sub.f64 %fd451, %fd338, %fd451;
bra.uni BB25_70;
BB25_49:
.loc 3 423 10
setp.eq.f64 %p36, %fd32, 0d7FF0000000000000;
selp.f64 %fd33, %fd32, 0dFFF8000000000000, %p36;
sub.f64 %fd451, %fd33, %fd451;
bra.uni BB25_70;
BB25_50:
.loc 3 423 10
add.f64 %fd34, %fd32, %fd32;
sub.f64 %fd451, %fd34, %fd451;
bra.uni BB25_70;
BB25_51:
.loc 3 423 10
setp.lt.u32 %p37, %r122, 1048576;
@%p37 bra BB25_53;
mov.u32 %r124, -1023;
bra.uni BB25_54;
BB25_53:
.loc 3 423 10
mul.f64 %fd339, %fd32, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r122}, %fd339;
}
{
.reg .b32 %temp;
mov.b64 {%r123, %temp}, %fd339;
}
mov.u32 %r124, -1077;
BB25_54:
.loc 3 423 10
shr.s32 %r93, %r122, 20;
add.s32 %r125, %r124, %r93;
and.b32 %r94, %r122, -2146435073;
or.b32 %r95, %r94, 1072693248;
mov.b64 %fd449, {%r123, %r95};
setp.lt.u32 %p38, %r95, 1073127583;
@%p38 bra BB25_56;
{
.reg .b32 %temp;
mov.b64 {%r96, %temp}, %fd449;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r97}, %fd449;
}
add.s32 %r98, %r97, -1048576;
mov.b64 %fd449, {%r96, %r98};
add.s32 %r125, %r125, 1;
BB25_56:
add.f64 %fd340, %fd449, 0d3FF0000000000000;
mov.f64 %fd342, 0d3FF0000000000000;
.loc 3 423 10
// inline asm
cvt.rn.f32.f64 %f13,%fd340;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f14,%f13;
// inline asm
// inline asm
cvt.f64.f32 %fd341,%f14;
// inline asm
neg.f64 %fd343, %fd340;
fma.rn.f64 %fd344, %fd343, %fd341, %fd342;
fma.rn.f64 %fd345, %fd344, %fd344, %fd344;
fma.rn.f64 %fd346, %fd345, %fd341, %fd341;
add.f64 %fd347, %fd449, 0dBFF0000000000000;
mul.f64 %fd348, %fd347, %fd346;
fma.rn.f64 %fd349, %fd347, %fd346, %fd348;
mul.f64 %fd350, %fd349, %fd349;
mov.f64 %fd351, 0d3ED0EE258B7A8B04;
mov.f64 %fd352, 0d3EB1380B3AE80F1E;
.loc 3 423 10
fma.rn.f64 %fd353, %fd352, %fd350, %fd351;
mov.f64 %fd354, 0d3EF3B2669F02676F;
.loc 3 423 10
fma.rn.f64 %fd355, %fd353, %fd350, %fd354;
mov.f64 %fd356, 0d3F1745CBA9AB0956;
.loc 3 423 10
fma.rn.f64 %fd357, %fd355, %fd350, %fd356;
mov.f64 %fd358, 0d3F3C71C72D1B5154;
.loc 3 423 10
fma.rn.f64 %fd359, %fd357, %fd350, %fd358;
mov.f64 %fd360, 0d3F624924923BE72D;
.loc 3 423 10
fma.rn.f64 %fd361, %fd359, %fd350, %fd360;
mov.f64 %fd362, 0d3F8999999999A3C4;
.loc 3 423 10
fma.rn.f64 %fd363, %fd361, %fd350, %fd362;
mov.f64 %fd364, 0d3FB5555555555554;
.loc 3 423 10
fma.rn.f64 %fd365, %fd363, %fd350, %fd364;
sub.f64 %fd366, %fd347, %fd349;
add.f64 %fd367, %fd366, %fd366;
neg.f64 %fd368, %fd349;
fma.rn.f64 %fd369, %fd368, %fd347, %fd367;
mul.f64 %fd370, %fd346, %fd369;
mul.f64 %fd371, %fd365, %fd350;
fma.rn.f64 %fd372, %fd371, %fd349, %fd370;
cvt.rn.f64.s32 %fd373, %r125;
mov.f64 %fd374, 0d3FE62E42FEFA39EF;
.loc 3 423 10
fma.rn.f64 %fd375, %fd373, %fd374, %fd349;
neg.s32 %r99, %r125;
cvt.rn.f64.s32 %fd376, %r99;
fma.rn.f64 %fd377, %fd376, %fd374, %fd375;
sub.f64 %fd378, %fd377, %fd349;
sub.f64 %fd379, %fd372, %fd378;
mov.f64 %fd380, 0d3C7ABC9E3B39803F;
.loc 3 423 10
fma.rn.f64 %fd381, %fd373, %fd380, %fd379;
add.f64 %fd38, %fd375, %fd381;
sub.f64 %fd451, %fd38, %fd451;
bra.uni BB25_70;
BB25_57:
.loc 3 423 10
setp.gt.f64 %p39, %fd2, 0d0000000000000000;
setp.lt.f64 %p40, %fd2, 0d7FF0000000000000;
and.pred %p41, %p39, %p40;
@%p41 bra BB25_63;
abs.f64 %fd382, %fd2;
setp.gtu.f64 %p42, %fd382, 0d7FF0000000000000;
@%p42 bra BB25_62;
setp.neu.f64 %p43, %fd2, 0d0000000000000000;
@%p43 bra BB25_61;
mov.f64 %fd383, 0dFFF0000000000000;
.loc 3 423 10
neg.f64 %fd451, %fd383;
bra.uni BB25_70;
BB25_61:
.loc 3 423 10
setp.eq.f64 %p44, %fd2, 0d7FF0000000000000;
selp.f64 %fd41, %fd2, 0dFFF8000000000000, %p44;
neg.f64 %fd451, %fd41;
bra.uni BB25_70;
BB25_62:
.loc 3 423 10
add.f64 %fd42, %fd2, %fd2;
neg.f64 %fd451, %fd42;
bra.uni BB25_70;
BB25_63:
.loc 3 423 10
setp.lt.u32 %p45, %r126, 1048576;
@%p45 bra BB25_65;
mov.u32 %r128, -1023;
bra.uni BB25_66;
BB25_65:
.loc 3 423 10
mul.f64 %fd384, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r126}, %fd384;
}
{
.reg .b32 %temp;
mov.b64 {%r127, %temp}, %fd384;
}
mov.u32 %r128, -1077;
BB25_66:
.loc 3 423 10
shr.s32 %r102, %r126, 20;
add.s32 %r129, %r128, %r102;
and.b32 %r103, %r126, -2146435073;
or.b32 %r104, %r103, 1072693248;
mov.b64 %fd450, {%r127, %r104};
setp.lt.u32 %p46, %r104, 1073127583;
@%p46 bra BB25_68;
{
.reg .b32 %temp;
mov.b64 {%r105, %temp}, %fd450;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r106}, %fd450;
}
add.s32 %r107, %r106, -1048576;
mov.b64 %fd450, {%r105, %r107};
add.s32 %r129, %r129, 1;
BB25_68:
add.f64 %fd385, %fd450, 0d3FF0000000000000;
mov.f64 %fd387, 0d3FF0000000000000;
.loc 3 423 10
// inline asm
cvt.rn.f32.f64 %f17,%fd385;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f18,%f17;
// inline asm
// inline asm
cvt.f64.f32 %fd386,%f18;
// inline asm
neg.f64 %fd388, %fd385;
fma.rn.f64 %fd389, %fd388, %fd386, %fd387;
fma.rn.f64 %fd390, %fd389, %fd389, %fd389;
fma.rn.f64 %fd391, %fd390, %fd386, %fd386;
add.f64 %fd392, %fd450, 0dBFF0000000000000;
mul.f64 %fd393, %fd392, %fd391;
fma.rn.f64 %fd394, %fd392, %fd391, %fd393;
mul.f64 %fd395, %fd394, %fd394;
mov.f64 %fd396, 0d3ED0EE258B7A8B04;
mov.f64 %fd397, 0d3EB1380B3AE80F1E;
.loc 3 423 10
fma.rn.f64 %fd398, %fd397, %fd395, %fd396;
mov.f64 %fd399, 0d3EF3B2669F02676F;
.loc 3 423 10
fma.rn.f64 %fd400, %fd398, %fd395, %fd399;
mov.f64 %fd401, 0d3F1745CBA9AB0956;
.loc 3 423 10
fma.rn.f64 %fd402, %fd400, %fd395, %fd401;
mov.f64 %fd403, 0d3F3C71C72D1B5154;
.loc 3 423 10
fma.rn.f64 %fd404, %fd402, %fd395, %fd403;
mov.f64 %fd405, 0d3F624924923BE72D;
.loc 3 423 10
fma.rn.f64 %fd406, %fd404, %fd395, %fd405;
mov.f64 %fd407, 0d3F8999999999A3C4;
.loc 3 423 10
fma.rn.f64 %fd408, %fd406, %fd395, %fd407;
mov.f64 %fd409, 0d3FB5555555555554;
.loc 3 423 10
fma.rn.f64 %fd410, %fd408, %fd395, %fd409;
sub.f64 %fd411, %fd392, %fd394;
add.f64 %fd412, %fd411, %fd411;
neg.f64 %fd413, %fd394;
fma.rn.f64 %fd414, %fd413, %fd392, %fd412;
mul.f64 %fd415, %fd391, %fd414;
mul.f64 %fd416, %fd410, %fd395;
fma.rn.f64 %fd417, %fd416, %fd394, %fd415;
cvt.rn.f64.s32 %fd418, %r129;
mov.f64 %fd419, 0d3FE62E42FEFA39EF;
.loc 3 423 10
fma.rn.f64 %fd420, %fd418, %fd419, %fd394;
neg.s32 %r108, %r129;
cvt.rn.f64.s32 %fd421, %r108;
fma.rn.f64 %fd422, %fd421, %fd419, %fd420;
sub.f64 %fd423, %fd422, %fd394;
sub.f64 %fd424, %fd417, %fd423;
mov.f64 %fd425, 0d3C7ABC9E3B39803F;
.loc 3 423 10
fma.rn.f64 %fd426, %fd418, %fd425, %fd424;
add.f64 %fd46, %fd420, %fd426;
neg.f64 %fd451, %fd46;
bra.uni BB25_70;
BB25_69:
.loc 3 423 10
add.f64 %fd451, %fd1, %fd1;
BB25_70:
.loc 2 32 42
mad.lo.s32 %r109, %r112, %r51, %r113;
mul.wide.s32 %rd12, %r109, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 2 32 42
st.global.f64 [%rd13], %fd451;
.loc 2 32 22
add.s32 %r113, %r2, %r113;
.loc 2 32 1
setp.lt.s32 %p47, %r113, %r49;
@%p47 bra BB25_3;
BB25_71:
.loc 2 32 22
mov.u32 %r110, %nctaid.x;
mad.lo.s32 %r112, %r110, %r53, %r112;
.loc 2 32 1
setp.lt.s32 %p48, %r112, %r50;
@%p48 bra BB25_2;
BB25_72:
.loc 2 32 2
ret;
}
// map_log10_double: element-wise base-10 logarithm over a 2D array of doubles.
//
// Parameter roles, as established by how each value is used in the body:
//   param_0 (u32): exclusive upper bound of the y-derived index (%r50)
//                  -- presumably the row count; confirm against the .cu source
//   param_1 (u32): exclusive upper bound of the x-derived index (%r49)
//                  -- presumably the column count; confirm against the .cu source
//   param_2 (u64): destination pointer (converted to a global address, stored to)
//   param_3 (u32): multiplier applied to the x index when addressing the destination
//                  -- presumably the destination leading dimension
//   param_4 (u64): source pointer (converted to a global address, loaded from)
//   param_5 (u32): multiplier applied to the x index when addressing the source
//                  -- presumably the source leading dimension
//
// Each thread runs a 2D grid-stride loop: the outer loop advances the x index by
// nctaid.x * ntid.x, the inner loop advances the y index by nctaid.y * ntid.y.
// Element offsets are (y + x * multiplier) * 8 bytes. All index comparisons and
// arithmetic are signed 32-bit.
//
// Per element: out = log(x) * C_hi + log(x) * C_lo, where log(x) is evaluated
// inline (the `.loc 3 233` directives attribute it to line 233 of file 3 in the
// .file table) and C_hi/C_lo look like log10(e) split into high and low parts.
.visible .entry map_log10_double(
.param .u32 map_log10_double_param_0,
.param .u32 map_log10_double_param_1,
.param .u64 map_log10_double_param_2,
.param .u32 map_log10_double_param_3,
.param .u64 map_log10_double_param_4,
.param .u32 map_log10_double_param_5
)
{
.reg .pred %p<13>;
.reg .f32 %f<5>;
.reg .s32 %r<55>;
.reg .s64 %rd<9>;
.reg .f64 %fd<59>;
// Load kernel arguments: %r20/%r21 = loop bounds, %rd3 = dst, %r22 = dst multiplier,
// %rd4 = src, %r23 = src multiplier.
ld.param.u32 %r20, [map_log10_double_param_0];
ld.param.u32 %r21, [map_log10_double_param_1];
ld.param.u64 %rd3, [map_log10_double_param_2];
ld.param.u32 %r22, [map_log10_double_param_3];
ld.param.u64 %rd4, [map_log10_double_param_4];
ld.param.u32 %r23, [map_log10_double_param_5];
// %rd1 = destination base, %rd2 = source base, both as global-space addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 33 1
// %r49 = ntid.x * ctaid.x + tid.x : this thread's starting x index.
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r49, %r24, %r25, %r26;
.loc 2 33 1
// Threads whose starting x index is already out of range have nothing to do.
setp.ge.s32 %p1, %r49, %r21;
@%p1 bra BB26_18;
.loc 2 33 1
mov.u32 %r27, %ntid.y;
.loc 2 33 22
// %r2 = nctaid.y * ntid.y : stride of the inner (y) loop.
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
// ---- outer loop over the x index (%r49) ----
BB26_2:
.loc 2 33 1
// %r50 = ntid.y * ctaid.y + tid.y : starting y index, recomputed per outer iteration.
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r50, %r27, %r29, %r31;
.loc 2 33 1
setp.ge.s32 %p2, %r50, %r20;
@%p2 bra BB26_17;
.loc 2 33 1
// Hoist the per-x base offsets (in elements): %r4 for the source, %r5 for the destination.
mul.lo.s32 %r4, %r49, %r23;
.loc 2 33 42
mul.lo.s32 %r5, %r49, %r22;
// ---- inner loop over the y index (%r50) ----
BB26_4:
.loc 2 33 1
// Source address = %rd2 + (y + x * param_5) * 8.
add.s32 %r36, %r50, %r4;
mul.wide.s32 %rd5, %r36, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 33 1
ld.global.f64 %fd1, [%rd6];
.loc 3 233 10
// Split the input into its high (%r51) and low (%r52) 32-bit words.
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd1;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd1;
}
// Fast path only for 0 < x < +inf; everything else is a special case.
setp.gt.f64 %p3, %fd1, 0d0000000000000000;
setp.lt.f64 %p4, %fd1, 0d7FF0000000000000;
and.pred %p5, %p3, %p4;
@%p5 bra BB26_10;
// |x| "greater-or-unordered" than +inf can only be true for NaN.
abs.f64 %fd9, %fd1;
setp.gtu.f64 %p6, %fd9, 0d7FF0000000000000;
@%p6 bra BB26_9;
setp.neu.f64 %p7, %fd1, 0d0000000000000000;
@%p7 bra BB26_8;
// x == 0 (either sign): log result is -inf.
mov.f64 %fd58, 0dFFF0000000000000;
bra.uni BB26_16;
BB26_8:
.loc 3 233 10
// Remaining cases: +inf maps to +inf; any negative input (including -inf) maps to NaN.
setp.eq.f64 %p8, %fd1, 0d7FF0000000000000;
selp.f64 %fd58, %fd1, 0dFFF8000000000000, %p8;
bra.uni BB26_16;
BB26_9:
.loc 3 233 10
// NaN input: x + x propagates the NaN.
add.f64 %fd58, %fd1, %fd1;
bra.uni BB26_16;
BB26_10:
.loc 3 233 10
// High word below 0x00100000 means the exponent field is zero, i.e. x is subnormal.
setp.lt.u32 %p9, %r51, 1048576;
@%p9 bra BB26_12;
// Normal input: exponent bias correction is -1023.
mov.u32 %r53, -1023;
bra.uni BB26_13;
BB26_12:
.loc 3 233 10
// Subnormal input: scale by 2^54 (0x4350000000000000) to normalize, re-split the
// words, and fold the scaling into the bias correction (-1023 - 54 = -1077).
mul.f64 %fd11, %fd1, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd11;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd11;
}
mov.u32 %r53, -1077;
BB26_13:
.loc 3 233 10
// %r54 = unbiased exponent e; %fd57 = mantissa m with the exponent field forced to
// 0x3FF (mask 0x800FFFFF keeps sign + fraction bits, OR 0x3FF00000 sets the exponent),
// so m lies in [1, 2).
shr.s32 %r39, %r51, 20;
add.s32 %r54, %r53, %r39;
and.b32 %r40, %r51, -2146435073;
or.b32 %r41, %r40, 1072693248;
mov.b64 %fd57, {%r52, %r41};
// 1073127583 = 0x3FF6A09F, the high word of roughly sqrt(2). If m is at or above it,
// halve m (subtract 1 from the exponent field) and bump e, keeping m near 1.
setp.lt.u32 %p10, %r41, 1073127583;
@%p10 bra BB26_15;
{
.reg .b32 %temp;
mov.b64 {%r42, %temp}, %fd57;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r43}, %fd57;
}
add.s32 %r44, %r43, -1048576;
mov.b64 %fd57, {%r42, %r44};
add.s32 %r54, %r54, 1;
BB26_15:
// %fd12 = m + 1; %fd14 = 1.0.
add.f64 %fd12, %fd57, 0d3FF0000000000000;
mov.f64 %fd14, 0d3FF0000000000000;
.loc 3 233 10
// Seed r0 ~= 1/(m+1) from the single-precision approximate reciprocal.
// inline asm
cvt.rn.f32.f64 %f1,%fd12;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd13,%f2;
// inline asm
// Refine the reciprocal: err = 1 - (m+1)*r0; r = r0 + r0*(err + err^2) -> %fd18.
neg.f64 %fd15, %fd12;
fma.rn.f64 %fd16, %fd15, %fd13, %fd14;
fma.rn.f64 %fd17, %fd16, %fd16, %fd16;
fma.rn.f64 %fd18, %fd17, %fd13, %fd13;
// %fd19 = m - 1; %fd21 = s = 2*(m-1)*r ~= 2*(m-1)/(m+1); %fd22 = s^2.
add.f64 %fd19, %fd57, 0dBFF0000000000000;
mul.f64 %fd20, %fd19, %fd18;
fma.rn.f64 %fd21, %fd19, %fd18, %fd20;
mul.f64 %fd22, %fd21, %fd21;
// Horner evaluation of a degree-7 polynomial in s^2 (result in %fd37). The trailing
// coefficients are close to 1/80 and 1/12, consistent with the series
// log(m) = s + s^3/12 + s^5/80 + ... ; exact coefficient derivation not shown here.
mov.f64 %fd23, 0d3ED0EE258B7A8B04;
mov.f64 %fd24, 0d3EB1380B3AE80F1E;
.loc 3 233 10
fma.rn.f64 %fd25, %fd24, %fd22, %fd23;
mov.f64 %fd26, 0d3EF3B2669F02676F;
.loc 3 233 10
fma.rn.f64 %fd27, %fd25, %fd22, %fd26;
mov.f64 %fd28, 0d3F1745CBA9AB0956;
.loc 3 233 10
fma.rn.f64 %fd29, %fd27, %fd22, %fd28;
mov.f64 %fd30, 0d3F3C71C72D1B5154;
.loc 3 233 10
fma.rn.f64 %fd31, %fd29, %fd22, %fd30;
mov.f64 %fd32, 0d3F624924923BE72D;
.loc 3 233 10
fma.rn.f64 %fd33, %fd31, %fd22, %fd32;
mov.f64 %fd34, 0d3F8999999999A3C4;
.loc 3 233 10
fma.rn.f64 %fd35, %fd33, %fd22, %fd34;
mov.f64 %fd36, 0d3FB5555555555554;
.loc 3 233 10
fma.rn.f64 %fd37, %fd35, %fd22, %fd36;
// Recover the rounding error of s: residual = 2*((m-1) - s) - s*(m-1), which equals
// 2*(m-1) - s*(m+1); multiplied by r it gives the low-order part of s (%fd42).
sub.f64 %fd38, %fd19, %fd21;
add.f64 %fd39, %fd38, %fd38;
neg.f64 %fd40, %fd21;
fma.rn.f64 %fd41, %fd40, %fd19, %fd39;
mul.f64 %fd42, %fd18, %fd41;
// %fd44 = poly * s^3 + (low-order part of s).
mul.f64 %fd43, %fd37, %fd22;
fma.rn.f64 %fd44, %fd43, %fd21, %fd42;
// Combine with the exponent: %fd47 = e * ln2_hi + s, where 0x3FE62E42FEFA39EF is
// ln(2) rounded to double.
cvt.rn.f64.s32 %fd45, %r54;
mov.f64 %fd46, 0d3FE62E42FEFA39EF;
.loc 3 233 10
fma.rn.f64 %fd47, %fd45, %fd46, %fd21;
// Compute the rounding error of that sum ((-e)*ln2_hi + %fd47 - s) and subtract it
// from the tail so it is not lost.
neg.s32 %r45, %r54;
cvt.rn.f64.s32 %fd48, %r45;
fma.rn.f64 %fd49, %fd48, %fd46, %fd47;
sub.f64 %fd50, %fd49, %fd21;
sub.f64 %fd51, %fd44, %fd50;
// Add e times the low-order part of ln(2) (0x3C7ABC9E3B39803F), then head + tail
// gives the natural log in %fd58.
mov.f64 %fd52, 0d3C7ABC9E3B39803F;
.loc 3 233 10
fma.rn.f64 %fd53, %fd45, %fd52, %fd51;
add.f64 %fd58, %fd47, %fd53;
// ---- common exit of the log evaluation: %fd58 holds log(x) or a special value ----
BB26_16:
// Convert to base 10: %fd56 = %fd58 * C_hi + %fd58 * C_lo. C_hi = 0x3FDBCB7B1526E50E
// is approximately 0.4342944819 (log10(e)); C_lo = 0x3C695355BAAAFAD3 is presumably
// its low-order correction term.
mul.f64 %fd54, %fd58, 0d3C695355BAAAFAD3;
mov.f64 %fd55, 0d3FDBCB7B1526E50E;
.loc 3 233 10
fma.rn.f64 %fd56, %fd58, %fd55, %fd54;
.loc 2 33 42
// Destination address = %rd1 + (y + x * param_3) * 8.
add.s32 %r46, %r50, %r5;
mul.wide.s32 %rd7, %r46, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 33 42
st.global.f64 [%rd8], %fd56;
.loc 2 33 22
// Advance y by nctaid.y * ntid.y and repeat while y < param_0.
add.s32 %r50, %r2, %r50;
.loc 2 33 1
setp.lt.s32 %p11, %r50, %r20;
@%p11 bra BB26_4;
BB26_17:
.loc 2 33 22
// Advance x by nctaid.x * ntid.x and repeat while x < param_1.
mov.u32 %r47, %nctaid.x;
mad.lo.s32 %r49, %r47, %r24, %r49;
.loc 2 33 1
setp.lt.s32 %p12, %r49, %r21;
@%p12 bra BB26_2;
BB26_18:
.loc 2 33 2
ret;
}
// map_log1p_double: element-wise log(1 + x) over a 2D array of doubles.
//
// Parameter roles, as established by how each value is used in the body:
//   param_0 (u32): exclusive upper bound of the y-derived index (%r51)
//                  -- presumably the row count; confirm against the .cu source
//   param_1 (u32): exclusive upper bound of the x-derived index (%r50)
//                  -- presumably the column count; confirm against the .cu source
//   param_2 (u64): destination pointer (converted to a global address, stored to)
//   param_3 (u32): multiplier applied to the x index when addressing the destination
//                  -- presumably the destination leading dimension
//   param_4 (u64): source pointer (converted to a global address, loaded from)
//   param_5 (u32): multiplier applied to the x index when addressing the source
//                  -- presumably the source leading dimension
//
// Each thread runs a 2D grid-stride loop: the outer loop advances the x index by
// nctaid.x * ntid.x, the inner loop advances the y index by nctaid.y * ntid.y.
// Element offsets are (y + x * multiplier) * 8 bytes. All index comparisons and
// arithmetic are signed 32-bit.
//
// Per element there are two evaluation paths, chosen from the high word of x:
//   * small |x| (BB27_17): a series in s = 2x/(x+2) that never forms 1 + x;
//   * otherwise: the natural log of (x + 1), evaluated inline with the usual
//     special-case handling (NaN, zero, negative, +inf, subnormal).
.visible .entry map_log1p_double(
.param .u32 map_log1p_double_param_0,
.param .u32 map_log1p_double_param_1,
.param .u64 map_log1p_double_param_2,
.param .u32 map_log1p_double_param_3,
.param .u64 map_log1p_double_param_4,
.param .u32 map_log1p_double_param_5
)
{
.reg .pred %p<16>;
.reg .f32 %f<5>;
.reg .s32 %r<56>;
.reg .s64 %rd<9>;
.reg .f64 %fd<81>;
// Load kernel arguments: %r20/%r21 = loop bounds, %rd3 = dst, %r22 = dst multiplier,
// %rd4 = src, %r23 = src multiplier.
ld.param.u32 %r20, [map_log1p_double_param_0];
ld.param.u32 %r21, [map_log1p_double_param_1];
ld.param.u64 %rd3, [map_log1p_double_param_2];
ld.param.u32 %r22, [map_log1p_double_param_3];
ld.param.u64 %rd4, [map_log1p_double_param_4];
ld.param.u32 %r23, [map_log1p_double_param_5];
// %rd1 = destination base, %rd2 = source base, both as global-space addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 34 1
// %r50 = ntid.x * ctaid.x + tid.x : this thread's starting x index.
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r50, %r24, %r25, %r26;
.loc 2 34 1
// Threads whose starting x index is already out of range have nothing to do.
setp.ge.s32 %p1, %r50, %r21;
@%p1 bra BB27_20;
.loc 2 34 1
mov.u32 %r27, %ntid.y;
.loc 2 34 22
// %r2 = nctaid.y * ntid.y : stride of the inner (y) loop.
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
// ---- outer loop over the x index (%r50) ----
BB27_2:
.loc 2 34 1
// %r51 = ntid.y * ctaid.y + tid.y : starting y index, recomputed per outer iteration.
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r51, %r27, %r29, %r31;
.loc 2 34 1
setp.ge.s32 %p2, %r51, %r20;
@%p2 bra BB27_19;
.loc 2 34 1
// Hoist the per-x base offsets (in elements): %r4 for the source, %r5 for the destination.
mul.lo.s32 %r4, %r50, %r23;
.loc 2 34 42
mul.lo.s32 %r5, %r50, %r22;
// ---- inner loop over the y index (%r51) ----
BB27_4:
.loc 2 34 1
// Source address = %rd2 + (y + x * param_5) * 8.
add.s32 %r36, %r51, %r4;
mul.wide.s32 %rd5, %r36, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 34 1
ld.global.f64 %fd1, [%rd6];
.loc 3 238 10
// %r37 = high 32 bits of x; used to classify the magnitude without FP compares.
{
.reg .b32 %temp;
mov.b64 {%temp, %r37}, %fd1;
}
// Small-argument test on the high word:
//   unsigned %r37 < 0x3FE55555 (1071994197): non-negative x below roughly 2/3;
//   signed   %r37 < 0xBFD99999 (-1076258407): negative x with magnitude below roughly 0.4.
// Either one selects the series path at BB27_17.
setp.lt.u32 %p3, %r37, 1071994197;
setp.lt.s32 %p4, %r37, -1076258407;
or.pred %p5, %p3, %p4;
@%p5 bra BB27_17;
// Large-argument path: form u = x + 1 and take its natural log.
add.f64 %fd2, %fd1, 0d3FF0000000000000;
// Split u into its high (%r52) and low (%r53) 32-bit words.
{
.reg .b32 %temp;
mov.b64 {%temp, %r52}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%r53, %temp}, %fd2;
}
// Fast path only for 0 < u < +inf; everything else is a special case.
setp.gt.f64 %p6, %fd2, 0d0000000000000000;
setp.lt.f64 %p7, %fd2, 0d7FF0000000000000;
and.pred %p8, %p6, %p7;
@%p8 bra BB27_11;
// |u| "greater-or-unordered" than +inf can only be true for NaN.
abs.f64 %fd11, %fd2;
setp.gtu.f64 %p9, %fd11, 0d7FF0000000000000;
@%p9 bra BB27_10;
setp.neu.f64 %p10, %fd2, 0d0000000000000000;
@%p10 bra BB27_9;
// u == 0 (i.e. x == -1): result is -inf.
mov.f64 %fd80, 0dFFF0000000000000;
bra.uni BB27_18;
BB27_9:
.loc 3 238 10
// Remaining cases: u == +inf maps to +inf; negative u (x < -1) maps to NaN.
setp.eq.f64 %p11, %fd2, 0d7FF0000000000000;
selp.f64 %fd80, %fd2, 0dFFF8000000000000, %p11;
bra.uni BB27_18;
BB27_10:
.loc 3 238 10
// NaN: u + u propagates the NaN.
add.f64 %fd80, %fd2, %fd2;
bra.uni BB27_18;
BB27_11:
.loc 3 238 10
// High word below 0x00100000 means the exponent field is zero, i.e. u is subnormal.
setp.lt.u32 %p12, %r52, 1048576;
@%p12 bra BB27_13;
// Normal u: exponent bias correction is -1023.
mov.u32 %r54, -1023;
bra.uni BB27_14;
BB27_13:
.loc 3 238 10
// Subnormal u: scale by 2^54 (0x4350000000000000) to normalize, re-split the words,
// and fold the scaling into the bias correction (-1023 - 54 = -1077).
mul.f64 %fd13, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r52}, %fd13;
}
{
.reg .b32 %temp;
mov.b64 {%r53, %temp}, %fd13;
}
mov.u32 %r54, -1077;
BB27_14:
.loc 3 238 10
// %r55 = unbiased exponent e; %fd79 = mantissa m with the exponent field forced to
// 0x3FF (mask 0x800FFFFF keeps sign + fraction bits, OR 0x3FF00000 sets the exponent),
// so m lies in [1, 2).
shr.s32 %r40, %r52, 20;
add.s32 %r55, %r54, %r40;
and.b32 %r41, %r52, -2146435073;
or.b32 %r42, %r41, 1072693248;
mov.b64 %fd79, {%r53, %r42};
// 1073127583 = 0x3FF6A09F, the high word of roughly sqrt(2). If m is at or above it,
// halve m (subtract 1 from the exponent field) and bump e, keeping m near 1.
setp.lt.u32 %p13, %r42, 1073127583;
@%p13 bra BB27_16;
{
.reg .b32 %temp;
mov.b64 {%r43, %temp}, %fd79;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r44}, %fd79;
}
add.s32 %r45, %r44, -1048576;
mov.b64 %fd79, {%r43, %r45};
add.s32 %r55, %r55, 1;
BB27_16:
// %fd14 = m + 1; %fd16 = 1.0.
add.f64 %fd14, %fd79, 0d3FF0000000000000;
mov.f64 %fd16, 0d3FF0000000000000;
.loc 3 238 10
// Seed r0 ~= 1/(m+1) from the single-precision approximate reciprocal.
// inline asm
cvt.rn.f32.f64 %f1,%fd14;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd15,%f2;
// inline asm
// Refine the reciprocal: err = 1 - (m+1)*r0; r = r0 + r0*(err + err^2) -> %fd20.
neg.f64 %fd17, %fd14;
fma.rn.f64 %fd18, %fd17, %fd15, %fd16;
fma.rn.f64 %fd19, %fd18, %fd18, %fd18;
fma.rn.f64 %fd20, %fd19, %fd15, %fd15;
// %fd21 = m - 1; %fd23 = s = 2*(m-1)*r ~= 2*(m-1)/(m+1); %fd24 = s^2.
add.f64 %fd21, %fd79, 0dBFF0000000000000;
mul.f64 %fd22, %fd21, %fd20;
fma.rn.f64 %fd23, %fd21, %fd20, %fd22;
mul.f64 %fd24, %fd23, %fd23;
// Horner evaluation of a degree-7 polynomial in s^2 (result in %fd39). The trailing
// coefficients are close to 1/80 and 1/12, consistent with the series
// log(m) = s + s^3/12 + s^5/80 + ... ; exact coefficient derivation not shown here.
mov.f64 %fd25, 0d3ED0EE258B7A8B04;
mov.f64 %fd26, 0d3EB1380B3AE80F1E;
.loc 3 238 10
fma.rn.f64 %fd27, %fd26, %fd24, %fd25;
mov.f64 %fd28, 0d3EF3B2669F02676F;
.loc 3 238 10
fma.rn.f64 %fd29, %fd27, %fd24, %fd28;
mov.f64 %fd30, 0d3F1745CBA9AB0956;
.loc 3 238 10
fma.rn.f64 %fd31, %fd29, %fd24, %fd30;
mov.f64 %fd32, 0d3F3C71C72D1B5154;
.loc 3 238 10
fma.rn.f64 %fd33, %fd31, %fd24, %fd32;
mov.f64 %fd34, 0d3F624924923BE72D;
.loc 3 238 10
fma.rn.f64 %fd35, %fd33, %fd24, %fd34;
mov.f64 %fd36, 0d3F8999999999A3C4;
.loc 3 238 10
fma.rn.f64 %fd37, %fd35, %fd24, %fd36;
mov.f64 %fd38, 0d3FB5555555555554;
.loc 3 238 10
fma.rn.f64 %fd39, %fd37, %fd24, %fd38;
// Recover the rounding error of s: residual = 2*((m-1) - s) - s*(m-1), which equals
// 2*(m-1) - s*(m+1); multiplied by r it gives the low-order part of s (%fd44).
sub.f64 %fd40, %fd21, %fd23;
add.f64 %fd41, %fd40, %fd40;
neg.f64 %fd42, %fd23;
fma.rn.f64 %fd43, %fd42, %fd21, %fd41;
mul.f64 %fd44, %fd20, %fd43;
// %fd46 = poly * s^3 + (low-order part of s).
mul.f64 %fd45, %fd39, %fd24;
fma.rn.f64 %fd46, %fd45, %fd23, %fd44;
// Combine with the exponent: %fd49 = e * ln2_hi + s, where 0x3FE62E42FEFA39EF is
// ln(2) rounded to double.
cvt.rn.f64.s32 %fd47, %r55;
mov.f64 %fd48, 0d3FE62E42FEFA39EF;
.loc 3 238 10
fma.rn.f64 %fd49, %fd47, %fd48, %fd23;
// Compute the rounding error of that sum ((-e)*ln2_hi + %fd49 - s) and subtract it
// from the tail so it is not lost.
neg.s32 %r46, %r55;
cvt.rn.f64.s32 %fd50, %r46;
fma.rn.f64 %fd51, %fd50, %fd48, %fd49;
sub.f64 %fd52, %fd51, %fd23;
sub.f64 %fd53, %fd46, %fd52;
// Add e times the low-order part of ln(2) (0x3C7ABC9E3B39803F), then head + tail
// gives log(x + 1) in %fd80.
mov.f64 %fd54, 0d3C7ABC9E3B39803F;
.loc 3 238 10
fma.rn.f64 %fd55, %fd47, %fd54, %fd53;
add.f64 %fd80, %fd49, %fd55;
bra.uni BB27_18;
// ---- small-|x| path: evaluate log(1+x) without forming 1 + x ----
BB27_17:
.loc 3 238 10
// %fd59 = -x^2/(x+2); %fd60 = s = x + %fd59 = 2x/(x+2); %fd61 = s^2.
add.f64 %fd56, %fd1, 0d4000000000000000;
div.rn.f64 %fd57, %fd1, %fd56;
neg.f64 %fd58, %fd1;
mul.f64 %fd59, %fd57, %fd58;
add.f64 %fd60, %fd1, %fd59;
mul.f64 %fd61, %fd60, %fd60;
// Horner evaluation of a degree-7 polynomial in s^2 (result in %fd76). Coefficients
// differ slightly from the large-argument path; presumably fitted for this interval.
mov.f64 %fd62, 0d3ED087FFCEB2DC44;
mov.f64 %fd63, 0d3EB372FB2FBE14B5;
.loc 3 238 10
fma.rn.f64 %fd64, %fd63, %fd61, %fd62;
mov.f64 %fd65, 0d3EF3B9FF890F468C;
.loc 3 238 10
fma.rn.f64 %fd66, %fd64, %fd61, %fd65;
mov.f64 %fd67, 0d3F17457EFD51BAF8;
.loc 3 238 10
fma.rn.f64 %fd68, %fd66, %fd61, %fd67;
mov.f64 %fd69, 0d3F3C71C8DE3CE825;
.loc 3 238 10
fma.rn.f64 %fd70, %fd68, %fd61, %fd69;
mov.f64 %fd71, 0d3F6249248FA4661F;
.loc 3 238 10
fma.rn.f64 %fd72, %fd70, %fd61, %fd71;
mov.f64 %fd73, 0d3F899999999D70C4;
.loc 3 238 10
fma.rn.f64 %fd74, %fd72, %fd61, %fd73;
mov.f64 %fd75, 0d3FB5555555555462;
.loc 3 238 10
fma.rn.f64 %fd76, %fd74, %fd61, %fd75;
// Result = x + (poly * s^3 - x^2/(x+2)); x is added last, after the small
// correction terms have been combined.
mul.f64 %fd77, %fd76, %fd61;
fma.rn.f64 %fd78, %fd77, %fd60, %fd59;
add.f64 %fd80, %fd78, %fd1;
// ---- common exit: %fd80 holds log1p(x) or a special value ----
BB27_18:
.loc 2 34 42
// Destination address = %rd1 + (y + x * param_3) * 8.
add.s32 %r47, %r51, %r5;
mul.wide.s32 %rd7, %r47, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 34 42
st.global.f64 [%rd8], %fd80;
.loc 2 34 22
// Advance y by nctaid.y * ntid.y and repeat while y < param_0.
add.s32 %r51, %r2, %r51;
.loc 2 34 1
setp.lt.s32 %p14, %r51, %r20;
@%p14 bra BB27_4;
BB27_19:
.loc 2 34 22
// Advance x by nctaid.x * ntid.x and repeat while x < param_1.
mov.u32 %r48, %nctaid.x;
mad.lo.s32 %r50, %r48, %r24, %r50;
.loc 2 34 1
setp.lt.s32 %p15, %r50, %r21;
@%p15 bra BB27_2;
BB27_20:
.loc 2 34 2
ret;
}
// map_log2_double
// Element-wise kernel: every element visited by this thread is loaded as an f64
// from the buffer in param_4, passed through the math routine inlined from
// file 3 line 228 (presumably log2(), going by the kernel name and the final
// scaling step), and stored as an f64 into the buffer in param_2.
//
// Parameters, as they are used below:
//   param_0 (u32)  exclusive bound for the index derived from the .y built-ins
//   param_1 (u32)  exclusive bound for the index derived from the .x built-ins
//   param_2 (u64)  destination base pointer (written with st.global.f64)
//   param_3 (u32)  multiplier applied to the x index for destination addressing
//   param_4 (u64)  source base pointer (read with ld.global.f64)
//   param_5 (u32)  multiplier applied to the x index for source addressing
// Element address = base + 8 * (x_index * multiplier + y_index); all index
// arithmetic and comparisons are signed 32-bit.
.visible .entry map_log2_double(
.param .u32 map_log2_double_param_0,
.param .u32 map_log2_double_param_1,
.param .u64 map_log2_double_param_2,
.param .u32 map_log2_double_param_3,
.param .u64 map_log2_double_param_4,
.param .u32 map_log2_double_param_5
)
{
.reg .pred %p<13>;
.reg .f32 %f<5>;
.reg .s32 %r<55>;
.reg .s64 %rd<9>;
.reg .f64 %fd<59>;
ld.param.u32 %r20, [map_log2_double_param_0];
ld.param.u32 %r21, [map_log2_double_param_1];
ld.param.u64 %rd3, [map_log2_double_param_2];
ld.param.u32 %r22, [map_log2_double_param_3];
ld.param.u64 %rd4, [map_log2_double_param_4];
ld.param.u32 %r23, [map_log2_double_param_5];
// %rd1 = destination, %rd2 = source, both as global-space addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 35 1
// %r49 = ntid.x * ctaid.x + tid.x : starting x index for this thread.
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r49, %r24, %r25, %r26;
.loc 2 35 1
// Nothing to do when the starting x index is already past param_1.
setp.ge.s32 %p1, %r49, %r21;
@%p1 bra BB28_18;
.loc 2 35 1
mov.u32 %r27, %ntid.y;
.loc 2 35 22
// %r2 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
// Outer loop head: one iteration per x index.
BB28_2:
.loc 2 35 1
// %r50 = ntid.y * ctaid.y + tid.y : the y index restarts here for every x.
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r50, %r27, %r29, %r31;
.loc 2 35 1
setp.ge.s32 %p2, %r50, %r20;
@%p2 bra BB28_17;
.loc 2 35 1
// %r4 = x * param_5 (source offset), %r5 = x * param_3 (destination offset).
mul.lo.s32 %r4, %r49, %r23;
.loc 2 35 42
mul.lo.s32 %r5, %r49, %r22;
// Inner loop head: one iteration per y index.
BB28_4:
.loc 2 35 1
add.s32 %r36, %r50, %r4;
mul.wide.s32 %rd5, %r36, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 35 1
ld.global.f64 %fd1, [%rd6];
.loc 3 228 10
// Split the input into its upper (%r51) and lower (%r52) 32-bit halves.
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd1;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd1;
}
// Main path is taken only for 0 < x < +inf; everything else is a special case.
setp.gt.f64 %p3, %fd1, 0d0000000000000000;
setp.lt.f64 %p4, %fd1, 0d7FF0000000000000;
and.pred %p5, %p3, %p4;
@%p5 bra BB28_10;
// |x| "greater than or unordered" +inf can only be true for NaN.
abs.f64 %fd9, %fd1;
setp.gtu.f64 %p6, %fd9, 0d7FF0000000000000;
@%p6 bra BB28_9;
setp.neu.f64 %p7, %fd1, 0d0000000000000000;
@%p7 bra BB28_8;
// x == 0: intermediate result is -inf.
mov.f64 %fd58, 0dFFF0000000000000;
bra.uni BB28_16;
BB28_8:
.loc 3 228 10
// Remaining inputs are +inf or negative: +inf is passed through, anything
// else becomes the NaN bit pattern 0xFFF8000000000000.
setp.eq.f64 %p8, %fd1, 0d7FF0000000000000;
selp.f64 %fd58, %fd1, 0dFFF8000000000000, %p8;
bra.uni BB28_16;
BB28_9:
.loc 3 228 10
// NaN input: x + x keeps the result NaN.
add.f64 %fd58, %fd1, %fd1;
bra.uni BB28_16;
BB28_10:
.loc 3 228 10
// Upper word below 0x00100000 means the exponent field is zero (subnormal).
setp.lt.u32 %p9, %r51, 1048576;
@%p9 bra BB28_12;
// Normal input: exponent bias only.
mov.u32 %r53, -1023;
bra.uni BB28_13;
BB28_12:
.loc 3 228 10
// Subnormal input: scale by 0d4350000000000000 (2^54), re-split the scaled
// value, and fold the extra 54 into the bias (-1023 - 54 = -1077).
mul.f64 %fd11, %fd1, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd11;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd11;
}
mov.u32 %r53, -1077;
BB28_13:
.loc 3 228 10
// %r54 = unbiased exponent. %fd57 = input with its exponent field replaced by
// 0x3FF (mask 0x800FFFFF, then OR 0x3FF00000), i.e. a value in [1, 2).
shr.s32 %r39, %r51, 20;
add.s32 %r54, %r53, %r39;
and.b32 %r40, %r51, -2146435073;
or.b32 %r41, %r40, 1072693248;
mov.b64 %fd57, {%r52, %r41};
// 1073127583 = 0x3FF6A09F. At or above this upper-word threshold the value is
// halved (exponent field decremented) and the exponent is incremented.
setp.lt.u32 %p10, %r41, 1073127583;
@%p10 bra BB28_15;
{
.reg .b32 %temp;
mov.b64 {%r42, %temp}, %fd57;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r43}, %fd57;
}
add.s32 %r44, %r43, -1048576;
mov.b64 %fd57, {%r42, %r44};
add.s32 %r54, %r54, 1;
BB28_15:
// %fd12 = m + 1.0, where m = %fd57.
add.f64 %fd12, %fd57, 0d3FF0000000000000;
mov.f64 %fd14, 0d3FF0000000000000;
.loc 3 228 10
// Single-precision approximate reciprocal of (m + 1), widened back to f64.
// inline asm
cvt.rn.f32.f64 %f1,%fd12;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd13,%f2;
// inline asm
// Refine the reciprocal: e = 1 - d*r, then r' = r + r*(e + e*e) -> %fd18.
neg.f64 %fd15, %fd12;
fma.rn.f64 %fd16, %fd15, %fd13, %fd14;
fma.rn.f64 %fd17, %fd16, %fd16, %fd16;
fma.rn.f64 %fd18, %fd17, %fd13, %fd13;
// %fd21 = 2 * (m - 1) * r' ; %fd22 = %fd21 squared (the polynomial variable).
add.f64 %fd19, %fd57, 0dBFF0000000000000;
mul.f64 %fd20, %fd19, %fd18;
fma.rn.f64 %fd21, %fd19, %fd18, %fd20;
mul.f64 %fd22, %fd21, %fd21;
// Horner evaluation of an 8-coefficient polynomial in %fd22 -> %fd37.
mov.f64 %fd23, 0d3ED0EE258B7A8B04;
mov.f64 %fd24, 0d3EB1380B3AE80F1E;
.loc 3 228 10
fma.rn.f64 %fd25, %fd24, %fd22, %fd23;
mov.f64 %fd26, 0d3EF3B2669F02676F;
.loc 3 228 10
fma.rn.f64 %fd27, %fd25, %fd22, %fd26;
mov.f64 %fd28, 0d3F1745CBA9AB0956;
.loc 3 228 10
fma.rn.f64 %fd29, %fd27, %fd22, %fd28;
mov.f64 %fd30, 0d3F3C71C72D1B5154;
.loc 3 228 10
fma.rn.f64 %fd31, %fd29, %fd22, %fd30;
mov.f64 %fd32, 0d3F624924923BE72D;
.loc 3 228 10
fma.rn.f64 %fd33, %fd31, %fd22, %fd32;
mov.f64 %fd34, 0d3F8999999999A3C4;
.loc 3 228 10
fma.rn.f64 %fd35, %fd33, %fd22, %fd34;
mov.f64 %fd36, 0d3FB5555555555554;
.loc 3 228 10
fma.rn.f64 %fd37, %fd35, %fd22, %fd36;
// Correction term for the rounding of the quotient in %fd21, combined with
// the polynomial tail into %fd44.
sub.f64 %fd38, %fd19, %fd21;
add.f64 %fd39, %fd38, %fd38;
neg.f64 %fd40, %fd21;
fma.rn.f64 %fd41, %fd40, %fd19, %fd39;
mul.f64 %fd42, %fd18, %fd41;
mul.f64 %fd43, %fd37, %fd22;
fma.rn.f64 %fd44, %fd43, %fd21, %fd42;
// Add exponent * 0d3FE62E42FEFA39EF (ln 2 to double precision), then recover
// the rounding error of that sum and add exponent * 0d3C7ABC9E3B39803F
// (presumably the low-order tail of ln 2). %fd58 = natural log of the input.
cvt.rn.f64.s32 %fd45, %r54;
mov.f64 %fd46, 0d3FE62E42FEFA39EF;
.loc 3 228 10
fma.rn.f64 %fd47, %fd45, %fd46, %fd21;
neg.s32 %r45, %r54;
cvt.rn.f64.s32 %fd48, %r45;
fma.rn.f64 %fd49, %fd48, %fd46, %fd47;
sub.f64 %fd50, %fd49, %fd21;
sub.f64 %fd51, %fd44, %fd50;
mov.f64 %fd52, 0d3C7ABC9E3B39803F;
.loc 3 228 10
fma.rn.f64 %fd53, %fd45, %fd52, %fd51;
add.f64 %fd58, %fd47, %fd53;
BB28_16:
// All paths join here. Scale by 0d3FF71547652B82FE (log2(e) to double
// precision) plus a small second constant (presumably its low-order tail).
mul.f64 %fd54, %fd58, 0d3C7777D0FFDA0D24;
mov.f64 %fd55, 0d3FF71547652B82FE;
.loc 3 228 10
fma.rn.f64 %fd56, %fd58, %fd55, %fd54;
.loc 2 35 42
// Store to destination[x * param_3 + y].
add.s32 %r46, %r50, %r5;
mul.wide.s32 %rd7, %r46, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 35 42
st.global.f64 [%rd8], %fd56;
.loc 2 35 22
// Advance y by nctaid.y * ntid.y and repeat while y < param_0.
add.s32 %r50, %r2, %r50;
.loc 2 35 1
setp.lt.s32 %p11, %r50, %r20;
@%p11 bra BB28_4;
BB28_17:
.loc 2 35 22
// Advance x by nctaid.x * ntid.x and repeat while x < param_1.
mov.u32 %r47, %nctaid.x;
mad.lo.s32 %r49, %r47, %r24, %r49;
.loc 2 35 1
setp.lt.s32 %p12, %r49, %r21;
@%p12 bra BB28_2;
BB28_18:
.loc 2 35 2
ret;
}
// map_logb_double
// Element-wise kernel: every element visited by this thread is loaded as an f64
// from the buffer in param_4, replaced by the unbiased binary exponent of its
// magnitude as an f64 (routine inlined from file 3 line 512; presumably logb(),
// going by the kernel name), and stored into the buffer in param_2.
//
// Parameters, as they are used below:
//   param_0 (u32)  exclusive bound for the index derived from the .y built-ins
//   param_1 (u32)  exclusive bound for the index derived from the .x built-ins
//   param_2 (u64)  destination base pointer (written with st.global.f64)
//   param_3 (u32)  multiplier applied to the x index for destination addressing
//   param_4 (u64)  source base pointer (read with ld.global.f64)
//   param_5 (u32)  multiplier applied to the x index for source addressing
// Element address = base + 8 * (x_index * multiplier + y_index); all index
// arithmetic and comparisons are signed 32-bit.
.visible .entry map_logb_double(
.param .u32 map_logb_double_param_0,
.param .u32 map_logb_double_param_1,
.param .u64 map_logb_double_param_2,
.param .u32 map_logb_double_param_3,
.param .u64 map_logb_double_param_4,
.param .u32 map_logb_double_param_5
)
{
.reg .pred %p<9>;
.reg .s32 %r<35>;
.reg .s64 %rd<13>;
.reg .f64 %fd<9>;
ld.param.u32 %r13, [map_logb_double_param_0];
ld.param.u32 %r14, [map_logb_double_param_1];
ld.param.u64 %rd4, [map_logb_double_param_2];
ld.param.u32 %r15, [map_logb_double_param_3];
ld.param.u64 %rd5, [map_logb_double_param_4];
ld.param.u32 %r16, [map_logb_double_param_5];
// %rd1 = destination, %rd2 = source, both as global-space addresses.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
.loc 2 36 1
// %r33 = ntid.x * ctaid.x + tid.x : starting x index for this thread.
mov.u32 %r1, %ntid.x;
mov.u32 %r17, %ctaid.x;
mov.u32 %r18, %tid.x;
mad.lo.s32 %r33, %r1, %r17, %r18;
.loc 2 36 1
setp.ge.s32 %p1, %r33, %r14;
@%p1 bra BB29_14;
.loc 2 36 1
// %r3 = ntid.y * ctaid.y + tid.y : starting y index, reused for every x.
mov.u32 %r19, %tid.y;
mov.u32 %r20, %ntid.y;
mov.u32 %r21, %ctaid.y;
mad.lo.s32 %r3, %r20, %r21, %r19;
.loc 2 36 22
// %r4 = nctaid.x * ntid.x (x step), %r5 = nctaid.y * ntid.y (y step).
mov.u32 %r22, %nctaid.x;
mul.lo.s32 %r4, %r22, %r1;
.loc 2 36 22
mov.u32 %r23, %nctaid.y;
mul.lo.s32 %r5, %r23, %r20;
// Outer loop head: one iteration per x index.
BB29_2:
.loc 2 36 1
setp.ge.s32 %p2, %r3, %r13;
@%p2 bra BB29_13;
.loc 2 36 1
// %r7 = x * param_5 (source offset), %r8 = x * param_3 (destination offset).
mul.lo.s32 %r7, %r33, %r16;
.loc 2 36 42
mul.lo.s32 %r8, %r33, %r15;
mov.u32 %r34, %r3;
// Inner loop head: %r34 carries the y index between iterations, %r9 holds it
// for the current iteration.
BB29_4:
.loc 2 36 1
mov.u32 %r9, %r34;
add.s32 %r24, %r9, %r7;
mul.wide.s32 %rd6, %r24, 8;
add.s64 %rd7, %rd2, %rd6;
.loc 2 36 1
ld.global.f64 %fd1, [%rd7];
.loc 3 512 10
// %fd8 starts as |x| and is overwritten with the result on every path.
abs.f64 %fd8, %fd1;
// "Greater than or unordered" against +inf is only true for NaN.
setp.gtu.f64 %p3, %fd8, 0d7FF0000000000000;
@%p3 bra BB29_11;
// |x| == +inf: store |x| (+inf) unchanged.
setp.eq.f64 %p4, %fd8, 0d7FF0000000000000;
@%p4 bra BB29_12;
setp.neu.f64 %p5, %fd8, 0d0000000000000000;
@%p5 bra BB29_8;
// x == 0: result is -inf.
mov.f64 %fd8, 0dFFF0000000000000;
bra.uni BB29_12;
BB29_8:
.loc 3 512 10
// Rebuild the 64-bit pattern of |x| in %rd3 (upper word %r10, lower %r25).
{
.reg .b32 %temp;
mov.b64 {%r25, %temp}, %fd8;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r10}, %fd8;
}
cvt.u64.u32 %rd8, %r10;
shl.b64 %rd9, %rd8, 32;
cvt.u64.u32 %rd10, %r25;
or.b64 %rd3, %rd9, %rd10;
// 0d0010000000000000 is the smallest normal double; below it is subnormal.
setp.ltu.f64 %p6, %fd8, 0d0010000000000000;
@%p6 bra BB29_10;
// Normal input: result = (11-bit exponent field) - 1023, converted to f64.
shr.u32 %r26, %r10, 20;
and.b32 %r27, %r26, 2047;
add.s32 %r28, %r27, -1023;
cvt.rn.f64.s32 %fd8, %r28;
bra.uni BB29_12;
BB29_10:
.loc 3 512 10
// Subnormal input: result = -1011 - (count of leading zero bits in pattern).
clz.b64 %r29, %rd3;
mov.u32 %r30, -1011;
.loc 3 512 10
sub.s32 %r31, %r30, %r29;
cvt.rn.f64.s32 %fd8, %r31;
bra.uni BB29_12;
BB29_11:
.loc 3 512 10
// NaN input: x + x keeps the result NaN.
add.f64 %fd8, %fd1, %fd1;
BB29_12:
.loc 2 36 42
// Store to destination[x * param_3 + y].
add.s32 %r32, %r9, %r8;
mul.wide.s32 %rd11, %r32, 8;
add.s64 %rd12, %rd1, %rd11;
.loc 2 36 42
st.global.f64 [%rd12], %fd8;
.loc 2 36 22
// Advance y by nctaid.y * ntid.y and repeat while y < param_0.
add.s32 %r11, %r5, %r9;
.loc 2 36 1
setp.lt.s32 %p7, %r11, %r13;
mov.u32 %r34, %r11;
@%p7 bra BB29_4;
BB29_13:
.loc 2 36 22
// Advance x by nctaid.x * ntid.x and repeat while x < param_1.
add.s32 %r33, %r4, %r33;
.loc 2 36 1
setp.lt.s32 %p8, %r33, %r14;
@%p8 bra BB29_2;
BB29_14:
.loc 2 36 2
ret;
}
// map_log_double
// Element-wise kernel: every element visited by this thread is loaded as an f64
// from the buffer in param_4, passed through the math routine inlined from
// file 3 line 223 (presumably log(), going by the kernel name and the ln 2
// constant used in the reconstruction), and stored into the buffer in param_2.
//
// Parameters, as they are used below:
//   param_0 (u32)  exclusive bound for the index derived from the .y built-ins
//   param_1 (u32)  exclusive bound for the index derived from the .x built-ins
//   param_2 (u64)  destination base pointer (written with st.global.f64)
//   param_3 (u32)  multiplier applied to the x index for destination addressing
//   param_4 (u64)  source base pointer (read with ld.global.f64)
//   param_5 (u32)  multiplier applied to the x index for source addressing
// Element address = base + 8 * (x_index * multiplier + y_index); all index
// arithmetic and comparisons are signed 32-bit.
.visible .entry map_log_double(
.param .u32 map_log_double_param_0,
.param .u32 map_log_double_param_1,
.param .u64 map_log_double_param_2,
.param .u32 map_log_double_param_3,
.param .u64 map_log_double_param_4,
.param .u32 map_log_double_param_5
)
{
.reg .pred %p<13>;
.reg .f32 %f<5>;
.reg .s32 %r<55>;
.reg .s64 %rd<9>;
.reg .f64 %fd<56>;
ld.param.u32 %r20, [map_log_double_param_0];
ld.param.u32 %r21, [map_log_double_param_1];
ld.param.u64 %rd3, [map_log_double_param_2];
ld.param.u32 %r22, [map_log_double_param_3];
ld.param.u64 %rd4, [map_log_double_param_4];
ld.param.u32 %r23, [map_log_double_param_5];
// %rd1 = destination, %rd2 = source, both as global-space addresses.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 37 1
// %r49 = ntid.x * ctaid.x + tid.x : starting x index for this thread.
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r49, %r24, %r25, %r26;
.loc 2 37 1
setp.ge.s32 %p1, %r49, %r21;
@%p1 bra BB30_18;
.loc 2 37 1
mov.u32 %r27, %ntid.y;
.loc 2 37 22
// %r2 = nctaid.y * ntid.y : step of the inner (y) loop.
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
// Outer loop head: one iteration per x index.
BB30_2:
.loc 2 37 1
// %r50 = ntid.y * ctaid.y + tid.y : the y index restarts here for every x.
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r50, %r27, %r29, %r31;
.loc 2 37 1
setp.ge.s32 %p2, %r50, %r20;
@%p2 bra BB30_17;
.loc 2 37 1
// %r4 = x * param_5 (source offset), %r5 = x * param_3 (destination offset).
mul.lo.s32 %r4, %r49, %r23;
.loc 2 37 42
mul.lo.s32 %r5, %r49, %r22;
// Inner loop head: one iteration per y index.
BB30_4:
.loc 2 37 1
add.s32 %r36, %r50, %r4;
mul.wide.s32 %rd5, %r36, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 37 1
ld.global.f64 %fd1, [%rd6];
.loc 3 223 10
// Split the input into its upper (%r51) and lower (%r52) 32-bit halves.
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd1;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd1;
}
// Main path is taken only for 0 < x < +inf; everything else is a special case.
setp.gt.f64 %p3, %fd1, 0d0000000000000000;
setp.lt.f64 %p4, %fd1, 0d7FF0000000000000;
and.pred %p5, %p3, %p4;
@%p5 bra BB30_10;
// |x| "greater than or unordered" +inf can only be true for NaN.
abs.f64 %fd9, %fd1;
setp.gtu.f64 %p6, %fd9, 0d7FF0000000000000;
@%p6 bra BB30_9;
setp.neu.f64 %p7, %fd1, 0d0000000000000000;
@%p7 bra BB30_8;
// x == 0: result is -inf.
mov.f64 %fd55, 0dFFF0000000000000;
bra.uni BB30_16;
BB30_8:
.loc 3 223 10
// Remaining inputs are +inf or negative: +inf is passed through, anything
// else becomes the NaN bit pattern 0xFFF8000000000000.
setp.eq.f64 %p8, %fd1, 0d7FF0000000000000;
selp.f64 %fd55, %fd1, 0dFFF8000000000000, %p8;
bra.uni BB30_16;
BB30_9:
.loc 3 223 10
// NaN input: x + x keeps the result NaN.
add.f64 %fd55, %fd1, %fd1;
bra.uni BB30_16;
BB30_10:
.loc 3 223 10
// Upper word below 0x00100000 means the exponent field is zero (subnormal).
setp.lt.u32 %p9, %r51, 1048576;
@%p9 bra BB30_12;
// Normal input: exponent bias only.
mov.u32 %r53, -1023;
bra.uni BB30_13;
BB30_12:
.loc 3 223 10
// Subnormal input: scale by 0d4350000000000000 (2^54), re-split the scaled
// value, and fold the extra 54 into the bias (-1023 - 54 = -1077).
mul.f64 %fd11, %fd1, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd11;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd11;
}
mov.u32 %r53, -1077;
BB30_13:
.loc 3 223 10
// %r54 = unbiased exponent. %fd54 = input with its exponent field replaced by
// 0x3FF (mask 0x800FFFFF, then OR 0x3FF00000), i.e. a value in [1, 2).
shr.s32 %r39, %r51, 20;
add.s32 %r54, %r53, %r39;
and.b32 %r40, %r51, -2146435073;
or.b32 %r41, %r40, 1072693248;
mov.b64 %fd54, {%r52, %r41};
// 1073127583 = 0x3FF6A09F. At or above this upper-word threshold the value is
// halved (exponent field decremented) and the exponent is incremented.
setp.lt.u32 %p10, %r41, 1073127583;
@%p10 bra BB30_15;
{
.reg .b32 %temp;
mov.b64 {%r42, %temp}, %fd54;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r43}, %fd54;
}
add.s32 %r44, %r43, -1048576;
mov.b64 %fd54, {%r42, %r44};
add.s32 %r54, %r54, 1;
BB30_15:
// %fd12 = m + 1.0, where m = %fd54.
add.f64 %fd12, %fd54, 0d3FF0000000000000;
mov.f64 %fd14, 0d3FF0000000000000;
.loc 3 223 10
// Single-precision approximate reciprocal of (m + 1), widened back to f64.
// inline asm
cvt.rn.f32.f64 %f1,%fd12;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd13,%f2;
// inline asm
// Refine the reciprocal: e = 1 - d*r, then r' = r + r*(e + e*e) -> %fd18.
neg.f64 %fd15, %fd12;
fma.rn.f64 %fd16, %fd15, %fd13, %fd14;
fma.rn.f64 %fd17, %fd16, %fd16, %fd16;
fma.rn.f64 %fd18, %fd17, %fd13, %fd13;
// %fd21 = 2 * (m - 1) * r' ; %fd22 = %fd21 squared (the polynomial variable).
add.f64 %fd19, %fd54, 0dBFF0000000000000;
mul.f64 %fd20, %fd19, %fd18;
fma.rn.f64 %fd21, %fd19, %fd18, %fd20;
mul.f64 %fd22, %fd21, %fd21;
// Horner evaluation of an 8-coefficient polynomial in %fd22 -> %fd37.
mov.f64 %fd23, 0d3ED0EE258B7A8B04;
mov.f64 %fd24, 0d3EB1380B3AE80F1E;
.loc 3 223 10
fma.rn.f64 %fd25, %fd24, %fd22, %fd23;
mov.f64 %fd26, 0d3EF3B2669F02676F;
.loc 3 223 10
fma.rn.f64 %fd27, %fd25, %fd22, %fd26;
mov.f64 %fd28, 0d3F1745CBA9AB0956;
.loc 3 223 10
fma.rn.f64 %fd29, %fd27, %fd22, %fd28;
mov.f64 %fd30, 0d3F3C71C72D1B5154;
.loc 3 223 10
fma.rn.f64 %fd31, %fd29, %fd22, %fd30;
mov.f64 %fd32, 0d3F624924923BE72D;
.loc 3 223 10
fma.rn.f64 %fd33, %fd31, %fd22, %fd32;
mov.f64 %fd34, 0d3F8999999999A3C4;
.loc 3 223 10
fma.rn.f64 %fd35, %fd33, %fd22, %fd34;
mov.f64 %fd36, 0d3FB5555555555554;
.loc 3 223 10
fma.rn.f64 %fd37, %fd35, %fd22, %fd36;
// Correction term for the rounding of the quotient in %fd21, combined with
// the polynomial tail into %fd44.
sub.f64 %fd38, %fd19, %fd21;
add.f64 %fd39, %fd38, %fd38;
neg.f64 %fd40, %fd21;
fma.rn.f64 %fd41, %fd40, %fd19, %fd39;
mul.f64 %fd42, %fd18, %fd41;
mul.f64 %fd43, %fd37, %fd22;
fma.rn.f64 %fd44, %fd43, %fd21, %fd42;
// Add exponent * 0d3FE62E42FEFA39EF (ln 2 to double precision), then recover
// the rounding error of that sum and add exponent * 0d3C7ABC9E3B39803F
// (presumably the low-order tail of ln 2). %fd55 = final result.
cvt.rn.f64.s32 %fd45, %r54;
mov.f64 %fd46, 0d3FE62E42FEFA39EF;
.loc 3 223 10
fma.rn.f64 %fd47, %fd45, %fd46, %fd21;
neg.s32 %r45, %r54;
cvt.rn.f64.s32 %fd48, %r45;
fma.rn.f64 %fd49, %fd48, %fd46, %fd47;
sub.f64 %fd50, %fd49, %fd21;
sub.f64 %fd51, %fd44, %fd50;
mov.f64 %fd52, 0d3C7ABC9E3B39803F;
.loc 3 223 10
fma.rn.f64 %fd53, %fd45, %fd52, %fd51;
add.f64 %fd55, %fd47, %fd53;
BB30_16:
.loc 2 37 42
// All paths join here. Store to destination[x * param_3 + y].
add.s32 %r46, %r50, %r5;
mul.wide.s32 %rd7, %r46, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 37 42
st.global.f64 [%rd8], %fd55;
.loc 2 37 22
// Advance y by nctaid.y * ntid.y and repeat while y < param_0.
add.s32 %r50, %r2, %r50;
.loc 2 37 1
setp.lt.s32 %p11, %r50, %r20;
@%p11 bra BB30_4;
BB30_17:
.loc 2 37 22
// Advance x by nctaid.x * ntid.x and repeat while x < param_1.
mov.u32 %r47, %nctaid.x;
mad.lo.s32 %r49, %r47, %r24, %r49;
.loc 2 37 1
setp.lt.s32 %p12, %r49, %r21;
@%p12 bra BB30_2;
BB30_18:
.loc 2 37 2
ret;
}
// map_nearbyint_double
// Element-wise kernel: each element visited by this thread is loaded as an f64
// from the buffer in param_4, rounded to an integral value with
// cvt.rni.f64.f64 (round to nearest, ties to even), and stored into the buffer
// in param_2.
//
// Parameters, as they are used below:
//   param_0 (u32)  exclusive bound for the index derived from the .y built-ins
//   param_1 (u32)  exclusive bound for the index derived from the .x built-ins
//   param_2 (u64)  destination base pointer
//   param_3 (u32)  multiplier applied to the x index for destination addressing
//   param_4 (u64)  source base pointer
//   param_5 (u32)  multiplier applied to the x index for source addressing
// Element address = base + 8 * (x_index * multiplier + y_index); all index
// arithmetic and comparisons are signed 32-bit.
.visible .entry map_nearbyint_double(
.param .u32 map_nearbyint_double_param_0,
.param .u32 map_nearbyint_double_param_1,
.param .u64 map_nearbyint_double_param_2,
.param .u32 map_nearbyint_double_param_3,
.param .u64 map_nearbyint_double_param_4,
.param .u32 map_nearbyint_double_param_5
)
{
.reg .pred %x_out, %y_out, %y_more, %x_more;
.reg .s32 %lim_y, %lim_x, %dst_stride, %src_stride;
.reg .s32 %bdim_x, %bidx_x, %tidx_x, %gdim_x;
.reg .s32 %bdim_y, %bidx_y, %tidx_y, %gdim_y;
.reg .s32 %ix, %iy, %iy_first, %step_x, %step_y;
.reg .s32 %src_row, %dst_row, %src_idx, %dst_idx;
.reg .s64 %dst_arg, %src_arg, %dst, %src;
.reg .s64 %src_off, %src_addr, %dst_off, %dst_addr;
.reg .f64 %val_in, %val_out;
// Fetch the six kernel arguments.
ld.param.u32 %lim_y, [map_nearbyint_double_param_0];
ld.param.u32 %lim_x, [map_nearbyint_double_param_1];
ld.param.u64 %dst_arg, [map_nearbyint_double_param_2];
ld.param.u32 %dst_stride, [map_nearbyint_double_param_3];
ld.param.u64 %src_arg, [map_nearbyint_double_param_4];
ld.param.u32 %src_stride, [map_nearbyint_double_param_5];
cvta.to.global.u64 %src, %src_arg;
cvta.to.global.u64 %dst, %dst_arg;
.loc 2 38 1
// ix = ntid.x * ctaid.x + tid.x
mov.u32 %tidx_x, %tid.x;
mov.u32 %bidx_x, %ctaid.x;
mov.u32 %bdim_x, %ntid.x;
mad.lo.s32 %ix, %bdim_x, %bidx_x, %tidx_x;
.loc 2 38 1
// Leave immediately when the starting x index is already out of range.
setp.ge.s32 %x_out, %ix, %lim_x;
@%x_out bra $L_nearbyint_done;
.loc 2 38 1
// iy_first = ntid.y * ctaid.y + tid.y (restart value of the inner loop)
mov.u32 %bdim_y, %ntid.y;
mov.u32 %bidx_y, %ctaid.y;
mov.u32 %tidx_y, %tid.y;
mad.lo.s32 %iy_first, %bdim_y, %bidx_y, %tidx_y;
.loc 2 38 22
// Loop steps: nctaid.y * ntid.y for y, nctaid.x * ntid.x for x.
mov.u32 %gdim_y, %nctaid.y;
mul.lo.s32 %step_y, %gdim_y, %bdim_y;
.loc 2 38 22
mov.u32 %gdim_x, %nctaid.x;
mul.lo.s32 %step_x, %gdim_x, %bdim_x;
$L_nearbyint_x_loop:
.loc 2 38 1
// Skip the inner loop entirely when the first y index is out of range.
setp.ge.s32 %y_out, %iy_first, %lim_y;
@%y_out bra $L_nearbyint_x_next;
.loc 2 38 42
mul.lo.s32 %dst_row, %ix, %dst_stride;
.loc 2 38 1
mul.lo.s32 %src_row, %ix, %src_stride;
mov.u32 %iy, %iy_first;
$L_nearbyint_y_loop:
.loc 2 38 1
// Load source[ix * param_5 + iy].
add.s32 %src_idx, %iy, %src_row;
mul.wide.s32 %src_off, %src_idx, 8;
add.s64 %src_addr, %src, %src_off;
.loc 2 38 1
ld.global.f64 %val_in, [%src_addr];
.loc 3 86 10
cvt.rni.f64.f64 %val_out, %val_in;
.loc 2 38 42
// Store to destination[ix * param_3 + iy].
add.s32 %dst_idx, %iy, %dst_row;
mul.wide.s32 %dst_off, %dst_idx, 8;
add.s64 %dst_addr, %dst, %dst_off;
.loc 2 38 42
st.global.f64 [%dst_addr], %val_out;
.loc 2 38 22
add.s32 %iy, %step_y, %iy;
.loc 2 38 1
setp.lt.s32 %y_more, %iy, %lim_y;
@%y_more bra $L_nearbyint_y_loop;
$L_nearbyint_x_next:
.loc 2 38 22
add.s32 %ix, %step_x, %ix;
.loc 2 38 1
setp.lt.s32 %x_more, %ix, %lim_x;
@%x_more bra $L_nearbyint_x_loop;
$L_nearbyint_done:
.loc 2 38 2
ret;
}
// map_normcdf_double
// Element-wise kernel: every element visited by this thread is loaded as an f64
// from the buffer in param_4, passed through the math routine inlined from
// file 3 line 413 (presumably normcdf(), going by the kernel name; the code
// has the shape 0.5 * erfc(-x / sqrt(2))), and stored into the buffer in
// param_2.
//
// Parameters, as they are used below:
//   param_0 (u32)  exclusive bound for the index derived from the .y built-ins
//   param_1 (u32)  exclusive bound for the index derived from the .x built-ins
//   param_2 (u64)  destination base pointer (written with st.global.f64)
//   param_3 (u32)  multiplier applied to the x index for destination addressing
//   param_4 (u64)  source base pointer (read with ld.global.f64)
//   param_5 (u32)  multiplier applied to the x index for source addressing
// Element address = base + 8 * (x_index * multiplier + y_index); all index
// arithmetic and comparisons are signed 32-bit.
.visible .entry map_normcdf_double(
.param .u32 map_normcdf_double_param_0,
.param .u32 map_normcdf_double_param_1,
.param .u64 map_normcdf_double_param_2,
.param .u32 map_normcdf_double_param_3,
.param .u64 map_normcdf_double_param_4,
.param .u32 map_normcdf_double_param_5
)
{
.reg .pred %p<11>;
.reg .f32 %f<9>;
.reg .s32 %r<51>;
.reg .s64 %rd<9>;
.reg .f64 %fd<146>;
ld.param.u32 %r11, [map_normcdf_double_param_0];
ld.param.u32 %r12, [map_normcdf_double_param_1];
ld.param.u64 %rd1, [map_normcdf_double_param_2];
ld.param.u32 %r13, [map_normcdf_double_param_3];
ld.param.u64 %rd2, [map_normcdf_double_param_4];
ld.param.u32 %r14, [map_normcdf_double_param_5];
.loc 2 39 1
// %r48 = ntid.x * ctaid.x + tid.x : starting x index for this thread.
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r48, %r15, %r16, %r17;
.loc 2 39 1
setp.ge.s32 %p1, %r48, %r12;
@%p1 bra BB32_15;
// %rd3 = source, %rd6 = destination, both as global-space addresses.
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd6, %rd1;
// Outer loop head: one iteration per x index.
BB32_2:
.loc 2 39 1
// %r49 = ntid.y * ctaid.y + tid.y : the y index restarts here for every x.
mov.u32 %r18, %ctaid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %tid.y;
mad.lo.s32 %r49, %r19, %r18, %r20;
.loc 2 39 1
setp.ge.s32 %p2, %r49, %r11;
@%p2 bra BB32_14;
// Inner loop head: one iteration per y index.
BB32_3:
.loc 2 39 1
// Load source[x * param_5 + y] into %fd143.
mad.lo.s32 %r25, %r48, %r14, %r49;
mul.wide.s32 %rd4, %r25, 8;
add.s64 %rd5, %rd3, %rd4;
.loc 2 39 1
ld.global.f64 %fd143, [%rd5];
.loc 3 413 10
// Clamp: when |x| > 0d4043400000000000 (38.5), replace x by that magnitude
// carrying the sign bit of x. "leu" is also true for NaN, which skips the clamp.
abs.f64 %fd18, %fd143;
setp.leu.f64 %p3, %fd18, 0d4043400000000000;
@%p3 bra BB32_5;
{
.reg .b32 %temp;
mov.b64 {%temp, %r26}, %fd143;
}
and.b32 %r27, %r26, -2147483648;
mov.f64 %fd19, 0d4043400000000000;
.loc 3 413 10
{
.reg .b32 %temp;
mov.b64 {%temp, %r28}, %fd19;
}
and.b32 %r29, %r28, 2147483647;
or.b32 %r30, %r29, %r27;
{
.reg .b32 %temp;
mov.b64 {%r31, %temp}, %fd19;
}
mov.b64 %fd143, {%r31, %r30};
BB32_5:
// t = x * 0dBFE6A09E667F3BCD (-1/sqrt(2) to double precision), computed with a
// correction: %fd22 is the rounding error of the product, and the second
// constant is presumably the low-order tail of the multiplier.
// %fd6 = t, %fd5 = its correction term, %fd7 = |t|.
mov.f64 %fd20, 0dBFE6A09E667F3BCD;
.loc 3 413 10
mul.rn.f64 %fd4, %fd143, %fd20;
neg.f64 %fd21, %fd4;
fma.rn.f64 %fd22, %fd143, %fd20, %fd21;
mov.f64 %fd23, 0d3C8BDD3413B26456;
.loc 3 413 10
fma.rn.f64 %fd5, %fd143, %fd23, %fd22;
add.rn.f64 %fd6, %fd4, %fd5;
abs.f64 %fd7, %fd6;
// "Greater than or unordered" against +inf is only true for NaN.
setp.gtu.f64 %p4, %fd7, 0d7FF0000000000000;
@%p4 bra BB32_10;
// With a = |t|: %fd28 = a - 4, %fd24 = a + 4.
add.f64 %fd28, %fd7, 0dC010000000000000;
mov.f64 %fd29, 0dC010000000000000;
.loc 3 413 10
add.f64 %fd24, %fd7, 0d4010000000000000;
// Single-precision approximate reciprocal of (a + 4), widened back to f64.
// inline asm
cvt.rn.f32.f64 %f1,%fd24;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd25,%f2;
// inline asm
// Refine the reciprocal (%fd34), form q = (a - 4) / (a + 4) and apply one
// residual correction step -> %fd40 (the polynomial variable).
neg.f64 %fd30, %fd24;
mov.f64 %fd31, 0d3FF0000000000000;
.loc 3 413 10
fma.rn.f64 %fd32, %fd30, %fd25, %fd31;
fma.rn.f64 %fd33, %fd32, %fd32, %fd32;
fma.rn.f64 %fd34, %fd33, %fd25, %fd25;
mul.f64 %fd35, %fd28, %fd34;
add.rn.f64 %fd36, %fd35, %fd31;
fma.rn.f64 %fd37, %fd29, %fd36, %fd7;
neg.f64 %fd38, %fd35;
fma.rn.f64 %fd39, %fd38, %fd7, %fd37;
fma.rn.f64 %fd40, %fd34, %fd39, %fd35;
// Horner evaluation of a polynomial in %fd40 -> %fd85.
mov.f64 %fd41, 0dBE44E1C6FD03D328;
mov.f64 %fd42, 0dBDF8774AD4E0BFD7;
.loc 3 413 10
fma.rn.f64 %fd43, %fd42, %fd40, %fd41;
mov.f64 %fd44, 0dBE4330149F7A56B6;
.loc 3 413 10
fma.rn.f64 %fd45, %fd43, %fd40, %fd44;
mov.f64 %fd46, 0d3E7BEDDED8376273;
.loc 3 413 10
fma.rn.f64 %fd47, %fd45, %fd40, %fd46;
mov.f64 %fd48, 0d3E6F9254C3ABF22B;
.loc 3 413 10
fma.rn.f64 %fd49, %fd47, %fd40, %fd48;
mov.f64 %fd50, 0dBEAB9068C2148CF0;
.loc 3 413 10
fma.rn.f64 %fd51, %fd49, %fd40, %fd50;
mov.f64 %fd52, 0d3E94C6454DB34009;
.loc 3 413 10
fma.rn.f64 %fd53, %fd51, %fd40, %fd52;
mov.f64 %fd54, 0d3ED7F1C378F2311D;
.loc 3 413 10
fma.rn.f64 %fd55, %fd53, %fd40, %fd54;
mov.f64 %fd56, 0dBEE78E051C6D5C58;
.loc 3 413 10
fma.rn.f64 %fd57, %fd55, %fd40, %fd56;
mov.f64 %fd58, 0dBEF995B4EAD14A90;
.loc 3 413 10
fma.rn.f64 %fd59, %fd57, %fd40, %fd58;
mov.f64 %fd60, 0d3F23BE27CF0A29B2;
.loc 3 413 10
fma.rn.f64 %fd61, %fd59, %fd40, %fd60;
mov.f64 %fd62, 0dBF2A1DEF3E81672E;
.loc 3 413 10
fma.rn.f64 %fd63, %fd61, %fd40, %fd62;
mov.f64 %fd64, 0dBF48D4ABE68C1713;
.loc 3 413 10
fma.rn.f64 %fd65, %fd63, %fd40, %fd64;
mov.f64 %fd66, 0d3F749C67210DD6B4;
.loc 3 413 10
fma.rn.f64 %fd67, %fd65, %fd40, %fd66;
mov.f64 %fd68, 0dBF9096238568E357;
.loc 3 413 10
fma.rn.f64 %fd69, %fd67, %fd40, %fd68;
mov.f64 %fd70, 0d3FA3079EDF8C2DC9;
.loc 3 413 10
fma.rn.f64 %fd71, %fd69, %fd40, %fd70;
mov.f64 %fd72, 0dBFB0FB06DFF601FC;
.loc 3 413 10
fma.rn.f64 %fd73, %fd71, %fd40, %fd72;
mov.f64 %fd74, 0d3FB7FEE004DFBCDC;
.loc 3 413 10
fma.rn.f64 %fd75, %fd73, %fd40, %fd74;
mov.f64 %fd76, 0dBFB9DDB23C3DB8C6;
.loc 3 413 10
fma.rn.f64 %fd77, %fd75, %fd40, %fd76;
mov.f64 %fd78, 0d3FB16ECEFCFA5FDA;
.loc 3 413 10
fma.rn.f64 %fd79, %fd77, %fd40, %fd78;
mov.f64 %fd80, 0d3F8F7F5DF66FB6D6;
.loc 3 413 10
fma.rn.f64 %fd81, %fd79, %fd40, %fd80;
mov.f64 %fd82, 0dBFC1DF1AD154A29D;
.loc 3 413 10
fma.rn.f64 %fd83, %fd81, %fd40, %fd82;
mov.f64 %fd84, 0d3FF3BA5916E9FD7F;
.loc 3 413 10
fma.rn.f64 %fd85, %fd83, %fd40, %fd84;
// %fd26 = 2a + 1; divide the polynomial value by it (approximate reciprocal,
// refinement, then one residual correction) -> %fd8.
mov.f64 %fd86, 0d4000000000000000;
.loc 3 413 10
fma.rn.f64 %fd26, %fd86, %fd7, %fd31;
// inline asm
cvt.rn.f32.f64 %f5,%fd26;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd27,%f6;
// inline asm
neg.f64 %fd87, %fd26;
fma.rn.f64 %fd88, %fd87, %fd27, %fd31;
fma.rn.f64 %fd89, %fd88, %fd88, %fd88;
fma.rn.f64 %fd90, %fd89, %fd27, %fd27;
mul.f64 %fd91, %fd85, %fd90;
mul.f64 %fd92, %fd91, 0dC000000000000000;
fma.rn.f64 %fd93, %fd7, %fd92, %fd85;
neg.f64 %fd94, %fd91;
add.rn.f64 %fd95, %fd93, %fd94;
fma.rn.f64 %fd8, %fd95, %fd90, %fd91;
// exp(-a*a): %fd9 = -(a*a); %fd98 = round(%fd9 * log2(e)) with %r5 its integer
// value; the argument is reduced by %fd98 * ln 2 using a two-constant split.
neg.f64 %fd96, %fd7;
mul.f64 %fd9, %fd7, %fd96;
mul.f64 %fd97, %fd9, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd98, %fd97;
cvt.rzi.s32.f64 %r5, %fd98;
mov.f64 %fd99, 0dBFE62E42FEFA39EF;
.loc 3 413 10
fma.rn.f64 %fd100, %fd98, %fd99, %fd9;
mov.f64 %fd101, 0dBC7ABC9E3B39803F;
.loc 3 413 10
fma.rn.f64 %fd102, %fd98, %fd101, %fd100;
// Horner evaluation of the exponential polynomial in the reduced argument
// %fd102 -> %fd144.
mov.f64 %fd103, 0d3E928A27E30F5561;
mov.f64 %fd104, 0d3E5AE6449C0686C0;
.loc 3 413 10
fma.rn.f64 %fd105, %fd104, %fd102, %fd103;
mov.f64 %fd106, 0d3EC71DE8E6486D6B;
.loc 3 413 10
fma.rn.f64 %fd107, %fd105, %fd102, %fd106;
mov.f64 %fd108, 0d3EFA019A6B2464C5;
.loc 3 413 10
fma.rn.f64 %fd109, %fd107, %fd102, %fd108;
mov.f64 %fd110, 0d3F2A01A0171064A5;
.loc 3 413 10
fma.rn.f64 %fd111, %fd109, %fd102, %fd110;
mov.f64 %fd112, 0d3F56C16C17F29C8D;
.loc 3 413 10
fma.rn.f64 %fd113, %fd111, %fd102, %fd112;
mov.f64 %fd114, 0d3F8111111111A24E;
.loc 3 413 10
fma.rn.f64 %fd115, %fd113, %fd102, %fd114;
mov.f64 %fd116, 0d3FA555555555211D;
.loc 3 413 10
fma.rn.f64 %fd117, %fd115, %fd102, %fd116;
mov.f64 %fd118, 0d3FC5555555555530;
.loc 3 413 10
fma.rn.f64 %fd119, %fd117, %fd102, %fd118;
mov.f64 %fd120, 0d3FE0000000000005;
.loc 3 413 10
fma.rn.f64 %fd121, %fd119, %fd102, %fd120;
fma.rn.f64 %fd122, %fd121, %fd102, %fd31;
fma.rn.f64 %fd144, %fd122, %fd102, %fd31;
// Scale by 2^%r5. When |%r5| >= 1023 the scaling is split into two
// multiplications, each by a power of two built directly in the upper word.
abs.s32 %r32, %r5;
setp.lt.s32 %p5, %r32, 1023;
@%p5 bra BB32_8;
add.s32 %r33, %r5, 2046;
shl.b32 %r34, %r33, 19;
and.b32 %r35, %r34, -1048576;
shl.b32 %r36, %r33, 20;
sub.s32 %r50, %r36, %r35;
mov.u32 %r37, 0;
.loc 3 413 10
mov.b64 %fd123, {%r37, %r35};
mul.f64 %fd144, %fd144, %fd123;
bra.uni BB32_9;
BB32_8:
.loc 3 413 10
// Single-step scaling: upper word = (%r5 << 20) + 0x3FF00000.
shl.b32 %r38, %r5, 20;
add.s32 %r50, %r38, 1072693248;
BB32_9:
mov.f64 %fd140, 0d4000000000000000;
.loc 3 413 10
neg.f64 %fd139, %fd7;
mov.u32 %r39, 0;
.loc 3 413 10
// %fd125 = exp(%fd9). %fd128 is the rounding error of the product a*a, used
// to correct the exponential (%fd129) before multiplying by %fd8.
mov.b64 %fd124, {%r39, %r50};
mul.f64 %fd125, %fd144, %fd124;
neg.f64 %fd127, %fd9;
fma.rn.f64 %fd128, %fd139, %fd7, %fd127;
fma.rn.f64 %fd129, %fd125, %fd128, %fd125;
mul.f64 %fd130, %fd8, %fd129;
// Force the value to 0 when the upper word of a exceeds 1077624832
// (0x403B4000, the upper word of 27.25).
{
.reg .b32 %temp;
mov.b64 {%temp, %r40}, %fd7;
}
setp.gt.s32 %p6, %r40, 1077624832;
selp.f64 %fd131, 0d0000000000000000, %fd130, %p6;
// Negative t (sign bit set in its upper word): result = 2 - value.
{
.reg .b32 %temp;
mov.b64 {%temp, %r41}, %fd6;
}
setp.lt.s32 %p7, %r41, 0;
sub.f64 %fd133, %fd140, %fd131;
selp.f64 %fd145, %fd133, %fd131, %p7;
bra.uni BB32_11;
BB32_10:
.loc 3 413 10
// NaN path: t + t keeps the result NaN.
add.f64 %fd145, %fd6, %fd6;
BB32_11:
// Extra correction applied only when x < -1.0 ("geu" is also true for NaN,
// which skips it): recompute the residual of t and fold it into the result.
setp.geu.f64 %p8, %fd143, 0dBFF0000000000000;
@%p8 bra BB32_13;
mov.f64 %fd142, 0dBFE6A09E667F3BCD;
.loc 3 413 10
mul.rn.f64 %fd141, %fd143, %fd142;
sub.f64 %fd134, %fd141, %fd6;
add.rn.f64 %fd135, %fd134, %fd5;
mul.f64 %fd136, %fd6, 0dC000000000000000;
mul.f64 %fd137, %fd136, %fd145;
fma.rn.f64 %fd145, %fd137, %fd135, %fd145;
BB32_13:
.loc 2 39 1
mov.u32 %r47, %ntid.y;
.loc 2 39 42
// Destination element index = x * param_3 + y.
mad.lo.s32 %r42, %r48, %r13, %r49;
mul.wide.s32 %rd7, %r42, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 3 413 10
// Final scaling by 0.5 before the store.
mul.f64 %fd138, %fd145, 0d3FE0000000000000;
.loc 2 39 42
st.global.f64 [%rd8], %fd138;
.loc 2 39 22
// Advance y by nctaid.y * ntid.y and repeat while y < param_0.
mov.u32 %r44, %nctaid.y;
mad.lo.s32 %r49, %r44, %r47, %r49;
.loc 2 39 1
setp.lt.s32 %p9, %r49, %r11;
@%p9 bra BB32_3;
BB32_14:
.loc 2 39 22
// Advance x by nctaid.x * ntid.x and repeat while x < param_1.
mov.u32 %r45, %nctaid.x;
mad.lo.s32 %r48, %r45, %r15, %r48;
.loc 2 39 1
setp.lt.s32 %p10, %r48, %r12;
@%p10 bra BB32_2;
BB32_15:
.loc 2 39 2
ret;
}
.visible .entry map_normcdfinv_double(
.param .u32 map_normcdfinv_double_param_0,
.param .u32 map_normcdfinv_double_param_1,
.param .u64 map_normcdfinv_double_param_2,
.param .u32 map_normcdfinv_double_param_3,
.param .u64 map_normcdfinv_double_param_4,
.param .u32 map_normcdfinv_double_param_5
)
{
.reg .pred %p<19>;
.reg .f32 %f<17>;
.reg .s32 %r<69>;
.reg .s64 %rd<9>;
.reg .f64 %fd<265>;
ld.param.u32 %r20, [map_normcdfinv_double_param_0];
ld.param.u32 %r21, [map_normcdfinv_double_param_1];
ld.param.u64 %rd2, [map_normcdfinv_double_param_2];
ld.param.u32 %r22, [map_normcdfinv_double_param_3];
ld.param.u64 %rd3, [map_normcdfinv_double_param_4];
ld.param.u32 %r23, [map_normcdfinv_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
.loc 2 40 1
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r63, %r24, %r25, %r26;
.loc 2 40 1
setp.ge.s32 %p1, %r63, %r21;
@%p1 bra BB33_23;
.loc 2 40 1
mov.u32 %r27, %ntid.y;
.loc 2 40 22
mov.u32 %r28, %nctaid.y;
mul.lo.s32 %r2, %r28, %r27;
cvta.to.global.u64 %rd6, %rd2;
BB33_2:
.loc 2 40 1
mov.u32 %r29, %ctaid.y;
mov.u32 %r31, %tid.y;
mad.lo.s32 %r64, %r27, %r29, %r31;
.loc 2 40 1
setp.ge.s32 %p2, %r64, %r20;
@%p2 bra BB33_22;
BB33_3:
.loc 2 40 1
mul.lo.s32 %r62, %r63, %r23;
add.s32 %r36, %r64, %r62;
mul.wide.s32 %rd4, %r36, 8;
add.s64 %rd5, %rd1, %rd4;
.loc 2 40 1
ld.global.f64 %fd18, [%rd5];
.loc 3 394 10
add.f64 %fd1, %fd18, %fd18;
neg.f64 %fd2, %fd1;
mov.f64 %fd19, 0d4000000000000000;
.loc 3 394 10
add.rn.f64 %fd3, %fd19, %fd2;
setp.le.f64 %p3, %fd1, 0d3FFFFC0B65AA4E0E;
setp.ge.f64 %p4, %fd1, 0d3F4FA4D2AD8F904D;
and.pred %p5, %p4, %p3;
@%p5 bra BB33_20;
setp.gt.f64 %p6, %fd1, 0d3FF0000000000000;
selp.f64 %fd4, %fd3, %fd1, %p6;
setp.ltu.f64 %p7, %fd4, 0d2B2BFF2EE48E0530;
{
.reg .b32 %temp;
mov.b64 {%temp, %r65}, %fd4;
}
{
.reg .b32 %temp;
mov.b64 {%r66, %temp}, %fd4;
}
@%p7 bra BB33_6;
shr.u32 %r37, %r65, 20;
and.b32 %r38, %r37, 2046;
add.s32 %r39, %r38, -1022;
cvt.rn.f64.s32 %fd24, %r39;
and.b32 %r40, %r65, -2145386497;
add.s32 %r41, %r40, 1071644672;
mov.b64 %fd25, {%r66, %r41};
add.f64 %fd26, %fd25, 0dBFF0000000000000;
add.f64 %fd20, %fd25, 0d3FF0000000000000;
mov.f64 %fd27, 0d3FF0000000000000;
.loc 3 394 10
// inline asm
cvt.rn.f32.f64 %f1,%fd20;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd21,%f2;
// inline asm
neg.f64 %fd28, %fd20;
fma.rn.f64 %fd29, %fd28, %fd21, %fd27;
fma.rn.f64 %fd30, %fd29, %fd29, %fd29;
fma.rn.f64 %fd31, %fd30, %fd21, %fd21;
mul.f64 %fd32, %fd26, %fd31;
mov.f64 %fd33, 0dC000000000000000;
.loc 3 394 10
fma.rn.f64 %fd34, %fd33, %fd32, %fd26;
neg.f64 %fd35, %fd32;
fma.rn.f64 %fd36, %fd35, %fd26, %fd34;
fma.rn.f64 %fd37, %fd36, %fd31, %fd32;
mul.f64 %fd38, %fd37, %fd37;
mov.f64 %fd39, 0d3FA55CF59CDC5D89;
mov.f64 %fd40, 0d3FB5C5C218C775C9;
.loc 3 394 10
fma.rn.f64 %fd41, %fd40, %fd38, %fd39;
mov.f64 %fd42, 0d3FAEFD18CF6EBB9C;
.loc 3 394 10
fma.rn.f64 %fd43, %fd41, %fd38, %fd42;
mov.f64 %fd44, 0d3FB10682EDCB8D1B;
.loc 3 394 10
fma.rn.f64 %fd45, %fd43, %fd38, %fd44;
mov.f64 %fd46, 0d3FB3B1DD3AC7FC96;
.loc 3 394 10
fma.rn.f64 %fd47, %fd45, %fd38, %fd46;
mov.f64 %fd48, 0d3FB745CB459B54A6;
.loc 3 394 10
fma.rn.f64 %fd49, %fd47, %fd38, %fd48;
mov.f64 %fd50, 0d3FBC71C741A0669F;
.loc 3 394 10
fma.rn.f64 %fd51, %fd49, %fd38, %fd50;
mov.f64 %fd52, 0d3FC249249209112E;
.loc 3 394 10
fma.rn.f64 %fd53, %fd51, %fd38, %fd52;
mov.f64 %fd54, 0d3FC99999999A06C1;
.loc 3 394 10
fma.rn.f64 %fd55, %fd53, %fd38, %fd54;
mov.f64 %fd56, 0d3FD5555555555535;
.loc 3 394 10
fma.rn.f64 %fd57, %fd55, %fd38, %fd56;
mul.f64 %fd58, %fd57, %fd38;
fma.rn.f64 %fd59, %fd58, %fd37, %fd37;
add.f64 %fd60, %fd59, %fd59;
mov.f64 %fd61, 0d3FE62E42FEFA39EF;
.loc 3 394 10
fma.rn.f64 %fd62, %fd24, %fd61, %fd60;
neg.f64 %fd22, %fd62;
// inline asm
cvt.rn.f32.f64 %f5, %fd22;
// inline asm
// inline asm
rsqrt.approx.ftz.f32 %f6, %f5;
// inline asm
// inline asm
cvt.f64.f32 %fd23, %f6;
// inline asm
mul.rn.f64 %fd63, %fd23, %fd23;
neg.f64 %fd64, %fd63;
fma.rn.f64 %fd65, %fd22, %fd64, %fd27;
mov.f64 %fd66, 0d3FE0000000000000;
mov.f64 %fd67, 0d3FD8000000000000;
.loc 3 394 10
fma.rn.f64 %fd68, %fd67, %fd65, %fd66;
mul.rn.f64 %fd69, %fd65, %fd23;
fma.rn.f64 %fd70, %fd68, %fd69, %fd23;
mov.f64 %fd71, 0d4000A0E7333839AA;
mov.f64 %fd72, 0d3FEBE9222591AFAB;
.loc 3 394 10
fma.rn.f64 %fd73, %fd72, %fd70, %fd71;
mov.f64 %fd74, 0d4008768CF7E57D5C;
.loc 3 394 10
fma.rn.f64 %fd75, %fd73, %fd70, %fd74;
mov.f64 %fd76, 0d400B77E7E28DA583;
.loc 3 394 10
fma.rn.f64 %fd77, %fd75, %fd70, %fd76;
mov.f64 %fd78, 0d3FF34F26A4F99CF9;
.loc 3 394 10
fma.rn.f64 %fd79, %fd77, %fd70, %fd78;
mov.f64 %fd80, 0d3FC1F674ADB019ED;
.loc 3 394 10
fma.rn.f64 %fd81, %fd79, %fd70, %fd80;
mov.f64 %fd82, 0d3F75DDAE9506431D;
.loc 3 394 10
fma.rn.f64 %fd83, %fd81, %fd70, %fd82;
mov.f64 %fd84, 0d3F0ADA49AA32489C;
.loc 3 394 10
fma.rn.f64 %fd85, %fd83, %fd70, %fd84;
add.f64 %fd86, %fd70, 0d4001E90FF51C2197;
mov.f64 %fd87, 0d40111EA3A7CF3820;
.loc 3 394 10
fma.rn.f64 %fd88, %fd86, %fd70, %fd87;
mov.f64 %fd89, 0d4011A0E4A4749594;
.loc 3 394 10
fma.rn.f64 %fd90, %fd88, %fd70, %fd89;
mov.f64 %fd91, 0d400D4E977D38C14D;
.loc 3 394 10
fma.rn.f64 %fd92, %fd90, %fd70, %fd91;
mov.f64 %fd93, 0d3FF37FD567EC0D5F;
.loc 3 394 10
fma.rn.f64 %fd94, %fd92, %fd70, %fd93;
mov.f64 %fd95, 0d3FC1FB9D7F676033;
.loc 3 394 10
fma.rn.f64 %fd96, %fd94, %fd70, %fd95;
mov.f64 %fd97, 0d3F75DDCDF98946E4;
.loc 3 394 10
fma.rn.f64 %fd98, %fd96, %fd70, %fd97;
mov.f64 %fd99, 0d3F0ADA42D79D8DBB;
.loc 3 394 10
fma.rn.f64 %fd100, %fd98, %fd70, %fd99;
mul.f64 %fd101, %fd100, %fd70;
div.rn.f64 %fd263, %fd85, %fd101;
bra.uni BB33_19;
BB33_6:
.loc 3 394 10
setp.gt.f64 %p8, %fd4, 0d0000000000000000;
setp.lt.f64 %p9, %fd4, 0d7FF0000000000000;
and.pred %p10, %p8, %p9;
@%p10 bra BB33_12;
abs.f64 %fd102, %fd4;
setp.gtu.f64 %p11, %fd102, 0d7FF0000000000000;
@%p11 bra BB33_11;
setp.neu.f64 %p12, %fd4, 0d0000000000000000;
@%p12 bra BB33_10;
mov.f64 %fd262, 0dFFF0000000000000;
bra.uni BB33_18;
BB33_10:
.loc 3 394 10
setp.eq.f64 %p13, %fd4, 0d7FF0000000000000;
selp.f64 %fd262, %fd4, 0dFFF8000000000000, %p13;
bra.uni BB33_18;
BB33_11:
.loc 3 394 10
add.f64 %fd262, %fd4, %fd4;
bra.uni BB33_18;
BB33_12:
.loc 3 394 10
setp.lt.u32 %p14, %r65, 1048576;
@%p14 bra BB33_14;
mov.u32 %r67, -1023;
bra.uni BB33_15;
BB33_14:
.loc 3 394 10
mul.f64 %fd104, %fd4, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r65}, %fd104;
}
{
.reg .b32 %temp;
mov.b64 {%r66, %temp}, %fd104;
}
mov.u32 %r67, -1077;
BB33_15:
.loc 3 394 10
shr.s32 %r44, %r65, 20;
add.s32 %r68, %r67, %r44;
and.b32 %r45, %r65, -2146435073;
or.b32 %r46, %r45, 1072693248;
mov.b64 %fd261, {%r66, %r46};
setp.lt.u32 %p15, %r46, 1073127583;
@%p15 bra BB33_17;
{
.reg .b32 %temp;
mov.b64 {%r47, %temp}, %fd261;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r48}, %fd261;
}
add.s32 %r49, %r48, -1048576;
mov.b64 %fd261, {%r47, %r49};
add.s32 %r68, %r68, 1;
BB33_17:
add.f64 %fd105, %fd261, 0d3FF0000000000000;
mov.f64 %fd107, 0d3FF0000000000000;
.loc 3 394 10
// inline asm
cvt.rn.f32.f64 %f9,%fd105;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f10,%f9;
// inline asm
// inline asm
cvt.f64.f32 %fd106,%f10;
// inline asm
neg.f64 %fd108, %fd105;
fma.rn.f64 %fd109, %fd108, %fd106, %fd107;
fma.rn.f64 %fd110, %fd109, %fd109, %fd109;
fma.rn.f64 %fd111, %fd110, %fd106, %fd106;
add.f64 %fd112, %fd261, 0dBFF0000000000000;
mul.f64 %fd113, %fd112, %fd111;
fma.rn.f64 %fd114, %fd112, %fd111, %fd113;
mul.f64 %fd115, %fd114, %fd114;
mov.f64 %fd116, 0d3ED0EE258B7A8B04;
mov.f64 %fd117, 0d3EB1380B3AE80F1E;
.loc 3 394 10
fma.rn.f64 %fd118, %fd117, %fd115, %fd116;
mov.f64 %fd119, 0d3EF3B2669F02676F;
.loc 3 394 10
fma.rn.f64 %fd120, %fd118, %fd115, %fd119;
mov.f64 %fd121, 0d3F1745CBA9AB0956;
.loc 3 394 10
fma.rn.f64 %fd122, %fd120, %fd115, %fd121;
mov.f64 %fd123, 0d3F3C71C72D1B5154;
.loc 3 394 10
fma.rn.f64 %fd124, %fd122, %fd115, %fd123;
mov.f64 %fd125, 0d3F624924923BE72D;
.loc 3 394 10
fma.rn.f64 %fd126, %fd124, %fd115, %fd125;
mov.f64 %fd127, 0d3F8999999999A3C4;
.loc 3 394 10
fma.rn.f64 %fd128, %fd126, %fd115, %fd127;
mov.f64 %fd129, 0d3FB5555555555554;
.loc 3 394 10
fma.rn.f64 %fd130, %fd128, %fd115, %fd129;
sub.f64 %fd131, %fd112, %fd114;
add.f64 %fd132, %fd131, %fd131;
neg.f64 %fd133, %fd114;
fma.rn.f64 %fd134, %fd133, %fd112, %fd132;
mul.f64 %fd135, %fd111, %fd134;
mul.f64 %fd136, %fd130, %fd115;
fma.rn.f64 %fd137, %fd136, %fd114, %fd135;
cvt.rn.f64.s32 %fd138, %r68;
mov.f64 %fd139, 0d3FE62E42FEFA39EF;
.loc 3 394 10
fma.rn.f64 %fd140, %fd138, %fd139, %fd114;
neg.s32 %r50, %r68;
cvt.rn.f64.s32 %fd141, %r50;
fma.rn.f64 %fd142, %fd141, %fd139, %fd140;
sub.f64 %fd143, %fd142, %fd114;
sub.f64 %fd144, %fd137, %fd143;
mov.f64 %fd145, 0d3C7ABC9E3B39803F;
.loc 3 394 10
fma.rn.f64 %fd146, %fd138, %fd145, %fd144;
add.f64 %fd262, %fd140, %fd146;
BB33_18:
neg.f64 %fd147, %fd262;
rsqrt.approx.f64 %fd148, %fd147;
mov.f64 %fd149, 0d3FFA2013964E259C;
mov.f64 %fd150, 0d3FE8E2101C71B0BF;
.loc 3 394 10
fma.rn.f64 %fd151, %fd150, %fd148, %fd149;
mov.f64 %fd152, 0d3FDABFE90921BE68;
.loc 3 394 10
fma.rn.f64 %fd153, %fd151, %fd148, %fd152;
mov.f64 %fd154, 0d3F97E41314DE00D4;
.loc 3 394 10
fma.rn.f64 %fd155, %fd153, %fd148, %fd154;
mov.f64 %fd156, 0d3F311BD487102E94;
.loc 3 394 10
fma.rn.f64 %fd157, %fd155, %fd148, %fd156;
add.f64 %fd158, %fd148, 0d3FF59895C30BAA54;
mov.f64 %fd159, 0d3FFAE8E5956A143F;
.loc 3 394 10
fma.rn.f64 %fd160, %fd158, %fd148, %fd159;
mov.f64 %fd161, 0d3FDACCE85FF7383D;
.loc 3 394 10
fma.rn.f64 %fd162, %fd160, %fd148, %fd161;
mov.f64 %fd163, 0d3F97E43B6CAC34FE;
.loc 3 394 10
fma.rn.f64 %fd164, %fd162, %fd148, %fd163;
mov.f64 %fd165, 0d3F311BD08289EB12;
.loc 3 394 10
fma.rn.f64 %fd166, %fd164, %fd148, %fd165;
mul.f64 %fd167, %fd166, %fd148;
div.rn.f64 %fd263, %fd157, %fd167;
BB33_19:
neg.f64 %fd168, %fd263;
selp.f64 %fd264, %fd168, %fd263, %p6;
bra.uni BB33_21;
BB33_20:
.loc 3 394 10
mul.rn.f64 %fd171, %fd3, %fd1;
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd171;
}
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd171;
}
shr.u32 %r53, %r51, 20;
and.b32 %r54, %r53, 2046;
add.s32 %r55, %r54, -1022;
cvt.rn.f64.s32 %fd172, %r55;
and.b32 %r56, %r51, -2145386497;
add.s32 %r57, %r56, 1071644672;
mov.b64 %fd173, {%r52, %r57};
add.f64 %fd174, %fd173, 0dBFF0000000000000;
add.f64 %fd169, %fd173, 0d3FF0000000000000;
mov.f64 %fd175, 0d3FF0000000000000;
.loc 3 394 10
// inline asm
cvt.rn.f32.f64 %f13,%fd169;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f14,%f13;
// inline asm
// inline asm
cvt.f64.f32 %fd170,%f14;
// inline asm
neg.f64 %fd176, %fd169;
fma.rn.f64 %fd177, %fd176, %fd170, %fd175;
fma.rn.f64 %fd178, %fd177, %fd177, %fd177;
fma.rn.f64 %fd179, %fd178, %fd170, %fd170;
mul.f64 %fd180, %fd174, %fd179;
mov.f64 %fd181, 0dC000000000000000;
.loc 3 394 10
fma.rn.f64 %fd182, %fd181, %fd180, %fd174;
neg.f64 %fd183, %fd180;
fma.rn.f64 %fd184, %fd183, %fd174, %fd182;
fma.rn.f64 %fd185, %fd184, %fd179, %fd180;
mul.f64 %fd186, %fd185, %fd185;
mov.f64 %fd187, 0d3FA55CF59CDC5D89;
mov.f64 %fd188, 0d3FB5C5C218C775C9;
.loc 3 394 10
fma.rn.f64 %fd189, %fd188, %fd186, %fd187;
mov.f64 %fd190, 0d3FAEFD18CF6EBB9C;
.loc 3 394 10
fma.rn.f64 %fd191, %fd189, %fd186, %fd190;
mov.f64 %fd192, 0d3FB10682EDCB8D1B;
.loc 3 394 10
fma.rn.f64 %fd193, %fd191, %fd186, %fd192;
mov.f64 %fd194, 0d3FB3B1DD3AC7FC96;
.loc 3 394 10
fma.rn.f64 %fd195, %fd193, %fd186, %fd194;
mov.f64 %fd196, 0d3FB745CB459B54A6;
.loc 3 394 10
fma.rn.f64 %fd197, %fd195, %fd186, %fd196;
mov.f64 %fd198, 0d3FBC71C741A0669F;
.loc 3 394 10
fma.rn.f64 %fd199, %fd197, %fd186, %fd198;
mov.f64 %fd200, 0d3FC249249209112E;
.loc 3 394 10
fma.rn.f64 %fd201, %fd199, %fd186, %fd200;
mov.f64 %fd202, 0d3FC99999999A06C1;
.loc 3 394 10
fma.rn.f64 %fd203, %fd201, %fd186, %fd202;
mov.f64 %fd204, 0d3FD5555555555535;
.loc 3 394 10
fma.rn.f64 %fd205, %fd203, %fd186, %fd204;
mul.f64 %fd206, %fd205, %fd186;
fma.rn.f64 %fd207, %fd206, %fd185, %fd185;
add.f64 %fd208, %fd207, %fd207;
mov.f64 %fd209, 0d3FE62E42FEFA39EF;
.loc 3 394 10
fma.rn.f64 %fd210, %fd172, %fd209, %fd208;
mov.f64 %fd211, 0dC009000000000000;
.loc 3 394 10
sub.f64 %fd212, %fd211, %fd210;
mov.f64 %fd213, 0dBC08DDF93324D327;
mov.f64 %fd214, 0dBBB135D2E746E627;
.loc 3 394 10
fma.rn.f64 %fd215, %fd214, %fd212, %fd213;
mov.f64 %fd216, 0d3C37B83EEF0B7C9F;
.loc 3 394 10
fma.rn.f64 %fd217, %fd215, %fd212, %fd216;
mov.f64 %fd218, 0d3C69BA72CD589B91;
.loc 3 394 10
fma.rn.f64 %fd219, %fd217, %fd212, %fd218;
mov.f64 %fd220, 0dBCA33689090A6B96;
.loc 3 394 10
fma.rn.f64 %fd221, %fd219, %fd212, %fd220;
mov.f64 %fd222, 0d3C782E11898132E0;
.loc 3 394 10
fma.rn.f64 %fd223, %fd221, %fd212, %fd222;
mov.f64 %fd224, 0d3CFDE4ACFD9E26BA;
.loc 3 394 10
fma.rn.f64 %fd225, %fd223, %fd212, %fd224;
mov.f64 %fd226, 0dBD26D33EED66C487;
.loc 3 394 10
fma.rn.f64 %fd227, %fd225, %fd212, %fd226;
mov.f64 %fd228, 0dBD36F2167040D8E2;
.loc 3 394 10
fma.rn.f64 %fd229, %fd227, %fd212, %fd228;
mov.f64 %fd230, 0d3D872A22C2D77E20;
.loc 3 394 10
fma.rn.f64 %fd231, %fd229, %fd212, %fd230;
mov.f64 %fd232, 0dBDAC8859C4E5C0AF;
.loc 3 394 10
fma.rn.f64 %fd233, %fd231, %fd212, %fd232;
mov.f64 %fd234, 0dBDCDC583D118A561;
.loc 3 394 10
fma.rn.f64 %fd235, %fd233, %fd212, %fd234;
mov.f64 %fd236, 0d3E120F47CCF46B3C;
.loc 3 394 10
fma.rn.f64 %fd237, %fd235, %fd212, %fd236;
mov.f64 %fd238, 0dBE31A9E38DC84D60;
.loc 3 394 10
fma.rn.f64 %fd239, %fd237, %fd212, %fd238;
mov.f64 %fd240, 0dBE5F36CD6D3D46A9;
.loc 3 394 10
fma.rn.f64 %fd241, %fd239, %fd212, %fd240;
mov.f64 %fd242, 0d3E9C6B4F5D03B787;
.loc 3 394 10
fma.rn.f64 %fd243, %fd241, %fd212, %fd242;
mov.f64 %fd244, 0dBEB6E8A5434AE8A2;
.loc 3 394 10
fma.rn.f64 %fd245, %fd243, %fd212, %fd244;
mov.f64 %fd246, 0dBEED1D1F7B8736F6;
.loc 3 394 10
fma.rn.f64 %fd247, %fd245, %fd212, %fd246;
mov.f64 %fd248, 0d3F2879C2A212F024;
.loc 3 394 10
fma.rn.f64 %fd249, %fd247, %fd212, %fd248;
mov.f64 %fd250, 0dBF4845769484FCA8;
.loc 3 394 10
fma.rn.f64 %fd251, %fd249, %fd212, %fd250;
mov.f64 %fd252, 0dBF78B6C33114F909;
.loc 3 394 10
fma.rn.f64 %fd253, %fd251, %fd212, %fd252;
mov.f64 %fd254, 0d3FCEBD80D9B13E28;
.loc 3 394 10
fma.rn.f64 %fd255, %fd253, %fd212, %fd254;
mov.f64 %fd256, 0d3FFA755E7C99AE86;
.loc 3 394 10
fma.rn.f64 %fd257, %fd255, %fd212, %fd256;
fma.rn.f64 %fd264, %fd257, %fd2, %fd257;
BB33_21:
.loc 2 40 42
mul.lo.s32 %r61, %r63, %r22;
.loc 3 394 10
mul.f64 %fd258, %fd264, 0dBCA21165F626CDD5;
mov.f64 %fd259, 0dBFF6A09E667F3BCC;
.loc 3 394 10
fma.rn.f64 %fd260, %fd259, %fd264, %fd258;
.loc 2 40 42
add.s32 %r58, %r64, %r61;
mul.wide.s32 %rd7, %r58, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 2 40 42
st.global.f64 [%rd8], %fd260;
.loc 2 40 22
add.s32 %r64, %r2, %r64;
.loc 2 40 1
setp.lt.s32 %p17, %r64, %r20;
@%p17 bra BB33_3;
BB33_22:
.loc 2 40 22
mov.u32 %r59, %nctaid.x;
mad.lo.s32 %r63, %r59, %r24, %r63;
.loc 2 40 1
setp.lt.s32 %p18, %r63, %r21;
@%p18 bra BB33_2;
BB33_23:
.loc 2 40 2
ret;
}
// map_rcbrt_double: elementwise reciprocal cube root over a 2-D strided array.
// For every (i, j) with i < param_1 and j < param_0 it stores
//   out[i * param_3 + j] = rcbrt(in[i * param_5 + j])
// where out = param_2 and in = param_4 (both converted to global addresses).
// i starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// j starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// Element indices are formed in 32-bit signed arithmetic; only the final
// multiply by 8 (bytes per double) is widened to 64 bits.
// NOTE(review): param_0/param_1 look like row/column counts and
// param_3/param_5 like leading dimensions -- confirm against the host launcher.
.visible .entry map_rcbrt_double(
.param .u32 map_rcbrt_double_param_0,
.param .u32 map_rcbrt_double_param_1,
.param .u64 map_rcbrt_double_param_2,
.param .u32 map_rcbrt_double_param_3,
.param .u64 map_rcbrt_double_param_4,
.param .u32 map_rcbrt_double_param_5
)
{
.reg .pred %p<12>;
.reg .f32 %f<7>;
.reg .s32 %r<59>;
.reg .s64 %rd<9>;
.reg .f64 %fd<27>;
ld.param.u32 %r20, [map_rcbrt_double_param_0];
ld.param.u32 %r21, [map_rcbrt_double_param_1];
ld.param.u64 %rd3, [map_rcbrt_double_param_2];
ld.param.u32 %r22, [map_rcbrt_double_param_3];
ld.param.u64 %rd4, [map_rcbrt_double_param_4];
ld.param.u32 %r23, [map_rcbrt_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 41 1
// r53 = i = ntid.x * ctaid.x + tid.x; exit immediately if i >= param_1.
mov.u32 %r24, %ntid.x;
mov.u32 %r25, %ctaid.x;
mov.u32 %r26, %tid.x;
mad.lo.s32 %r53, %r24, %r25, %r26;
.loc 2 41 1
setp.ge.s32 %p1, %r53, %r21;
@%p1 bra BB34_12;
.loc 2 41 1
// r2 = initial j = ntid.y * ctaid.y + tid.y; r3 = j step = nctaid.y * ntid.y.
mov.u32 %r27, %tid.y;
mov.u32 %r28, %ntid.y;
mov.u32 %r29, %ctaid.y;
mad.lo.s32 %r2, %r28, %r29, %r27;
.loc 2 41 22
mov.u32 %r30, %nctaid.y;
mul.lo.s32 %r3, %r30, %r28;
// Outer loop over i.
BB34_2:
.loc 2 41 1
setp.ge.s32 %p2, %r2, %r20;
@%p2 bra BB34_11;
.loc 2 41 1
// r5 = i * param_5 (input base index), r6 = i * param_3 (output base index).
mul.lo.s32 %r5, %r53, %r23;
.loc 2 41 42
mul.lo.s32 %r6, %r53, %r22;
mov.u32 %r54, %r2;
// Inner loop over j (r7 holds the current j).
BB34_4:
.loc 2 41 1
mov.u32 %r7, %r54;
add.s32 %r31, %r7, %r5;
mul.wide.s32 %rd5, %r31, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 41 1
ld.global.f64 %fd1, [%rd6];
.loc 3 323 10
// Special inputs: p3 = (x == 0); p4 = |x| >= +inf or x is NaN (unordered compare).
setp.eq.f64 %p3, %fd1, 0d0000000000000000;
abs.f64 %fd2, %fd1;
setp.geu.f64 %p4, %fd2, 0d7FF0000000000000;
or.pred %p5, %p3, %p4;
@%p5 bra BB34_9;
// Split |x| into low word r55 and high word r56.
{
.reg .b32 %temp;
mov.b64 {%r55, %temp}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r56}, %fd2;
}
// r57 = 11-bit biased exponent field; zero means |x| is subnormal.
shr.u32 %r32, %r56, 20;
and.b32 %r57, %r32, 2047;
setp.eq.s32 %p6, %r57, 0;
@%p6 bra BB34_7;
// Normal input: no extra exponent compensation (r58 = 0).
mov.u32 %r58, 0;
bra.uni BB34_8;
BB34_7:
.loc 3 323 10
// Subnormal input: scale by 0d4350000000000000 (2^54) and re-extract the words;
// r58 = 18 (= 54 / 3) is added back to the result exponent below.
mul.f64 %fd6, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%r55, %temp}, %fd6;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r56}, %fd6;
}
shr.u32 %r35, %r56, 20;
and.b32 %r57, %r35, 2047;
mov.u32 %r58, 18;
BB34_8:
.loc 3 323 10
// r37 = round((exponent - 1022) * 0f3EAAAAAB), 0f3EAAAAAB being about 1/3.
add.s32 %r36, %r57, -1022;
cvt.rn.f32.s32 %f1, %r36;
mul.f32 %f2, %f1, 0f3EAAAAAB;
cvt.rni.s32.f32 %r37, %f2;
// Subtract 3 * r37 from the exponent field (3145728 = 3 << 20) -> reduced value fd7.
mad.lo.s32 %r38, %r37, -3145728, %r56;
mov.b64 %fd7, {%r55, %r38};
// Single-precision initial estimate: ex2(lg2(fd7) * 0fBEAAAAAB), i.e. about fd7^(-1/3).
cvt.rn.f32.f64 %f3, %fd7;
lg2.approx.f32 %f4, %f3;
mul.f32 %f5, %f4, 0fBEAAAAAB;
ex2.approx.f32 %f6, %f5;
cvt.f64.f32 %fd8, %f6;
// Two refinement steps of the form y <- y + (1 - fd7 * y^3) * (y * 0d3FD5555555555555),
// where 0d3FD5555555555555 is about 1/3.
mul.f64 %fd9, %fd8, %fd8;
neg.f64 %fd10, %fd7;
mul.f64 %fd11, %fd8, %fd10;
mov.f64 %fd12, 0d3FF0000000000000;
.loc 3 323 10
fma.rn.f64 %fd13, %fd9, %fd11, %fd12;
mul.f64 %fd14, %fd8, 0d3FD5555555555555;
fma.rn.f64 %fd15, %fd13, %fd14, %fd8;
mul.f64 %fd16, %fd15, %fd15;
mul.f64 %fd17, %fd15, %fd10;
fma.rn.f64 %fd18, %fd16, %fd17, %fd12;
mul.f64 %fd19, %fd15, 0d3FD5555555555555;
fma.rn.f64 %fd20, %fd18, %fd19, %fd15;
{
.reg .b32 %temp;
mov.b64 {%r39, %temp}, %fd20;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r40}, %fd20;
}
// Re-apply the exponent: add (r58 - r37) to the exponent field of the estimate.
sub.s32 %r41, %r58, %r37;
shl.b32 %r42, %r41, 20;
add.s32 %r43, %r40, %r42;
mov.b64 %fd21, {%r39, %r43};
// Negate the result when the high word of x is negative (sign bit set).
{
.reg .b32 %temp;
mov.b64 {%temp, %r44}, %fd1;
}
setp.lt.s32 %p7, %r44, 0;
neg.f64 %fd22, %fd21;
selp.f64 %fd26, %fd22, %fd21, %p7;
bra.uni BB34_10;
BB34_9:
.loc 3 323 10
// Special-case path: x == 0 -> +inf, |x| == inf -> 0, NaN -> x + x;
// the sign bit of x is then OR-ed into the high word of the result.
selp.f64 %fd23, 0d7FF0000000000000, 0d0000000000000000, %p3;
add.f64 %fd24, %fd1, %fd1;
setp.gtu.f64 %p9, %fd2, 0d7FF0000000000000;
selp.f64 %fd25, %fd24, %fd23, %p9;
{
.reg .b32 %temp;
mov.b64 {%r45, %temp}, %fd25;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r46}, %fd25;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r47}, %fd1;
}
and.b32 %r48, %r47, -2147483648;
or.b32 %r49, %r46, %r48;
mov.b64 %fd26, {%r45, %r49};
BB34_10:
.loc 2 41 42
// Store the result at out[i * param_3 + j], then advance j by r3.
add.s32 %r50, %r7, %r6;
mul.wide.s32 %rd7, %r50, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 41 42
st.global.f64 [%rd8], %fd26;
.loc 2 41 22
add.s32 %r18, %r3, %r7;
.loc 2 41 1
setp.lt.s32 %p10, %r18, %r20;
mov.u32 %r54, %r18;
@%p10 bra BB34_4;
BB34_11:
.loc 2 41 22
// Advance i by nctaid.x * ntid.x and repeat while i < param_1.
mov.u32 %r51, %nctaid.x;
mad.lo.s32 %r53, %r51, %r24, %r53;
.loc 2 41 1
setp.lt.s32 %p11, %r53, %r21;
@%p11 bra BB34_2;
BB34_12:
.loc 2 41 2
ret;
}
// map_rint_double: elementwise round-to-nearest-even integer (cvt.rni.f64.f64)
// over a 2-D strided array. For every (i, j) with i < param_1 and j < param_0:
//   out[i * param_3 + j] = rint(in[i * param_5 + j])
// where out = param_2 and in = param_4 (both converted to global addresses).
// i starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// j starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// Element indices are 32-bit signed; only the byte offset is widened to 64 bits.
// NOTE(review): param_0/param_1 look like row/column counts and
// param_3/param_5 like leading dimensions -- confirm against the host launcher.
.visible .entry map_rint_double(
.param .u32 map_rint_double_param_0,
.param .u32 map_rint_double_param_1,
.param .u64 map_rint_double_param_2,
.param .u32 map_rint_double_param_3,
.param .u64 map_rint_double_param_4,
.param .u32 map_rint_double_param_5
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<3>;
ld.param.u32 %r12, [map_rint_double_param_0];
ld.param.u32 %r13, [map_rint_double_param_1];
ld.param.u64 %rd3, [map_rint_double_param_2];
ld.param.u32 %r14, [map_rint_double_param_3];
ld.param.u64 %rd4, [map_rint_double_param_4];
ld.param.u32 %r15, [map_rint_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 42 1
// r25 = i = ntid.x * ctaid.x + tid.x; exit immediately if i >= param_1.
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 42 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB35_6;
.loc 2 42 1
// r3 = initial j = ntid.y * ctaid.y + tid.y.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 42 22
// r4 = i step = nctaid.x * ntid.x; r5 = j step = nctaid.y * ntid.y.
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 42 22
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over i.
BB35_2:
.loc 2 42 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB35_5;
.loc 2 42 1
// r7 = i * param_5 (input base index), r8 = i * param_3 (output base index).
mul.lo.s32 %r7, %r25, %r15;
.loc 2 42 42
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over j (r9 holds the current j).
BB35_4:
.loc 2 42 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 42 1
ld.global.f64 %fd1, [%rd6];
.loc 3 67 10
// Round to the nearest integer value, ties to even, staying in f64.
cvt.rni.f64.f64 %fd2, %fd1;
.loc 2 42 42
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 42 42
st.global.f64 [%rd8], %fd2;
.loc 2 42 22
add.s32 %r10, %r5, %r9;
.loc 2 42 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB35_4;
BB35_5:
.loc 2 42 22
// Advance i by r4 and repeat while i < param_1.
add.s32 %r25, %r4, %r25;
.loc 2 42 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB35_2;
BB35_6:
.loc 2 42 2
ret;
}
// map_round_double: elementwise rounding to the nearest integer with halfway
// cases rounded away from zero, over a 2-D strided array.
// For every (i, j) with i < param_1 and j < param_0:
//   out[i * param_3 + j] = round(in[i * param_5 + j])
// where out = param_2 and in = param_4 (both converted to global addresses).
// i starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// j starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// Element indices are 32-bit signed; only the byte offset is widened to 64 bits.
// NOTE(review): param_0/param_1 look like row/column counts and
// param_3/param_5 like leading dimensions -- confirm against the host launcher.
.visible .entry map_round_double(
.param .u32 map_round_double_param_0,
.param .u32 map_round_double_param_1,
.param .u64 map_round_double_param_2,
.param .u32 map_round_double_param_3,
.param .u64 map_round_double_param_4,
.param .u32 map_round_double_param_5
)
{
.reg .pred %p<7>;
.reg .s32 %r<32>;
.reg .s64 %rd<9>;
.reg .f64 %fd<9>;
ld.param.u32 %r10, [map_round_double_param_0];
ld.param.u32 %r11, [map_round_double_param_1];
ld.param.u64 %rd3, [map_round_double_param_2];
ld.param.u32 %r12, [map_round_double_param_3];
ld.param.u64 %rd4, [map_round_double_param_4];
ld.param.u32 %r13, [map_round_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 43 1
// r30 = i = ntid.x * ctaid.x + tid.x; exit immediately if i >= param_1.
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r30, %r14, %r15, %r16;
.loc 2 43 1
setp.ge.s32 %p1, %r30, %r11;
@%p1 bra BB36_8;
.loc 2 43 1
// r2 = initial j = ntid.y * ctaid.y + tid.y; r3 = j step = nctaid.y * ntid.y.
mov.u32 %r17, %tid.y;
mov.u32 %r18, %ntid.y;
mov.u32 %r19, %ctaid.y;
mad.lo.s32 %r2, %r18, %r19, %r17;
.loc 2 43 22
mov.u32 %r20, %nctaid.y;
mul.lo.s32 %r3, %r20, %r18;
// Outer loop over i.
BB36_2:
.loc 2 43 1
setp.ge.s32 %p2, %r2, %r10;
@%p2 bra BB36_7;
.loc 2 43 1
// r5 = i * param_5 (input base index), r6 = i * param_3 (output base index).
mul.lo.s32 %r5, %r30, %r13;
.loc 2 43 42
mul.lo.s32 %r6, %r30, %r12;
mov.u32 %r31, %r2;
// Inner loop over j (r7 holds the current j).
BB36_4:
.loc 2 43 1
mov.u32 %r7, %r31;
add.s32 %r21, %r7, %r5;
mul.wide.s32 %rd5, %r21, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 43 1
ld.global.f64 %fd8, [%rd6];
.loc 3 483 10
// |x| >= 0d4330000000000000 (2^52): x is stored unchanged.
abs.f64 %fd2, %fd8;
setp.ge.f64 %p3, %fd2, 0d4330000000000000;
@%p3 bra BB36_6;
// Otherwise: t = trunc(|x| + 0.5), forced to 0 when |x| < 0.5.
add.f64 %fd5, %fd2, 0d3FE0000000000000;
cvt.rzi.f64.f64 %fd6, %fd5;
setp.lt.f64 %p4, %fd2, 0d3FE0000000000000;
selp.f64 %fd7, 0d0000000000000000, %fd6, %p4;
{
.reg .b32 %temp;
mov.b64 {%r22, %temp}, %fd7;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r23}, %fd7;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r24}, %fd8;
}
// Copy the sign bit of x onto t; fd8 now holds the rounded result.
and.b32 %r25, %r24, -2147483648;
or.b32 %r26, %r23, %r25;
mov.b64 %fd8, {%r22, %r26};
BB36_6:
.loc 2 43 42
// Store the result at out[i * param_3 + j], then advance j by r3.
add.s32 %r27, %r7, %r6;
mul.wide.s32 %rd7, %r27, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 43 42
st.global.f64 [%rd8], %fd8;
.loc 2 43 22
add.s32 %r8, %r3, %r7;
.loc 2 43 1
setp.lt.s32 %p5, %r8, %r10;
mov.u32 %r31, %r8;
@%p5 bra BB36_4;
BB36_7:
.loc 2 43 22
// Advance i by nctaid.x * ntid.x and repeat while i < param_1.
mov.u32 %r28, %nctaid.x;
mad.lo.s32 %r30, %r28, %r14, %r30;
.loc 2 43 1
setp.lt.s32 %p6, %r30, %r11;
@%p6 bra BB36_2;
BB36_8:
.loc 2 43 2
ret;
}
// map_rsqrt_double: elementwise reciprocal square root over a 2-D strided
// array, using the approximate instruction rsqrt.approx.f64.
// For every (i, j) with i < param_1 and j < param_0:
//   out[i * param_3 + j] = rsqrt(in[i * param_5 + j])
// where out = param_2 and in = param_4 (both converted to global addresses).
// i starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// j starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// Element indices are 32-bit signed; only the byte offset is widened to 64 bits.
// NOTE(review): the result comes straight from the .approx instruction with no
// refinement step -- confirm the accuracy is acceptable for callers.
// NOTE(review): param_0/param_1 look like row/column counts and
// param_3/param_5 like leading dimensions -- confirm against the host launcher.
.visible .entry map_rsqrt_double(
.param .u32 map_rsqrt_double_param_0,
.param .u32 map_rsqrt_double_param_1,
.param .u64 map_rsqrt_double_param_2,
.param .u32 map_rsqrt_double_param_3,
.param .u64 map_rsqrt_double_param_4,
.param .u32 map_rsqrt_double_param_5
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<3>;
ld.param.u32 %r12, [map_rsqrt_double_param_0];
ld.param.u32 %r13, [map_rsqrt_double_param_1];
ld.param.u64 %rd3, [map_rsqrt_double_param_2];
ld.param.u32 %r14, [map_rsqrt_double_param_3];
ld.param.u64 %rd4, [map_rsqrt_double_param_4];
ld.param.u32 %r15, [map_rsqrt_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 44 1
// r25 = i = ntid.x * ctaid.x + tid.x; exit immediately if i >= param_1.
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 44 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB37_6;
.loc 2 44 1
// r3 = initial j = ntid.y * ctaid.y + tid.y.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 44 22
// r4 = i step = nctaid.x * ntid.x; r5 = j step = nctaid.y * ntid.y.
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 44 22
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over i.
BB37_2:
.loc 2 44 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB37_5;
.loc 2 44 1
// r7 = i * param_5 (input base index), r8 = i * param_3 (output base index).
mul.lo.s32 %r7, %r25, %r15;
.loc 2 44 42
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over j (r9 holds the current j).
BB37_4:
.loc 2 44 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 44 1
ld.global.f64 %fd1, [%rd6];
.loc 4 2790 10
rsqrt.approx.f64 %fd2, %fd1;
.loc 2 44 42
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 44 42
st.global.f64 [%rd8], %fd2;
.loc 2 44 22
add.s32 %r10, %r5, %r9;
.loc 2 44 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB37_4;
BB37_5:
.loc 2 44 22
// Advance i by r4 and repeat while i < param_1.
add.s32 %r25, %r4, %r25;
.loc 2 44 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB37_2;
BB37_6:
.loc 2 44 2
ret;
}
// map_sin_double: elementwise sine over a 2-D strided array.
// For every (i, j) with i < param_1 and j < param_0:
//   out[i * param_3 + j] = sin(in[i * param_5 + j])
// where out = param_2 and in = param_4 (both converted to global addresses).
// i starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// j starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// A 4-byte local slot (__local_depot38) holds the quadrant index so that its
// address can be passed to __internal_trig_reduction_slowpathd.
// Element indices are 32-bit signed; only the byte offset is widened to 64 bits.
// NOTE(review): param_0/param_1 look like row/column counts and
// param_3/param_5 like leading dimensions -- confirm against the host launcher.
.visible .entry map_sin_double(
.param .u32 map_sin_double_param_0,
.param .u32 map_sin_double_param_1,
.param .u64 map_sin_double_param_2,
.param .u32 map_sin_double_param_3,
.param .u64 map_sin_double_param_4,
.param .u32 map_sin_double_param_5
)
{
.local .align 4 .b8 __local_depot38[4];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<10>;
.reg .s32 %r<40>;
.reg .s64 %rd<16>;
.reg .f64 %fd<43>;
mov.u64 %SPL, __local_depot38;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r13, [map_sin_double_param_0];
ld.param.u32 %r14, [map_sin_double_param_1];
ld.param.u64 %rd1, [map_sin_double_param_2];
ld.param.u32 %r15, [map_sin_double_param_3];
ld.param.u64 %rd2, [map_sin_double_param_4];
ld.param.u32 %r16, [map_sin_double_param_5];
.loc 2 45 1
// r37 = i = ntid.x * ctaid.x + tid.x; exit immediately if i >= param_1.
mov.u32 %r17, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r37, %r17, %r18, %r19;
.loc 2 45 1
setp.ge.s32 %p1, %r37, %r14;
@%p1 bra BB38_14;
.loc 2 45 1
// r2 = j step = nctaid.y * ntid.y; rd3 = input base, rd13 = output base.
mov.u32 %r20, %ntid.y;
.loc 2 45 22
mov.u32 %r21, %nctaid.y;
mul.lo.s32 %r2, %r21, %r20;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd13, %rd1;
// Outer loop over i; the initial j is recomputed on each pass.
BB38_2:
.loc 2 45 1
mov.u32 %r22, %ctaid.y;
mov.u32 %r24, %tid.y;
mad.lo.s32 %r38, %r20, %r22, %r24;
.loc 2 45 1
setp.ge.s32 %p2, %r38, %r13;
@%p2 bra BB38_13;
.loc 2 45 1
// r4 = i * param_5 (input base index), r5 = i * param_3 (output base index).
mul.lo.s32 %r4, %r37, %r16;
.loc 2 45 42
mul.lo.s32 %r5, %r37, %r15;
// Inner loop over j (r38).
BB38_4:
.loc 2 45 1
add.s32 %r29, %r38, %r4;
mul.wide.s32 %rd4, %r29, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd40, [%rd5];
.loc 3 198 10
// If |x| is infinite, replace x with x * 0 (an invalid operation -> NaN).
abs.f64 %fd14, %fd40;
setp.neu.f64 %p3, %fd14, 0d7FF0000000000000;
@%p3 bra BB38_6;
mov.f64 %fd15, 0d0000000000000000;
.loc 3 198 10
mul.rn.f64 %fd40, %fd40, %fd15;
BB38_6:
// rd6 = generic address of the local quadrant slot.
add.u64 %rd6, %SP, 0;
.loc 3 198 10
// q = round-to-nearest-int(x * 0d3FE45F306DC9C883), the constant being about 2/pi.
mul.f64 %fd16, %fd40, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r39, %fd16;
cvta.to.local.u64 %rd7, %rd6;
.loc 3 198 10
st.local.u32 [%rd7], %r39;
// Reduced argument r = x - q * (pi/2), with pi/2 supplied as three constants
// of decreasing magnitude applied through successive FMAs.
cvt.rn.f64.s32 %fd17, %r39;
neg.f64 %fd18, %fd17;
mov.f64 %fd19, 0d3FF921FB54442D18;
.loc 3 198 10
fma.rn.f64 %fd20, %fd18, %fd19, %fd40;
mov.f64 %fd21, 0d3C91A62633145C00;
.loc 3 198 10
fma.rn.f64 %fd22, %fd18, %fd21, %fd20;
mov.f64 %fd23, 0d397B839A252049C0;
.loc 3 198 10
fma.rn.f64 %fd41, %fd18, %fd23, %fd22;
// The fast reduction is kept only when |x| <= 0d41E0000000000000 (2^31);
// larger magnitudes go through the out-of-line slow path.
abs.f64 %fd24, %fd40;
setp.leu.f64 %p4, %fd24, 0d41E0000000000000;
@%p4 bra BB38_8;
// Callseq Start 5
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd40;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 3 198 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd41, [retval0+0];
}
// Callseq End 5
// Reload q from the local slot whose address was passed to the callee.
ld.local.u32 %r39, [%rd7];
BB38_8:
// r31 = (q & 1) * 8 selects one of two coefficient sets (8 doubles apart) in
// __cudart_sin_cos_coeffs; p6 is true when q is even.
shl.b32 %r30, %r39, 3;
and.b32 %r31, %r30, 8;
and.b32 %r32, %r39, 1;
setp.eq.b32 %p5, %r32, 1;
not.pred %p6, %p5;
selp.f64 %fd25, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p6;
mul.wide.u32 %rd10, %r31, 8;
mov.u64 %rd11, __cudart_sin_cos_coeffs;
add.s64 %rd12, %rd11, %rd10;
.loc 3 198 10
// Horner evaluation in r^2 (fd7) with coefficients read from constant memory.
ld.const.f64 %fd26, [%rd12+8];
mul.rn.f64 %fd7, %fd41, %fd41;
fma.rn.f64 %fd27, %fd25, %fd7, %fd26;
ld.const.f64 %fd28, [%rd12+16];
fma.rn.f64 %fd29, %fd27, %fd7, %fd28;
ld.const.f64 %fd30, [%rd12+24];
fma.rn.f64 %fd31, %fd29, %fd7, %fd30;
ld.const.f64 %fd32, [%rd12+32];
fma.rn.f64 %fd33, %fd31, %fd7, %fd32;
ld.const.f64 %fd34, [%rd12+40];
fma.rn.f64 %fd35, %fd33, %fd7, %fd34;
ld.const.f64 %fd36, [%rd12+48];
fma.rn.f64 %fd8, %fd35, %fd7, %fd36;
// q even: result = r + poly * r.
fma.rn.f64 %fd42, %fd8, %fd41, %fd41;
@%p6 bra BB38_10;
// q odd: result = 1 + poly * r^2.
mov.f64 %fd37, 0d3FF0000000000000;
.loc 3 198 10
fma.rn.f64 %fd42, %fd8, %fd7, %fd37;
BB38_10:
// If bit 1 of q is set, negate the result (computed as result * -1 + 0).
and.b32 %r33, %r39, 2;
setp.eq.s32 %p7, %r33, 0;
@%p7 bra BB38_12;
mov.f64 %fd38, 0d0000000000000000;
mov.f64 %fd39, 0dBFF0000000000000;
.loc 3 198 10
fma.rn.f64 %fd42, %fd42, %fd39, %fd38;
BB38_12:
.loc 2 45 42
// Store the result at out[i * param_3 + j], then advance j by r2.
add.s32 %r34, %r38, %r5;
mul.wide.s32 %rd14, %r34, 8;
add.s64 %rd15, %rd13, %rd14;
st.global.f64 [%rd15], %fd42;
.loc 2 45 22
add.s32 %r38, %r2, %r38;
.loc 2 45 1
setp.lt.s32 %p8, %r38, %r13;
@%p8 bra BB38_4;
BB38_13:
.loc 2 45 22
// Advance i by nctaid.x * ntid.x and repeat while i < param_1.
mov.u32 %r35, %nctaid.x;
mad.lo.s32 %r37, %r35, %r17, %r37;
.loc 2 45 1
setp.lt.s32 %p9, %r37, %r14;
@%p9 bra BB38_2;
BB38_14:
.loc 2 45 2
ret;
}
// map_sinh_double: elementwise hyperbolic sine over a 2-D strided array.
// For every (i, j) with i < param_1 and j < param_0:
//   out[i * param_3 + j] = sinh(in[i * param_5 + j])
// where out = param_2 and in = param_4 (both converted to global addresses).
// i starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// j starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// The value is computed on |x| and the sign bit of x is OR-ed back in at the end.
// Element indices are 32-bit signed; only the byte offset is widened to 64 bits.
// NOTE(review): param_0/param_1 look like row/column counts and
// param_3/param_5 like leading dimensions -- confirm against the host launcher.
.visible .entry map_sinh_double(
.param .u32 map_sinh_double_param_0,
.param .u32 map_sinh_double_param_1,
.param .u64 map_sinh_double_param_2,
.param .u32 map_sinh_double_param_3,
.param .u64 map_sinh_double_param_4,
.param .u32 map_sinh_double_param_5
)
{
.reg .pred %p<10>;
.reg .s32 %r<45>;
.reg .s64 %rd<9>;
.reg .f64 %fd<65>;
ld.param.u32 %r11, [map_sinh_double_param_0];
ld.param.u32 %r12, [map_sinh_double_param_1];
ld.param.u64 %rd3, [map_sinh_double_param_2];
ld.param.u32 %r13, [map_sinh_double_param_3];
ld.param.u64 %rd4, [map_sinh_double_param_4];
ld.param.u32 %r14, [map_sinh_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 46 1
// r43 = i = ntid.x * ctaid.x + tid.x; exit immediately if i >= param_1.
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r43, %r15, %r16, %r17;
.loc 2 46 1
setp.ge.s32 %p1, %r43, %r12;
@%p1 bra BB39_9;
.loc 2 46 1
// r2 = initial j = ntid.y * ctaid.y + tid.y; r3 = j step = nctaid.y * ntid.y.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r2, %r19, %r20, %r18;
.loc 2 46 22
mov.u32 %r21, %nctaid.y;
mul.lo.s32 %r3, %r21, %r19;
// Outer loop over i.
BB39_2:
.loc 2 46 1
setp.ge.s32 %p2, %r2, %r11;
@%p2 bra BB39_8;
.loc 2 46 1
// r5 = i * param_5 (input base index), r6 = i * param_3 (output base index).
mul.lo.s32 %r5, %r43, %r14;
.loc 2 46 42
mul.lo.s32 %r6, %r43, %r13;
mov.u32 %r44, %r2;
// Inner loop over j (r7 holds the current j).
BB39_4:
.loc 2 46 1
mov.u32 %r7, %r44;
add.s32 %r22, %r7, %r5;
mul.wide.s32 %rd5, %r22, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 46 1
ld.global.f64 %fd5, [%rd6];
.loc 3 268 10
// r8 = high word of x; clearing its sign bit yields fd1 = |x|.
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd5;
}
and.b32 %r23, %r8, 2147483647;
{
.reg .b32 %temp;
mov.b64 {%r24, %temp}, %fd5;
}
mov.b64 %fd1, {%r24, %r23};
// High word below 1072693248 (0x3FF00000, the high word of 1.0) means |x| < 1:
// use the small-argument polynomial at BB39_6.
setp.lt.u32 %p3, %r23, 1072693248;
@%p3 bra BB39_6;
{
.reg .b32 %temp;
mov.b64 {%temp, %r25}, %fd1;
}
// k = round(|x| * 0d3FF71547652B82FE), the constant being about log2(e);
// r27 = k - 1.
mul.f64 %fd6, %fd1, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd7, %fd6;
cvt.rzi.s32.f64 %r26, %fd7;
add.s32 %r27, %r26, -1;
// Reduced argument = |x| - k * ln2, with ln2 split into two constants.
mov.f64 %fd8, 0dBFE62E42FEFA39EF;
.loc 3 268 10
fma.rn.f64 %fd9, %fd7, %fd8, %fd1;
mov.f64 %fd10, 0dBC7ABC9E3B39803F;
.loc 3 268 10
fma.rn.f64 %fd11, %fd7, %fd10, %fd9;
// p4 selects the unreduced argument with a zero scale exponent when the
// doubled high word is below 2142496327.
shl.b32 %r28, %r25, 1;
setp.lt.u32 %p4, %r28, 2142496327;
selp.b32 %r29, 0, %r27, %p4;
mov.u32 %r30, 0;
.loc 3 268 10
selp.f64 %fd12, %fd1, %fd11, %p4;
// Horner polynomial in the (possibly reduced) argument fd12.
mov.f64 %fd13, 0d3E5AF86D8EBD13CD;
mov.f64 %fd14, 0d3E21F4076ACD15B6;
.loc 3 268 10
fma.rn.f64 %fd15, %fd14, %fd12, %fd13;
mov.f64 %fd16, 0d3E927E5092BA033D;
.loc 3 268 10
fma.rn.f64 %fd17, %fd15, %fd12, %fd16;
mov.f64 %fd18, 0d3EC71DDE6C5F9DA1;
.loc 3 268 10
fma.rn.f64 %fd19, %fd17, %fd12, %fd18;
mov.f64 %fd20, 0d3EFA01A018D034E6;
.loc 3 268 10
fma.rn.f64 %fd21, %fd19, %fd12, %fd20;
mov.f64 %fd22, 0d3F2A01A01B3B6940;
.loc 3 268 10
fma.rn.f64 %fd23, %fd21, %fd12, %fd22;
mov.f64 %fd24, 0d3F56C16C16C1B5DD;
.loc 3 268 10
fma.rn.f64 %fd25, %fd23, %fd12, %fd24;
mov.f64 %fd26, 0d3F8111111110F74D;
.loc 3 268 10
fma.rn.f64 %fd27, %fd25, %fd12, %fd26;
mov.f64 %fd28, 0d3FA555555555554D;
.loc 3 268 10
fma.rn.f64 %fd29, %fd27, %fd12, %fd28;
mov.f64 %fd30, 0d3FC5555555555557;
.loc 3 268 10
fma.rn.f64 %fd31, %fd29, %fd12, %fd30;
mov.f64 %fd32, 0d3FE0000000000000;
.loc 3 268 10
fma.rn.f64 %fd33, %fd31, %fd12, %fd32;
mul.f64 %fd34, %fd33, %fd12;
fma.rn.f64 %fd35, %fd34, %fd12, %fd12;
// Build the power-of-two scale fd36 directly from the exponent r29; when
// r29 == 1024 the exponent is lowered by one and the result doubled afterwards.
setp.eq.s32 %p5, %r29, 1024;
selp.b32 %r31, -1, 0, %p5;
add.s32 %r32, %r31, %r29;
shl.b32 %r33, %r32, 20;
add.s32 %r34, %r33, 1072693248;
mov.b64 %fd36, {%r30, %r34};
// fd37 = 0.5 (high word 1071644672 = 0x3FE00000, low word 0).
mov.u32 %r35, 1071644672;
.loc 3 268 10
mov.b64 %fd37, {%r30, %r35};
// fd39 = poly * scale + (scale - 0.5).
// NOTE(review): this appears to be (e^|x| - 1) / 2 -- confirm.
sub.f64 %fd38, %fd36, %fd37;
fma.rn.f64 %fd39, %fd35, %fd36, %fd38;
add.f64 %fd40, %fd39, %fd39;
selp.f64 %fd41, %fd40, %fd39, %p5;
setp.eq.s32 %p6, %r28, 0;
selp.f64 %fd42, %fd12, %fd41, %p6;
// result = t + t / (2 * t + 1), with t = fd42.
mov.f64 %fd43, 0d3FF0000000000000;
mov.f64 %fd44, 0d4000000000000000;
.loc 3 268 10
fma.rn.f64 %fd45, %fd44, %fd42, %fd43;
div.rn.f64 %fd46, %fd42, %fd45;
add.f64 %fd47, %fd42, %fd46;
// The computed value is kept when |x| < 0d408633CE8FB9F87E or |x| is NaN
// (unordered compare); otherwise the result is +inf.
setp.ltu.f64 %p7, %fd1, 0d408633CE8FB9F87E;
selp.f64 %fd64, %fd47, 0d7FF0000000000000, %p7;
bra.uni BB39_7;
BB39_6:
.loc 3 268 10
// |x| < 1: result = |x| + |x| * (x^2 * poly(x^2)).
mul.f64 %fd48, %fd1, %fd1;
mov.f64 %fd49, 0d3DE611A561D87DEF;
mov.f64 %fd50, 0d3D6B4C75AB274C53;
.loc 3 268 10
fma.rn.f64 %fd51, %fd50, %fd48, %fd49;
mov.f64 %fd52, 0d3E5AE64671B18F5C;
.loc 3 268 10
fma.rn.f64 %fd53, %fd51, %fd48, %fd52;
mov.f64 %fd54, 0d3EC71DE3A465B1E4;
.loc 3 268 10
fma.rn.f64 %fd55, %fd53, %fd48, %fd54;
mov.f64 %fd56, 0d3F2A01A01A02899D;
.loc 3 268 10
fma.rn.f64 %fd57, %fd55, %fd48, %fd56;
mov.f64 %fd58, 0d3F811111111110A6;
.loc 3 268 10
fma.rn.f64 %fd59, %fd57, %fd48, %fd58;
mov.f64 %fd60, 0d3FC5555555555556;
.loc 3 268 10
fma.rn.f64 %fd61, %fd59, %fd48, %fd60;
mul.f64 %fd62, %fd61, %fd48;
fma.rn.f64 %fd64, %fd62, %fd1, %fd1;
BB39_7:
// OR the sign bit of the original x (from r8) into the high word of the result.
and.b32 %r36, %r8, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%temp, %r37}, %fd64;
}
or.b32 %r38, %r37, %r36;
{
.reg .b32 %temp;
mov.b64 {%r39, %temp}, %fd64;
}
mov.b64 %fd63, {%r39, %r38};
.loc 2 46 42
// Store the result at out[i * param_3 + j], then advance j by r3.
add.s32 %r40, %r7, %r6;
mul.wide.s32 %rd7, %r40, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 46 42
st.global.f64 [%rd8], %fd63;
.loc 2 46 22
add.s32 %r9, %r3, %r7;
.loc 2 46 1
setp.lt.s32 %p8, %r9, %r11;
mov.u32 %r44, %r9;
@%p8 bra BB39_4;
BB39_8:
.loc 2 46 22
// Advance i by nctaid.x * ntid.x and repeat while i < param_1.
mov.u32 %r41, %nctaid.x;
mad.lo.s32 %r43, %r41, %r15, %r43;
.loc 2 46 1
setp.lt.s32 %p9, %r43, %r12;
@%p9 bra BB39_2;
BB39_9:
.loc 2 46 2
ret;
}
// map_sinpi_double: elementwise sin(pi * x) over a 2-D strided array.
// For every (i, j) with i < param_1 and j < param_0:
//   out[i * param_3 + j] = sinpi(in[i * param_5 + j])
// where out = param_2 and in = param_4 (both converted to global addresses).
// i starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// j starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// Element indices are 32-bit signed; only the byte offset is widened to 64 bits.
// NOTE(review): param_0/param_1 look like row/column counts and
// param_3/param_5 like leading dimensions -- confirm against the host launcher.
.visible .entry map_sinpi_double(
.param .u32 map_sinpi_double_param_0,
.param .u32 map_sinpi_double_param_1,
.param .u64 map_sinpi_double_param_2,
.param .u32 map_sinpi_double_param_3,
.param .u64 map_sinpi_double_param_4,
.param .u32 map_sinpi_double_param_5
)
{
.reg .pred %p<9>;
.reg .s32 %r<38>;
.reg .s64 %rd<14>;
.reg .f64 %fd<37>;
ld.param.u32 %r10, [map_sinpi_double_param_0];
ld.param.u32 %r11, [map_sinpi_double_param_1];
ld.param.u64 %rd1, [map_sinpi_double_param_2];
ld.param.u32 %r12, [map_sinpi_double_param_3];
ld.param.u64 %rd2, [map_sinpi_double_param_4];
ld.param.u32 %r13, [map_sinpi_double_param_5];
.loc 2 47 1
// r36 = i = ntid.x * ctaid.x + tid.x; exit immediately if i >= param_1.
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r36, %r14, %r15, %r16;
.loc 2 47 1
setp.ge.s32 %p1, %r36, %r11;
@%p1 bra BB40_12;
.loc 2 47 1
// r2 = j step = nctaid.y * ntid.y; rd3 = input base, rd11 = output base.
mov.u32 %r17, %ntid.y;
.loc 2 47 22
mov.u32 %r18, %nctaid.y;
mul.lo.s32 %r2, %r18, %r17;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd11, %rd1;
// Outer loop over i; the initial j is recomputed on each pass.
BB40_2:
.loc 2 47 1
mov.u32 %r19, %ctaid.y;
mov.u32 %r21, %tid.y;
mad.lo.s32 %r37, %r17, %r19, %r21;
.loc 2 47 1
setp.ge.s32 %p2, %r37, %r10;
@%p2 bra BB40_11;
.loc 2 47 1
// r4 = i * param_5 (input base index).
mul.lo.s32 %r4, %r36, %r13;
// Inner loop over j (r37).
BB40_4:
add.s32 %r26, %r37, %r4;
mul.wide.s32 %rd4, %r26, 8;
add.s64 %rd5, %rd3, %rd4;
.loc 2 47 1
ld.global.f64 %fd1, [%rd5];
.loc 3 208 10
{
.reg .b32 %temp;
mov.b64 {%r27, %temp}, %fd1;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r28}, %fd1;
}
// Adding 1048576 (1 << 20) to the high word increments the exponent field,
// i.e. fd11 = 2 * x for ordinary finite inputs.
add.s32 %r29, %r28, 1048576;
mov.b64 %fd11, {%r27, %r29};
// q = round-to-nearest(2x), kept as f64 (fd12), s64 (rd6) and its low 32 bits (r7).
cvt.rni.f64.f64 %fd12, %fd11;
cvt.rzi.s64.f64 %rd6, %fd12;
cvt.u32.u64 %r7, %rd6;
// Reduced argument: (x - q * 0.5) * pi, with pi applied as a high constant
// (0d400921FB54442D18) plus a low-order correction (0d3CA1A62633145C07).
neg.f64 %fd13, %fd12;
mov.f64 %fd14, 0d3FE0000000000000;
.loc 3 208 10
fma.rn.f64 %fd15, %fd13, %fd14, %fd1;
mul.f64 %fd16, %fd15, 0d3CA1A62633145C07;
mov.f64 %fd17, 0d400921FB54442D18;
.loc 3 208 10
fma.rn.f64 %fd18, %fd15, %fd17, %fd16;
// r31 = (q & 1) * 8 selects one of two coefficient sets (8 doubles apart) in
// __cudart_sin_cos_coeffs; p4 is true when q is even.
shl.b32 %r30, %r7, 3;
and.b32 %r31, %r30, 8;
mul.rn.f64 %fd2, %fd18, %fd18;
and.b64 %rd7, %rd6, 1;
setp.eq.b64 %p3, %rd7, 1;
not.pred %p4, %p3;
selp.f64 %fd19, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p4;
mul.wide.u32 %rd8, %r31, 8;
mov.u64 %rd9, __cudart_sin_cos_coeffs;
add.s64 %rd10, %rd9, %rd8;
.loc 3 208 10
// Horner evaluation in r^2 (fd2) with coefficients read from constant memory.
ld.const.f64 %fd20, [%rd10+8];
fma.rn.f64 %fd21, %fd19, %fd2, %fd20;
ld.const.f64 %fd22, [%rd10+16];
fma.rn.f64 %fd23, %fd21, %fd2, %fd22;
ld.const.f64 %fd24, [%rd10+24];
fma.rn.f64 %fd25, %fd23, %fd2, %fd24;
ld.const.f64 %fd26, [%rd10+32];
fma.rn.f64 %fd27, %fd25, %fd2, %fd26;
ld.const.f64 %fd28, [%rd10+40];
fma.rn.f64 %fd29, %fd27, %fd2, %fd28;
ld.const.f64 %fd30, [%rd10+48];
fma.rn.f64 %fd3, %fd29, %fd2, %fd30;
// q even: result = r + poly * r.
fma.rn.f64 %fd36, %fd3, %fd18, %fd18;
@%p4 bra BB40_6;
// q odd: result = 1 + poly * r^2.
mov.f64 %fd31, 0d3FF0000000000000;
.loc 3 208 10
fma.rn.f64 %fd36, %fd3, %fd2, %fd31;
BB40_6:
// If bit 1 of q is set, negate the result (computed as result * -1 + 0).
and.b32 %r32, %r7, 2;
setp.eq.s32 %p5, %r32, 0;
@%p5 bra BB40_8;
mov.f64 %fd32, 0d0000000000000000;
mov.f64 %fd33, 0dBFF0000000000000;
.loc 3 208 10
fma.rn.f64 %fd36, %fd36, %fd33, %fd32;
BB40_8:
// When x equals trunc(x) the result is replaced by x * 0: a zero carrying the
// sign of x for finite integers, and NaN for infinite x.
cvt.rzi.f64.f64 %fd34, %fd1;
setp.neu.f64 %p6, %fd1, %fd34;
@%p6 bra BB40_10;
mov.f64 %fd35, 0d0000000000000000;
.loc 3 208 10
mul.rn.f64 %fd36, %fd1, %fd35;
BB40_10:
.loc 2 47 42
// Store the result at out[i * param_3 + j], then advance j by r2.
mad.lo.s32 %r33, %r36, %r12, %r37;
mul.wide.s32 %rd12, %r33, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 2 47 42
st.global.f64 [%rd13], %fd36;
.loc 2 47 22
add.s32 %r37, %r2, %r37;
.loc 2 47 1
setp.lt.s32 %p7, %r37, %r10;
@%p7 bra BB40_4;
BB40_11:
.loc 2 47 22
// Advance i by nctaid.x * ntid.x and repeat while i < param_1.
mov.u32 %r34, %nctaid.x;
mad.lo.s32 %r36, %r34, %r14, %r36;
.loc 2 47 1
setp.lt.s32 %p8, %r36, %r11;
@%p8 bra BB40_2;
BB40_12:
.loc 2 47 2
ret;
}
// map_sqrt_double: elementwise square root (sqrt.rn.f64, round-to-nearest)
// over a 2-D strided array. For every (i, j) with i < param_1 and j < param_0:
//   out[i * param_3 + j] = sqrt(in[i * param_5 + j])
// where out = param_2 and in = param_4 (both converted to global addresses).
// i starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// j starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// Element indices are 32-bit signed; only the byte offset is widened to 64 bits.
// NOTE(review): param_0/param_1 look like row/column counts and
// param_3/param_5 like leading dimensions -- confirm against the host launcher.
.visible .entry map_sqrt_double(
.param .u32 map_sqrt_double_param_0,
.param .u32 map_sqrt_double_param_1,
.param .u64 map_sqrt_double_param_2,
.param .u32 map_sqrt_double_param_3,
.param .u64 map_sqrt_double_param_4,
.param .u32 map_sqrt_double_param_5
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<3>;
ld.param.u32 %r12, [map_sqrt_double_param_0];
ld.param.u32 %r13, [map_sqrt_double_param_1];
ld.param.u64 %rd3, [map_sqrt_double_param_2];
ld.param.u32 %r14, [map_sqrt_double_param_3];
ld.param.u64 %rd4, [map_sqrt_double_param_4];
ld.param.u32 %r15, [map_sqrt_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 48 1
// r25 = i = ntid.x * ctaid.x + tid.x; exit immediately if i >= param_1.
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 48 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB41_6;
.loc 2 48 1
// r3 = initial j = ntid.y * ctaid.y + tid.y.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 48 22
// r4 = i step = nctaid.x * ntid.x; r5 = j step = nctaid.y * ntid.y.
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 48 22
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over i.
BB41_2:
.loc 2 48 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB41_5;
.loc 2 48 1
// r7 = i * param_5 (input base index), r8 = i * param_3 (output base index).
mul.lo.s32 %r7, %r25, %r15;
.loc 2 48 42
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over j (r9 holds the current j).
BB41_4:
.loc 2 48 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 48 1
ld.global.f64 %fd1, [%rd6];
.loc 4 3060 10
sqrt.rn.f64 %fd2, %fd1;
.loc 2 48 42
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 48 42
st.global.f64 [%rd8], %fd2;
.loc 2 48 22
add.s32 %r10, %r5, %r9;
.loc 2 48 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB41_4;
BB41_5:
.loc 2 48 22
// Advance i by r4 and repeat while i < param_1.
add.s32 %r25, %r4, %r25;
.loc 2 48 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB41_2;
BB41_6:
.loc 2 48 2
ret;
}
// map_tan_double: elementwise tangent over a 2-D strided array.
// For every (i, j) with i < param_1 and j < param_0:
//   out[i * param_3 + j] = tan(in[i * param_5 + j])
// where out = param_2 and in = param_4 (both converted to global addresses).
// i starts at ntid.x * ctaid.x + tid.x and advances by nctaid.x * ntid.x;
// j starts at ntid.y * ctaid.y + tid.y and advances by nctaid.y * ntid.y.
// A 4-byte local slot (__local_depot42) holds the quadrant index so that its
// address can be passed to __internal_trig_reduction_slowpathd.
// Element indices are 32-bit signed; only the byte offset is widened to 64 bits.
// NOTE(review): param_0/param_1 look like row/column counts and
// param_3/param_5 like leading dimensions -- confirm against the host launcher.
.visible .entry map_tan_double(
.param .u32 map_tan_double_param_0,
.param .u32 map_tan_double_param_1,
.param .u64 map_tan_double_param_2,
.param .u32 map_tan_double_param_3,
.param .u64 map_tan_double_param_4,
.param .u32 map_tan_double_param_5
)
{
.local .align 4 .b8 __local_depot42[4];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<8>;
.reg .f32 %f<5>;
.reg .s32 %r<37>;
.reg .s64 %rd<13>;
.reg .f64 %fd<68>;
mov.u64 %SPL, __local_depot42;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r13, [map_tan_double_param_0];
ld.param.u32 %r14, [map_tan_double_param_1];
ld.param.u64 %rd2, [map_tan_double_param_2];
ld.param.u32 %r15, [map_tan_double_param_3];
ld.param.u64 %rd3, [map_tan_double_param_4];
ld.param.u32 %r16, [map_tan_double_param_5];
cvta.to.global.u64 %rd1, %rd3;
.loc 2 49 1
// r34 = i = ntid.x * ctaid.x + tid.x; exit immediately if i >= param_1.
mov.u32 %r17, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r34, %r17, %r18, %r19;
.loc 2 49 1
setp.ge.s32 %p1, %r34, %r14;
@%p1 bra BB42_12;
.loc 2 49 1
// r2 = j step = nctaid.y * ntid.y; rd10 = output base (rd1 = input base).
mov.u32 %r20, %ntid.y;
.loc 2 49 22
mov.u32 %r21, %nctaid.y;
mul.lo.s32 %r2, %r21, %r20;
cvta.to.global.u64 %rd10, %rd2;
// Outer loop over i; the initial j is recomputed on each pass.
BB42_2:
.loc 2 49 1
mov.u32 %r22, %ctaid.y;
mov.u32 %r24, %tid.y;
mad.lo.s32 %r35, %r20, %r22, %r24;
.loc 2 49 1
setp.ge.s32 %p2, %r35, %r13;
@%p2 bra BB42_11;
.loc 2 49 1
// r4 = i * param_5 (input base index), r5 = i * param_3 (output base index).
mul.lo.s32 %r4, %r34, %r16;
.loc 2 49 42
mul.lo.s32 %r5, %r34, %r15;
// Inner loop over j (r35).
BB42_4:
.loc 2 49 1
add.s32 %r29, %r35, %r4;
mul.wide.s32 %rd4, %r29, 8;
add.s64 %rd5, %rd1, %rd4;
ld.global.f64 %fd65, [%rd5];
.loc 3 218 10
// If |x| is infinite, replace x with x * 0 (an invalid operation -> NaN).
abs.f64 %fd11, %fd65;
setp.neu.f64 %p3, %fd11, 0d7FF0000000000000;
@%p3 bra BB42_6;
mov.f64 %fd12, 0d0000000000000000;
.loc 3 218 10
mul.rn.f64 %fd65, %fd65, %fd12;
BB42_6:
// rd6 = generic address of the local quadrant slot.
add.u64 %rd6, %SP, 0;
.loc 3 218 10
// q = round-to-nearest-int(x * 0d3FE45F306DC9C883), the constant being about 2/pi.
mul.f64 %fd13, %fd65, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r36, %fd13;
cvta.to.local.u64 %rd7, %rd6;
.loc 3 218 10
st.local.u32 [%rd7], %r36;
// Reduced argument r = x - q * (pi/2), with pi/2 supplied as three constants
// of decreasing magnitude applied through successive FMAs.
cvt.rn.f64.s32 %fd14, %r36;
neg.f64 %fd15, %fd14;
mov.f64 %fd16, 0d3FF921FB54442D18;
.loc 3 218 10
fma.rn.f64 %fd17, %fd15, %fd16, %fd65;
mov.f64 %fd18, 0d3C91A62633145C00;
.loc 3 218 10
fma.rn.f64 %fd19, %fd15, %fd18, %fd17;
mov.f64 %fd20, 0d397B839A252049C0;
.loc 3 218 10
fma.rn.f64 %fd66, %fd15, %fd20, %fd19;
// The fast reduction is kept only when |x| <= 0d41E0000000000000 (2^31);
// larger magnitudes go through the out-of-line slow path.
abs.f64 %fd21, %fd65;
setp.leu.f64 %p4, %fd21, 0d41E0000000000000;
@%p4 bra BB42_8;
// Callseq Start 6
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd65;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 3 218 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd66, [retval0+0];
}
// Callseq End 6
// Reload q from the local slot whose address was passed to the callee.
ld.local.u32 %r36, [%rd7];
BB42_8:
// Horner polynomial in r^2 (fd22); t = r + r * (r^2 * poly) ends up in fd67.
mul.f64 %fd22, %fd66, %fd66;
mov.f64 %fd23, 0dBEF9757C5B27EBB1;
mov.f64 %fd24, 0d3EE48DAC2799BCB9;
.loc 3 218 10
fma.rn.f64 %fd25, %fd24, %fd22, %fd23;
mov.f64 %fd26, 0d3F0980E90FD91E04;
.loc 3 218 10
fma.rn.f64 %fd27, %fd25, %fd22, %fd26;
mov.f64 %fd28, 0dBEFAE2B0417D7E1D;
.loc 3 218 10
fma.rn.f64 %fd29, %fd27, %fd22, %fd28;
mov.f64 %fd30, 0d3F119F5341BFBA57;
.loc 3 218 10
fma.rn.f64 %fd31, %fd29, %fd22, %fd30;
mov.f64 %fd32, 0d3F15E791A00F6919;
.loc 3 218 10
fma.rn.f64 %fd33, %fd31, %fd22, %fd32;
mov.f64 %fd34, 0d3F2FF2E7FADEC73A;
.loc 3 218 10
fma.rn.f64 %fd35, %fd33, %fd22, %fd34;
mov.f64 %fd36, 0d3F434BC1B206DA62;
.loc 3 218 10
fma.rn.f64 %fd37, %fd35, %fd22, %fd36;
mov.f64 %fd38, 0d3F57DB18EF2F83F9;
.loc 3 218 10
fma.rn.f64 %fd39, %fd37, %fd22, %fd38;
mov.f64 %fd40, 0d3F6D6D2E7AE49FBC;
.loc 3 218 10
fma.rn.f64 %fd41, %fd39, %fd22, %fd40;
mov.f64 %fd42, 0d3F8226E3A816A776;
.loc 3 218 10
fma.rn.f64 %fd43, %fd41, %fd22, %fd42;
mov.f64 %fd44, 0d3F9664F485D25660;
.loc 3 218 10
fma.rn.f64 %fd45, %fd43, %fd22, %fd44;
mov.f64 %fd46, 0d3FABA1BA1BABF31D;
.loc 3 218 10
fma.rn.f64 %fd47, %fd45, %fd22, %fd46;
mov.f64 %fd48, 0d3FC11111111105D2;
.loc 3 218 10
fma.rn.f64 %fd49, %fd47, %fd22, %fd48;
mov.f64 %fd50, 0d3FD555555555555E;
.loc 3 218 10
fma.rn.f64 %fd51, %fd49, %fd22, %fd50;
mul.f64 %fd7, %fd51, %fd22;
fma.rn.f64 %fd67, %fd7, %fd66, %fd66;
// Even q: t is the final result. Odd q: fall into BB42_9 for the reciprocal form.
and.b32 %r30, %r36, 1;
setp.eq.b32 %p5, %r30, 1;
@!%p5 bra BB42_10;
bra.uni BB42_9;
BB42_9:
// fd56 = fd7 * r - (t - r).
// NOTE(review): this looks like the rounding error of t, fed into the
// reciprocal correction below -- confirm.
sub.f64 %fd54, %fd67, %fd66;
neg.f64 %fd55, %fd54;
fma.rn.f64 %fd56, %fd7, %fd66, %fd55;
// Seed 1/t from the single-precision approximate reciprocal of t.
// inline asm
cvt.rn.f32.f64 %f1,%fd67;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd53,%f2;
// inline asm
// Refine the seed in f64 (e = 1 - t*y; y <- y + (e*e + e)*y), negate it, then
// apply a final correction involving fd56; the result in fd67 is about -1/t.
neg.f64 %fd57, %fd67;
mov.f64 %fd58, 0d3FF0000000000000;
.loc 3 218 10
fma.rn.f64 %fd59, %fd57, %fd53, %fd58;
fma.rn.f64 %fd60, %fd59, %fd59, %fd59;
fma.rn.f64 %fd61, %fd60, %fd53, %fd53;
neg.f64 %fd62, %fd61;
fma.rn.f64 %fd63, %fd67, %fd62, %fd58;
fma.rn.f64 %fd64, %fd62, %fd56, %fd63;
fma.rn.f64 %fd67, %fd64, %fd62, %fd62;
BB42_10:
.loc 2 49 42
// Store the result at out[i * param_3 + j], then advance j by r2.
add.s32 %r31, %r35, %r5;
mul.wide.s32 %rd11, %r31, 8;
add.s64 %rd12, %rd10, %rd11;
st.global.f64 [%rd12], %fd67;
.loc 2 49 22
add.s32 %r35, %r2, %r35;
.loc 2 49 1
setp.lt.s32 %p6, %r35, %r13;
@%p6 bra BB42_4;
BB42_11:
.loc 2 49 22
// Advance i by nctaid.x * ntid.x and repeat while i < param_1.
mov.u32 %r32, %nctaid.x;
mad.lo.s32 %r34, %r32, %r17, %r34;
.loc 2 49 1
setp.lt.s32 %p7, %r34, %r14;
@%p7 bra BB42_2;
BB42_12:
.loc 2 49 2
ret;
}
// map_tanh_double: element-wise kernel over a 2-D index space.
// Parameters, as they are used in the body below:
//   param_0 (u32): exclusive bound for the inner index (derived from the y dimension)
//   param_1 (u32): exclusive bound for the outer index (derived from the x dimension)
//   param_2 (u64): destination pointer (converted to a global address)
//   param_3 (u32): multiplier applied to the outer index when addressing the destination
//   param_4 (u64): source pointer (converted to a global address)
//   param_5 (u32): multiplier applied to the outer index when addressing the source
// For every (outer, inner) pair the kernel loads the f64 at
// src[outer * param_5 + inner], runs the inlined math sequence tagged
// ".loc 3 273" (presumably tanh, going by the kernel name -- confirm against
// math_functions_dbl_ptx3.h), and stores the result to
// dst[outer * param_3 + inner].
.visible .entry map_tanh_double(
.param .u32 map_tanh_double_param_0,
.param .u32 map_tanh_double_param_1,
.param .u64 map_tanh_double_param_2,
.param .u32 map_tanh_double_param_3,
.param .u64 map_tanh_double_param_4,
.param .u32 map_tanh_double_param_5
)
{
.reg .pred %p<9>;
.reg .f32 %f<5>;
.reg .s32 %r<53>;
.reg .s64 %rd<9>;
.reg .f64 %fd<78>;
ld.param.u32 %r12, [map_tanh_double_param_0];
ld.param.u32 %r13, [map_tanh_double_param_1];
ld.param.u64 %rd1, [map_tanh_double_param_2];
ld.param.u32 %r14, [map_tanh_double_param_3];
ld.param.u64 %rd2, [map_tanh_double_param_4];
ld.param.u32 %r15, [map_tanh_double_param_5];
.loc 2 50 1
// %r51 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r16, %ntid.x;
mov.u32 %r17, %ctaid.x;
mov.u32 %r18, %tid.x;
mad.lo.s32 %r51, %r16, %r17, %r18;
.loc 2 50 1
// Threads whose starting outer index is already out of range exit immediately.
setp.ge.s32 %p1, %r51, %r13;
@%p1 bra BB43_12;
.loc 2 50 1
mov.u32 %r19, %ntid.y;
.loc 2 50 22
// %r2 = inner-loop step = nctaid.y * ntid.y
mov.u32 %r20, %nctaid.y;
mul.lo.s32 %r2, %r20, %r19;
// %rd3 = source base, %rd6 = destination base (both as global addresses)
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd6, %rd1;
// Outer loop head: recompute the starting inner index for this outer index.
BB43_2:
.loc 2 50 1
// %r52 = inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r21, %ctaid.y;
mov.u32 %r23, %tid.y;
mad.lo.s32 %r52, %r19, %r21, %r23;
.loc 2 50 1
setp.ge.s32 %p2, %r52, %r12;
@%p2 bra BB43_11;
.loc 2 50 1
// %r4 = outer * param_5: source offset shared by the whole inner loop
mul.lo.s32 %r4, %r51, %r15;
// Inner loop head: load one source element (8 bytes per element).
BB43_4:
add.s32 %r28, %r52, %r4;
mul.wide.s32 %rd4, %r28, 8;
add.s64 %rd5, %rd3, %rd4;
.loc 2 50 1
ld.global.f64 %fd1, [%rd5];
.loc 3 273 10
// %fd2 = |x|. Inputs with |x| < 0d3FE199999999999A (about 0.55), and NaN
// (the compare is unordered), take the direct polynomial path at BB43_9.
abs.f64 %fd2, %fd1;
setp.ltu.f64 %p3, %fd2, 0d3FE199999999999A;
@%p3 bra BB43_9;
// Large-argument path. %fd10 = 2*|x|; %fd12 = round(2*|x| * 0d3FF71547652B82FE)
// where that constant is approximately 1/ln(2); %r7 holds the same value as an integer.
add.f64 %fd10, %fd2, %fd2;
mul.f64 %fd11, %fd10, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd12, %fd11;
cvt.rzi.s32.f64 %r7, %fd12;
// Two-step reduction: %fd16 = 2*|x| - n*ln(2), with ln(2) split into a
// high part (0dBFE62E42FEFA39EF) and a small correction (0dBC7ABC9E3B39803F).
mov.f64 %fd13, 0dBFE62E42FEFA39EF;
.loc 3 273 10
fma.rn.f64 %fd14, %fd12, %fd13, %fd10;
mov.f64 %fd15, 0dBC7ABC9E3B39803F;
.loc 3 273 10
fma.rn.f64 %fd16, %fd12, %fd15, %fd14;
// Horner evaluation of a polynomial in the reduced argument %fd16.
mov.f64 %fd17, 0d3E5AF86D8EBD13CD;
mov.f64 %fd18, 0d3E21F4076ACD15B6;
.loc 3 273 10
fma.rn.f64 %fd19, %fd18, %fd16, %fd17;
mov.f64 %fd20, 0d3E927E5092BA033D;
.loc 3 273 10
fma.rn.f64 %fd21, %fd19, %fd16, %fd20;
mov.f64 %fd22, 0d3EC71DDE6C5F9DA1;
.loc 3 273 10
fma.rn.f64 %fd23, %fd21, %fd16, %fd22;
mov.f64 %fd24, 0d3EFA01A018D034E6;
.loc 3 273 10
fma.rn.f64 %fd25, %fd23, %fd16, %fd24;
mov.f64 %fd26, 0d3F2A01A01B3B6940;
.loc 3 273 10
fma.rn.f64 %fd27, %fd25, %fd16, %fd26;
mov.f64 %fd28, 0d3F56C16C16C1B5DD;
.loc 3 273 10
fma.rn.f64 %fd29, %fd27, %fd16, %fd28;
mov.f64 %fd30, 0d3F8111111110F74D;
.loc 3 273 10
fma.rn.f64 %fd31, %fd29, %fd16, %fd30;
mov.f64 %fd32, 0d3FA555555555554D;
.loc 3 273 10
fma.rn.f64 %fd33, %fd31, %fd16, %fd32;
mov.f64 %fd34, 0d3FC5555555555557;
.loc 3 273 10
fma.rn.f64 %fd35, %fd33, %fd16, %fd34;
mov.f64 %fd36, 0d3FE0000000000000;
.loc 3 273 10
fma.rn.f64 %fd37, %fd35, %fd16, %fd36;
// %fd3 = poly * r * r + r
mul.f64 %fd38, %fd37, %fd16;
fma.rn.f64 %fd3, %fd38, %fd16, %fd16;
// %r9 = (n << 20) + 0x3FF00000: the high 32 bits of the double 2^n when n is
// within the normal exponent range.
shl.b32 %r8, %r7, 20;
add.s32 %r9, %r8, 1072693248;
// |n| < 1021 takes the single-scale path at BB43_7; otherwise the scaling is
// applied as two separate factors.
abs.s32 %r29, %r7;
setp.lt.s32 %p4, %r29, 1021;
@%p4 bra BB43_7;
add.s32 %r30, %r8, 1130364928;
setp.lt.s32 %p5, %r7, 0;
mov.u32 %r31, 0;
.loc 3 273 10
selp.b32 %r32, %r30, %r9, %p5;
shr.s32 %r33, %r7, 31;
add.s32 %r34, %r33, 1073741824;
and.b32 %r35, %r34, -57671680;
add.s32 %r36, %r32, -1048576;
// Build two scale factors from their high words (low words are zero) and
// apply them one after the other.
mov.b64 %fd39, {%r31, %r35};
fma.rn.f64 %fd40, %fd3, %fd39, %fd39;
mov.b64 %fd41, {%r31, %r36};
mul.f64 %fd76, %fd40, %fd41;
bra.uni BB43_8;
BB43_7:
mov.u32 %r37, 0;
.loc 3 273 10
// %fd42 = double with high word %r9 and zero low word; %fd76 = %fd3 * %fd42 + %fd42
mov.b64 %fd42, {%r37, %r9};
fma.rn.f64 %fd76, %fd3, %fd42, %fd42;
BB43_8:
// %fd43 = %fd76 + 1.0
add.f64 %fd43, %fd76, 0d3FF0000000000000;
mov.f64 %fd45, 0d3FF0000000000000;
.loc 3 273 10
// Reciprocal of %fd43: single-precision approximation as the seed, followed
// by FMA-based refinement steps in double precision.
// inline asm
cvt.rn.f32.f64 %f1,%fd43;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd44,%f2;
// inline asm
neg.f64 %fd46, %fd43;
fma.rn.f64 %fd47, %fd46, %fd44, %fd45;
fma.rn.f64 %fd48, %fd47, %fd47, %fd47;
fma.rn.f64 %fd49, %fd48, %fd44, %fd44;
neg.f64 %fd50, %fd49;
// %fd52 = 2.0 * (-recip) + 1.0
mov.f64 %fd51, 0d4000000000000000;
.loc 3 273 10
fma.rn.f64 %fd52, %fd51, %fd50, %fd45;
// For |x| > 0d4075E00000000000 (350.0) the magnitude is forced to exactly 1.0.
setp.gt.f64 %p6, %fd2, 0d4075E00000000000;
selp.f64 %fd53, 0d3FF0000000000000, %fd52, %p6;
// Split the magnitude into low word (%r38) and high word (%r39), take the high
// word of the original input (%r40), and OR the input's sign bit into the result.
{
.reg .b32 %temp;
mov.b64 {%r38, %temp}, %fd53;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r39}, %fd53;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r40}, %fd1;
}
// -2147483648 == 0x80000000: mask selecting only the sign bit of the high word.
and.b32 %r41, %r40, -2147483648;
or.b32 %r42, %r39, %r41;
mov.b64 %fd77, {%r38, %r42};
bra.uni BB43_10;
// Small-argument path: polynomial in x*x evaluated by Horner's scheme, then
// combined as poly * x*x * x + x.
BB43_9:
.loc 3 273 10
mul.f64 %fd54, %fd1, %fd1;
mov.f64 %fd55, 0dBF2B9093D89F0E23;
mov.f64 %fd56, 0d3F0ABFFC9B5786C4;
.loc 3 273 10
fma.rn.f64 %fd57, %fd56, %fd54, %fd55;
mov.f64 %fd58, 0d3F42FA2744C30B61;
.loc 3 273 10
fma.rn.f64 %fd59, %fd57, %fd54, %fd58;
mov.f64 %fd60, 0dBF57CF3B9C1E491D;
.loc 3 273 10
fma.rn.f64 %fd61, %fd59, %fd54, %fd60;
mov.f64 %fd62, 0d3F6D6C61D450119A;
.loc 3 273 10
fma.rn.f64 %fd63, %fd61, %fd54, %fd62;
mov.f64 %fd64, 0dBF8226DDD44294F5;
.loc 3 273 10
fma.rn.f64 %fd65, %fd63, %fd54, %fd64;
mov.f64 %fd66, 0d3F9664F45C2B04A6;
.loc 3 273 10
fma.rn.f64 %fd67, %fd65, %fd54, %fd66;
mov.f64 %fd68, 0dBFABA1BA1AD70754;
.loc 3 273 10
fma.rn.f64 %fd69, %fd67, %fd54, %fd68;
mov.f64 %fd70, 0d3FC111111110295E;
.loc 3 273 10
fma.rn.f64 %fd71, %fd69, %fd54, %fd70;
mov.f64 %fd72, 0dBFD555555555549F;
.loc 3 273 10
fma.rn.f64 %fd73, %fd71, %fd54, %fd72;
mul.f64 %fd74, %fd73, %fd54;
fma.rn.f64 %fd75, %fd74, %fd1, %fd1;
// Same sign-bit merge as on the large-argument path: OR the input's sign bit
// into the high word of the result.
{
.reg .b32 %temp;
mov.b64 {%r43, %temp}, %fd75;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r44}, %fd75;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r45}, %fd1;
}
and.b32 %r46, %r45, -2147483648;
or.b32 %r47, %r44, %r46;
mov.b64 %fd77, {%r43, %r47};
// Join point: store %fd77 to dst[outer * param_3 + inner].
BB43_10:
.loc 2 50 42
mad.lo.s32 %r48, %r51, %r14, %r52;
mul.wide.s32 %rd7, %r48, 8;
add.s64 %rd8, %rd6, %rd7;
.loc 2 50 42
st.global.f64 [%rd8], %fd77;
.loc 2 50 22
// Advance the inner index by nctaid.y * ntid.y and loop while it is in range.
add.s32 %r52, %r2, %r52;
.loc 2 50 1
setp.lt.s32 %p7, %r52, %r12;
@%p7 bra BB43_4;
BB43_11:
.loc 2 50 22
// Advance the outer index by nctaid.x * ntid.x and loop while it is in range.
mov.u32 %r49, %nctaid.x;
mad.lo.s32 %r51, %r49, %r16, %r51;
.loc 2 50 1
setp.lt.s32 %p8, %r51, %r13;
@%p8 bra BB43_2;
BB43_12:
.loc 2 50 2
ret;
}
// map_tgamma_double: element-wise kernel over a 2-D index space.
// Parameters, as they are used in the body below:
//   param_0 (u32): exclusive bound for the inner index (derived from the y dimension)
//   param_1 (u32): exclusive bound for the outer index (derived from the x dimension)
//   param_2 (u64): destination pointer (converted to a global address)
//   param_3 (u32): multiplier applied to the outer index when addressing the destination
//   param_4 (u64): source pointer (converted to a global address)
//   param_5 (u32): multiplier applied to the outer index when addressing the source
// For every (outer, inner) pair the kernel loads the f64 at
// src[outer * param_5 + inner], runs the inlined math sequence tagged
// ".loc 3 418" (presumably tgamma, going by the kernel name -- confirm against
// math_functions_dbl_ptx3.h), and stores the result %fd400 to
// dst[outer * param_3 + inner].
// Branch overview (x = loaded value, |x| = %fd2):
//   NaN                      -> BB44_46 (result x + x)
//   |x| >= 15, x >= 0        -> BB44_8 region, or +infinity above a threshold
//   |x| >= 15, x < 0         -> BB44_21 region
//   |x| <  15, x >= 0        -> BB44_38/BB44_40 region
//   |x| <  15, x < 0         -> BB44_41 region
.visible .entry map_tgamma_double(
.param .u32 map_tgamma_double_param_0,
.param .u32 map_tgamma_double_param_1,
.param .u64 map_tgamma_double_param_2,
.param .u32 map_tgamma_double_param_3,
.param .u64 map_tgamma_double_param_4,
.param .u32 map_tgamma_double_param_5
)
{
.reg .pred %p<35>;
.reg .f32 %f<13>;
.reg .s32 %r<91>;
.reg .s64 %rd<14>;
.reg .f64 %fd<401>;
ld.param.u32 %r30, [map_tgamma_double_param_0];
ld.param.u32 %r31, [map_tgamma_double_param_1];
ld.param.u64 %rd1, [map_tgamma_double_param_2];
ld.param.u32 %r32, [map_tgamma_double_param_3];
ld.param.u64 %rd2, [map_tgamma_double_param_4];
ld.param.u32 %r33, [map_tgamma_double_param_5];
.loc 2 51 1
// %r83 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r34, %ntid.x;
mov.u32 %r35, %ctaid.x;
mov.u32 %r36, %tid.x;
mad.lo.s32 %r83, %r34, %r35, %r36;
.loc 2 51 1
// Threads whose starting outer index is already out of range exit immediately.
setp.ge.s32 %p1, %r83, %r31;
@%p1 bra BB44_49;
.loc 2 51 1
mov.u32 %r37, %ntid.y;
.loc 2 51 22
// %r2 = inner-loop step = nctaid.y * ntid.y
mov.u32 %r38, %nctaid.y;
mul.lo.s32 %r2, %r38, %r37;
// %rd3 = source base, %rd11 = destination base (both as global addresses)
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd11, %rd1;
// Outer loop head: recompute the starting inner index for this outer index.
BB44_2:
.loc 2 51 1
// %r84 = inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r39, %ctaid.y;
mov.u32 %r41, %tid.y;
mad.lo.s32 %r84, %r37, %r39, %r41;
.loc 2 51 1
setp.ge.s32 %p2, %r84, %r30;
@%p2 bra BB44_48;
// Inner loop head: load x = src[outer * param_5 + inner] (8 bytes per element).
BB44_3:
.loc 2 51 1
mad.lo.s32 %r46, %r83, %r33, %r84;
mul.wide.s32 %rd4, %r46, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd397, [%rd5];
.loc 3 418 10
// %fd2 = |x|. An unordered "greater than +infinity" compare is true only for NaN.
abs.f64 %fd2, %fd397;
setp.gtu.f64 %p3, %fd2, 0d7FF0000000000000;
@%p3 bra BB44_46;
// 0d402E000000000000 == 15.0: smaller magnitudes use the BB44_35 paths.
setp.lt.f64 %p4, %fd2, 0d402E000000000000;
@%p4 bra BB44_35;
// |x| >= 15: negative inputs go to BB44_21.
setp.ltu.f64 %p5, %fd397, 0d0000000000000000;
.loc 3 418 10
@%p5 bra BB44_21;
// Positive x at or above 0d406573FAE561F648 (about 171.6) yields +infinity.
setp.lt.f64 %p6, %fd397, 0d406573FAE561F648;
@%p6 bra BB44_8;
mov.f64 %fd400, 0d7FF0000000000000;
bra.uni BB44_47;
// Large positive x. First compute %fd58 ~= 1/x (single-precision seed refined
// with FMAs), then evaluate a polynomial in 1/x into %fd3.
BB44_8:
.loc 3 418 10
// inline asm
cvt.rn.f32.f64 %f1,%fd397;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd53,%f2;
// inline asm
neg.f64 %fd54, %fd397;
mov.f64 %fd55, 0d3FF0000000000000;
.loc 3 418 10
fma.rn.f64 %fd56, %fd54, %fd53, %fd55;
fma.rn.f64 %fd57, %fd56, %fd56, %fd56;
fma.rn.f64 %fd58, %fd57, %fd53, %fd53;
mov.f64 %fd59, 0d3F4B8239C670E690;
mov.f64 %fd60, 0d0000000000000000;
.loc 3 418 10
fma.rn.f64 %fd61, %fd60, %fd58, %fd59;
mov.f64 %fd62, 0dBF0B1D75D3346711;
.loc 3 418 10
fma.rn.f64 %fd63, %fd61, %fd58, %fd62;
mov.f64 %fd64, 0dBF436773BDB97B48;
.loc 3 418 10
fma.rn.f64 %fd65, %fd63, %fd58, %fd64;
mov.f64 %fd66, 0d3F1247604839C038;
.loc 3 418 10
fma.rn.f64 %fd67, %fd65, %fd58, %fd66;
mov.f64 %fd68, 0d3F49B0FF6874F2C4;
.loc 3 418 10
fma.rn.f64 %fd69, %fd67, %fd58, %fd68;
mov.f64 %fd70, 0dBF2E13CE465FA859;
.loc 3 418 10
fma.rn.f64 %fd71, %fd69, %fd58, %fd70;
mov.f64 %fd72, 0dBF65F7268EDAB4C8;
.loc 3 418 10
fma.rn.f64 %fd73, %fd71, %fd58, %fd72;
mov.f64 %fd74, 0d3F6C71C71C71C71C;
.loc 3 418 10
fma.rn.f64 %fd75, %fd73, %fd58, %fd74;
mov.f64 %fd76, 0d3FB5555555555555;
.loc 3 418 10
fma.rn.f64 %fd77, %fd75, %fd58, %fd76;
fma.rn.f64 %fd3, %fd77, %fd58, %fd55;
// %fd4 = x - 0.5
add.f64 %fd4, %fd397, 0dBFE0000000000000;
// Split x into low word %r85 and high word %r86, then extract the 11-bit
// biased exponent into %r87.
{
.reg .b32 %temp;
mov.b64 {%r85, %temp}, %fd397;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r86}, %fd397;
}
shr.u32 %r47, %r86, 20;
and.b32 %r87, %r47, 2047;
setp.ne.s32 %p7, %r87, 0;
@%p7 bra BB44_10;
// Zero exponent field: rescale by 0d4350000000000000 (2^54), re-extract the
// words, and subtract 54 from the exponent to compensate.
mul.f64 %fd78, %fd397, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r86}, %fd78;
}
{
.reg .b32 %temp;
mov.b64 {%r85, %temp}, %fd78;
}
shr.u32 %r48, %r86, 20;
and.b32 %r49, %r48, 2047;
add.s32 %r87, %r49, -54;
BB44_10:
// %r88 = unbiased exponent; %fd379 = mantissa of x with the exponent field
// replaced by 0x3FF (a value in [1, 2)).
add.s32 %r88, %r87, -1023;
and.b32 %r50, %r86, -2146435073;
or.b32 %r51, %r50, 1072693248;
mov.b64 %fd379, {%r85, %r51};
// If the mantissa's high word is at or above 1073127583 (0x3FF6A09F), halve the
// mantissa (subtract 1 from the exponent field) and use exponent + 1 instead.
setp.lt.u32 %p8, %r51, 1073127583;
@%p8 bra BB44_12;
{
.reg .b32 %temp;
mov.b64 {%r52, %temp}, %fd379;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r53}, %fd379;
}
add.s32 %r54, %r53, -1048576;
mov.b64 %fd379, {%r52, %r54};
add.s32 %r88, %r87, -1022;
BB44_12:
// With m = %fd379: compute 1/(m + 1) (seed plus refinement), then
// %fd88 = 2 * (m - 1) / (m + 1) and %fd89 = %fd88^2 as the polynomial variable.
// NOTE(review): this has the shape of a logarithm evaluation carried in
// head/tail pairs -- confirm against the library source.
add.f64 %fd79, %fd379, 0d3FF0000000000000;
// inline asm
cvt.rn.f32.f64 %f5,%fd79;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd80,%f6;
// inline asm
neg.f64 %fd82, %fd79;
fma.rn.f64 %fd83, %fd82, %fd80, %fd55;
fma.rn.f64 %fd84, %fd83, %fd83, %fd83;
fma.rn.f64 %fd85, %fd84, %fd80, %fd80;
add.f64 %fd86, %fd379, 0dBFF0000000000000;
mul.f64 %fd87, %fd86, %fd85;
fma.rn.f64 %fd88, %fd86, %fd85, %fd87;
mul.f64 %fd89, %fd88, %fd88;
mov.f64 %fd90, 0d3ED0F5D241AD3B5A;
mov.f64 %fd91, 0d3EB0F5FF7D2CAFE2;
.loc 3 418 10
fma.rn.f64 %fd92, %fd91, %fd89, %fd90;
mov.f64 %fd93, 0d3EF3B20A75488A3F;
.loc 3 418 10
fma.rn.f64 %fd94, %fd92, %fd89, %fd93;
mov.f64 %fd95, 0d3F1745CDE4FAECD5;
.loc 3 418 10
fma.rn.f64 %fd96, %fd94, %fd89, %fd95;
mov.f64 %fd97, 0d3F3C71C7258A578B;
.loc 3 418 10
fma.rn.f64 %fd98, %fd96, %fd89, %fd97;
mov.f64 %fd99, 0d3F6249249242B910;
.loc 3 418 10
fma.rn.f64 %fd100, %fd98, %fd89, %fd99;
mov.f64 %fd101, 0d3F89999999999DFB;
.loc 3 418 10
fma.rn.f64 %fd102, %fd100, %fd89, %fd101;
// The following add/sub/fma chains keep explicit rounding-error terms next to
// each product and sum (each "mul.rn" is followed by an fma that recovers its
// rounding error). Statement order matters here; do not reorder.
sub.f64 %fd103, %fd86, %fd88;
add.f64 %fd104, %fd103, %fd103;
neg.f64 %fd105, %fd88;
fma.rn.f64 %fd106, %fd105, %fd86, %fd104;
mul.f64 %fd107, %fd85, %fd106;
fma.rn.f64 %fd108, %fd102, %fd89, 0d3FB5555555555555;
sub.f64 %fd110, %fd76, %fd108;
fma.rn.f64 %fd111, %fd102, %fd89, %fd110;
add.f64 %fd112, %fd111, 0d0000000000000000;
add.f64 %fd113, %fd112, 0dBC46A4CB00B9E7B0;
add.f64 %fd114, %fd108, %fd113;
sub.f64 %fd115, %fd108, %fd114;
add.f64 %fd116, %fd115, %fd113;
mul.rn.f64 %fd117, %fd114, %fd88;
neg.f64 %fd118, %fd117;
fma.rn.f64 %fd119, %fd114, %fd88, %fd118;
fma.rn.f64 %fd120, %fd114, %fd107, %fd119;
fma.rn.f64 %fd121, %fd116, %fd88, %fd120;
add.f64 %fd122, %fd117, %fd121;
sub.f64 %fd123, %fd117, %fd122;
add.f64 %fd124, %fd123, %fd121;
mul.rn.f64 %fd125, %fd122, %fd88;
neg.f64 %fd126, %fd125;
fma.rn.f64 %fd127, %fd122, %fd88, %fd126;
fma.rn.f64 %fd128, %fd122, %fd107, %fd127;
fma.rn.f64 %fd129, %fd124, %fd88, %fd128;
add.f64 %fd130, %fd125, %fd129;
sub.f64 %fd131, %fd125, %fd130;
add.f64 %fd132, %fd131, %fd129;
mul.rn.f64 %fd133, %fd130, %fd88;
neg.f64 %fd134, %fd133;
fma.rn.f64 %fd135, %fd130, %fd88, %fd134;
fma.rn.f64 %fd136, %fd130, %fd107, %fd135;
fma.rn.f64 %fd137, %fd132, %fd88, %fd136;
add.f64 %fd138, %fd133, %fd137;
sub.f64 %fd139, %fd133, %fd138;
add.f64 %fd140, %fd139, %fd137;
add.f64 %fd141, %fd88, %fd138;
sub.f64 %fd142, %fd88, %fd141;
add.f64 %fd143, %fd142, %fd138;
add.f64 %fd144, %fd143, %fd140;
add.f64 %fd145, %fd144, %fd107;
add.f64 %fd146, %fd141, %fd145;
sub.f64 %fd147, %fd141, %fd146;
add.f64 %fd148, %fd147, %fd145;
// Add exponent * ln(2), with ln(2) supplied as a high part
// (0d3FE62E42FEFA3000) and a low part (0d3D53DE6AF278ECE6).
cvt.rn.f64.s32 %fd149, %r88;
mov.f64 %fd150, 0d3FE62E42FEFA3000;
.loc 3 418 10
mul.rn.f64 %fd151, %fd149, %fd150;
mov.f64 %fd152, 0d3D53DE6AF278ECE6;
.loc 3 418 10
mul.rn.f64 %fd153, %fd149, %fd152;
add.f64 %fd154, %fd151, %fd146;
sub.f64 %fd155, %fd151, %fd154;
add.f64 %fd156, %fd155, %fd146;
add.f64 %fd157, %fd156, %fd148;
add.f64 %fd158, %fd157, %fd153;
add.f64 %fd159, %fd154, %fd158;
sub.f64 %fd160, %fd154, %fd159;
add.f64 %fd161, %fd160, %fd158;
// Multiply the (head %fd159, tail %fd161) pair by (x - 0.5), then subtract x,
// again tracking the rounding error. Result: head %fd8, tail %fd9.
mul.rn.f64 %fd162, %fd159, %fd4;
neg.f64 %fd163, %fd162;
fma.rn.f64 %fd164, %fd159, %fd4, %fd163;
fma.rn.f64 %fd165, %fd161, %fd4, %fd164;
add.f64 %fd166, %fd162, %fd165;
sub.f64 %fd167, %fd162, %fd166;
add.f64 %fd168, %fd167, %fd165;
sub.f64 %fd169, %fd166, %fd397;
sub.f64 %fd170, %fd166, %fd169;
sub.f64 %fd171, %fd170, %fd397;
add.f64 %fd172, %fd171, 0d0000000000000000;
add.f64 %fd173, %fd172, %fd168;
add.f64 %fd8, %fd169, %fd173;
sub.f64 %fd174, %fd169, %fd8;
add.f64 %fd9, %fd174, %fd173;
// Range check on the high word of %fd8: in-range values go to the exponential
// evaluation at BB44_14; otherwise the result saturates to 0 or +infinity
// (or propagates NaN via %fd8 + %fd8).
{
.reg .b32 %temp;
mov.b64 {%temp, %r18}, %fd8;
}
setp.lt.u32 %p9, %r18, 1082535491;
setp.lt.s32 %p10, %r18, -1064875759;
or.pred %p11, %p9, %p10;
@%p11 bra BB44_14;
setp.lt.s32 %p12, %r18, 0;
selp.f64 %fd175, 0d0000000000000000, 0d7FF0000000000000, %p12;
abs.f64 %fd176, %fd8;
setp.gtu.f64 %p13, %fd176, 0d7FF0000000000000;
add.f64 %fd177, %fd8, %fd8;
selp.f64 %fd381, %fd177, %fd175, %p13;
bra.uni BB44_18;
// Exponential of %fd8: n = round(%fd8 * ~1/ln(2)), reduced argument
// r = %fd8 - n*ln(2) (two-part ln(2)), polynomial in r, then scale by 2^n.
BB44_14:
mov.f64 %fd378, 0d3FF0000000000000;
.loc 3 418 10
mul.f64 %fd178, %fd8, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd179, %fd178;
cvt.rzi.s32.f64 %r19, %fd179;
mov.f64 %fd180, 0dBFE62E42FEFA39EF;
.loc 3 418 10
fma.rn.f64 %fd181, %fd179, %fd180, %fd8;
mov.f64 %fd182, 0dBC7ABC9E3B39803F;
.loc 3 418 10
fma.rn.f64 %fd183, %fd179, %fd182, %fd181;
mov.f64 %fd184, 0d3E928A27E30F5561;
mov.f64 %fd185, 0d3E5AE6449C0686C0;
.loc 3 418 10
fma.rn.f64 %fd186, %fd185, %fd183, %fd184;
mov.f64 %fd187, 0d3EC71DE8E6486D6B;
.loc 3 418 10
fma.rn.f64 %fd188, %fd186, %fd183, %fd187;
mov.f64 %fd189, 0d3EFA019A6B2464C5;
.loc 3 418 10
fma.rn.f64 %fd190, %fd188, %fd183, %fd189;
mov.f64 %fd191, 0d3F2A01A0171064A5;
.loc 3 418 10
fma.rn.f64 %fd192, %fd190, %fd183, %fd191;
mov.f64 %fd193, 0d3F56C16C17F29C8D;
.loc 3 418 10
fma.rn.f64 %fd194, %fd192, %fd183, %fd193;
mov.f64 %fd195, 0d3F8111111111A24E;
.loc 3 418 10
fma.rn.f64 %fd196, %fd194, %fd183, %fd195;
mov.f64 %fd197, 0d3FA555555555211D;
.loc 3 418 10
fma.rn.f64 %fd198, %fd196, %fd183, %fd197;
mov.f64 %fd199, 0d3FC5555555555530;
.loc 3 418 10
fma.rn.f64 %fd200, %fd198, %fd183, %fd199;
mov.f64 %fd201, 0d3FE0000000000005;
.loc 3 418 10
fma.rn.f64 %fd202, %fd200, %fd183, %fd201;
fma.rn.f64 %fd204, %fd202, %fd183, %fd378;
fma.rn.f64 %fd380, %fd204, %fd183, %fd378;
// |n| < 1023: a single 2^n factor is built directly at BB44_16. Otherwise the
// scaling is split into two factors whose exponent fields sum to the target.
abs.s32 %r55, %r19;
setp.lt.s32 %p14, %r55, 1023;
@%p14 bra BB44_16;
add.s32 %r56, %r19, 2046;
shl.b32 %r57, %r56, 19;
and.b32 %r58, %r57, -1048576;
shl.b32 %r59, %r56, 20;
sub.s32 %r89, %r59, %r58;
mov.u32 %r60, 0;
.loc 3 418 10
mov.b64 %fd205, {%r60, %r58};
mul.f64 %fd380, %fd380, %fd205;
bra.uni BB44_17;
BB44_16:
.loc 3 418 10
// %r89 = (n << 20) + 0x3FF00000: high word of 2^n
shl.b32 %r61, %r19, 20;
add.s32 %r89, %r61, 1072693248;
BB44_17:
mov.u32 %r62, 0;
.loc 3 418 10
mov.b64 %fd206, {%r62, %r89};
mul.f64 %fd381, %fd380, %fd206;
BB44_18:
// Apply the tail correction %fd9 unless the exponential is already infinite.
abs.f64 %fd207, %fd381;
setp.eq.f64 %p15, %fd207, 0d7FF0000000000000;
@%p15 bra BB44_20;
fma.rn.f64 %fd381, %fd381, %fd9, %fd381;
BB44_20:
// Multiply by a constant given as a high part 0d40040D931FF62706 (about
// 2.5066, presumably sqrt(2*pi)) plus a small correction 0dBCAA6A0D6F814637,
// then by the 1/x polynomial %fd3.
mul.f64 %fd208, %fd381, 0dBCAA6A0D6F814637;
mov.f64 %fd209, 0d40040D931FF62706;
.loc 3 418 10
fma.rn.f64 %fd210, %fd381, %fd209, %fd208;
mul.f64 %fd400, %fd210, %fd3;
bra.uni BB44_47;
// x <= -15. Integer inputs (x equals its truncation) produce NaN.
BB44_21:
.loc 3 418 10
cvt.rzi.f64.f64 %fd211, %fd397;
setp.neu.f64 %p16, %fd397, %fd211;
@%p16 bra BB44_23;
mov.f64 %fd400, 0dFFF8000000000000;
bra.uni BB44_47;
BB44_23:
.loc 3 418 10
// 0dC067200000000000 == -185.0: below this the result is a signed zero (BB44_34).
setp.lt.f64 %p17, %fd397, 0dC067200000000000;
@%p17 bra BB44_34;
// Adding 1048576 (1 << 20) to the high word bumps the exponent field, i.e.
// %fd212 = 2*x. %fd213 = round(2*x); %rd6/%r23 hold it as an integer.
{
.reg .b32 %temp;
mov.b64 {%r63, %temp}, %fd397;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r64}, %fd397;
}
add.s32 %r65, %r64, 1048576;
mov.b64 %fd212, {%r63, %r65};
cvt.rni.f64.f64 %fd213, %fd212;
cvt.rzi.s64.f64 %rd6, %fd213;
cvt.u32.u64 %r23, %rd6;
// %fd216 = x - round(2x)/2; %fd219 = %fd216 * pi (pi given as the high part
// 0d400921FB54442D18 plus the correction 0d3CA1A62633145C07).
// NOTE(review): together with the __cudart_sin_cos_coeffs table below this
// appears to evaluate sin(pi * x) -- confirm against the library source.
neg.f64 %fd214, %fd213;
mov.f64 %fd215, 0d3FE0000000000000;
.loc 3 418 10
fma.rn.f64 %fd216, %fd214, %fd215, %fd397;
mul.f64 %fd217, %fd216, 0d3CA1A62633145C07;
mov.f64 %fd218, 0d400921FB54442D18;
.loc 3 418 10
fma.rn.f64 %fd219, %fd216, %fd218, %fd217;
// Bit 0 of round(2x) selects which half of the coefficient table is used
// (element offset 0 or 8) and the leading coefficient.
shl.b32 %r66, %r23, 3;
and.b32 %r67, %r66, 8;
mul.rn.f64 %fd19, %fd219, %fd219;
and.b64 %rd7, %rd6, 1;
setp.eq.b64 %p18, %rd7, 1;
not.pred %p19, %p18;
selp.f64 %fd220, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p19;
mul.wide.u32 %rd8, %r67, 8;
mov.u64 %rd9, __cudart_sin_cos_coeffs;
add.s64 %rd10, %rd9, %rd8;
.loc 3 418 10
ld.const.f64 %fd221, [%rd10+8];
fma.rn.f64 %fd222, %fd220, %fd19, %fd221;
ld.const.f64 %fd223, [%rd10+16];
fma.rn.f64 %fd224, %fd222, %fd19, %fd223;
ld.const.f64 %fd225, [%rd10+24];
fma.rn.f64 %fd226, %fd224, %fd19, %fd225;
ld.const.f64 %fd227, [%rd10+32];
fma.rn.f64 %fd228, %fd226, %fd19, %fd227;
ld.const.f64 %fd229, [%rd10+40];
fma.rn.f64 %fd230, %fd228, %fd19, %fd229;
ld.const.f64 %fd231, [%rd10+48];
fma.rn.f64 %fd20, %fd230, %fd19, %fd231;
// Even round(2x): result = poly * t + t. Odd: result = poly * t^2 + 1.
fma.rn.f64 %fd382, %fd20, %fd219, %fd219;
@%p19 bra BB44_26;
mov.f64 %fd232, 0d3FF0000000000000;
.loc 3 418 10
fma.rn.f64 %fd382, %fd20, %fd19, %fd232;
BB44_26:
// Bit 1 of round(2x) set: negate the result.
and.b32 %r68, %r23, 2;
setp.eq.s32 %p20, %r68, 0;
@%p20 bra BB44_28;
mov.f64 %fd233, 0d0000000000000000;
mov.f64 %fd234, 0dBFF0000000000000;
.loc 3 418 10
fma.rn.f64 %fd382, %fd382, %fd234, %fd233;
BB44_28:
// Exponential of x (x is negative here): same reduction, polynomial and
// 2^n scaling scheme as in BB44_14, applied to x. Result in %fd29.
mul.f64 %fd235, %fd397, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd236, %fd235;
cvt.rzi.s32.f64 %r24, %fd236;
mov.f64 %fd237, 0dBFE62E42FEFA39EF;
.loc 3 418 10
fma.rn.f64 %fd238, %fd236, %fd237, %fd397;
mov.f64 %fd239, 0dBC7ABC9E3B39803F;
.loc 3 418 10
fma.rn.f64 %fd240, %fd236, %fd239, %fd238;
mov.f64 %fd241, 0d3E928A27E30F5561;
mov.f64 %fd242, 0d3E5AE6449C0686C0;
.loc 3 418 10
fma.rn.f64 %fd243, %fd242, %fd240, %fd241;
mov.f64 %fd244, 0d3EC71DE8E6486D6B;
.loc 3 418 10
fma.rn.f64 %fd245, %fd243, %fd240, %fd244;
mov.f64 %fd246, 0d3EFA019A6B2464C5;
.loc 3 418 10
fma.rn.f64 %fd247, %fd245, %fd240, %fd246;
mov.f64 %fd248, 0d3F2A01A0171064A5;
.loc 3 418 10
fma.rn.f64 %fd249, %fd247, %fd240, %fd248;
mov.f64 %fd250, 0d3F56C16C17F29C8D;
.loc 3 418 10
fma.rn.f64 %fd251, %fd249, %fd240, %fd250;
mov.f64 %fd252, 0d3F8111111111A24E;
.loc 3 418 10
fma.rn.f64 %fd253, %fd251, %fd240, %fd252;
mov.f64 %fd254, 0d3FA555555555211D;
.loc 3 418 10
fma.rn.f64 %fd255, %fd253, %fd240, %fd254;
mov.f64 %fd256, 0d3FC5555555555530;
.loc 3 418 10
fma.rn.f64 %fd257, %fd255, %fd240, %fd256;
mov.f64 %fd258, 0d3FE0000000000005;
.loc 3 418 10
fma.rn.f64 %fd259, %fd257, %fd240, %fd258;
mov.f64 %fd260, 0d3FF0000000000000;
.loc 3 418 10
fma.rn.f64 %fd261, %fd259, %fd240, %fd260;
fma.rn.f64 %fd383, %fd261, %fd240, %fd260;
abs.s32 %r69, %r24;
setp.lt.s32 %p21, %r69, 1023;
@%p21 bra BB44_30;
add.s32 %r70, %r24, 2046;
shl.b32 %r71, %r70, 19;
and.b32 %r72, %r71, -1048576;
shl.b32 %r73, %r70, 20;
sub.s32 %r90, %r73, %r72;
mov.u32 %r74, 0;
.loc 3 418 10
mov.b64 %fd262, {%r74, %r72};
mul.f64 %fd383, %fd383, %fd262;
bra.uni BB44_31;
BB44_30:
.loc 3 418 10
shl.b32 %r75, %r24, 20;
add.s32 %r90, %r75, 1072693248;
BB44_31:
mov.u32 %r76, 0;
.loc 3 418 10
mov.b64 %fd263, {%r76, %r90};
mul.f64 %fd29, %fd383, %fd263;
// %fd384 = |x| - 0.5. When |x| > 0d4061800000000000 (140.0) the exponent is
// halved (exponent field decremented) before the pow call; the power is then
// applied in two steps below (one multiply, one final divide).
add.f64 %fd384, %fd2, 0dBFE0000000000000;
setp.leu.f64 %p22, %fd2, 0d4061800000000000;
@%p22 bra BB44_33;
{
.reg .b32 %temp;
mov.b64 {%r77, %temp}, %fd384;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r78}, %fd384;
}
add.s32 %r79, %r78, -1048576;
mov.b64 %fd384, {%r77, %r79};
BB44_33:
// %fd266 = __internal_accurate_pow(|x|, %fd384)
// Callseq Start 7
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd2;
.param .b64 param1;
st.param.f64 [param1+0], %fd384;
.param .b64 retval0;
.loc 3 418 10
call.uni (retval0),
__internal_accurate_pow,
(
param0,
param1
);
ld.param.f64 %fd266, [retval0+0];
}
// Callseq End 7
// Only fold the pow result into the product here when |x| > 140.
mul.f64 %fd267, %fd29, %fd266;
setp.gt.f64 %p23, %fd2, 0d4061800000000000;
.loc 3 418 10
selp.f64 %fd268, %fd267, %fd29, %p23;
// %fd273 ~= 1/|x| (seed plus refinement), followed by the same 1/x polynomial
// coefficients as used in BB44_8; result in %fd293.
// inline asm
cvt.rn.f32.f64 %f9,%fd2;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f10,%f9;
// inline asm
// inline asm
cvt.f64.f32 %fd265,%f10;
// inline asm
neg.f64 %fd269, %fd2;
fma.rn.f64 %fd271, %fd269, %fd265, %fd260;
fma.rn.f64 %fd272, %fd271, %fd271, %fd271;
fma.rn.f64 %fd273, %fd272, %fd265, %fd265;
mov.f64 %fd274, 0d3F4B8239C670E690;
mov.f64 %fd275, 0d0000000000000000;
.loc 3 418 10
fma.rn.f64 %fd276, %fd275, %fd273, %fd274;
mov.f64 %fd277, 0dBF0B1D75D3346711;
.loc 3 418 10
fma.rn.f64 %fd278, %fd276, %fd273, %fd277;
mov.f64 %fd279, 0dBF436773BDB97B48;
.loc 3 418 10
fma.rn.f64 %fd280, %fd278, %fd273, %fd279;
mov.f64 %fd281, 0d3F1247604839C038;
.loc 3 418 10
fma.rn.f64 %fd282, %fd280, %fd273, %fd281;
mov.f64 %fd283, 0d3F49B0FF6874F2C4;
.loc 3 418 10
fma.rn.f64 %fd284, %fd282, %fd273, %fd283;
mov.f64 %fd285, 0dBF2E13CE465FA859;
.loc 3 418 10
fma.rn.f64 %fd286, %fd284, %fd273, %fd285;
mov.f64 %fd287, 0dBF65F7268EDAB4C8;
.loc 3 418 10
fma.rn.f64 %fd288, %fd286, %fd273, %fd287;
mov.f64 %fd289, 0d3F6C71C71C71C71C;
.loc 3 418 10
fma.rn.f64 %fd290, %fd288, %fd273, %fd289;
mov.f64 %fd291, 0d3FB5555555555555;
.loc 3 418 10
fma.rn.f64 %fd292, %fd290, %fd273, %fd291;
fma.rn.f64 %fd293, %fd292, %fd273, %fd260;
// Combine: product * polynomial * |x| * (sine-like term %fd382), take the
// reciprocal, scale by a two-part constant (0d3FF40D931FF62706 plus the
// correction 0dBC9A6A0D6F814637), and finally divide by the pow result.
mul.f64 %fd294, %fd268, %fd293;
mul.f64 %fd295, %fd294, %fd2;
mul.f64 %fd296, %fd295, %fd382;
rcp.rn.f64 %fd297, %fd296;
mul.f64 %fd298, %fd297, 0dBC9A6A0D6F814637;
mov.f64 %fd299, 0d3FF40D931FF62706;
.loc 3 418 10
fma.rn.f64 %fd300, %fd297, %fd299, %fd298;
div.rn.f64 %fd400, %fd300, %fd266;
bra.uni BB44_47;
// x < -185 (non-integer): result is a signed zero. %fd304 = floor(x) mod 2;
// odd floor(x) gives -0.0, even gives +0.0.
BB44_34:
.loc 3 418 10
cvt.rmi.f64.f64 %fd301, %fd397;
mul.f64 %fd302, %fd301, 0d3FE0000000000000;
cvt.rmi.f64.f64 %fd303, %fd302;
fma.rn.f64 %fd304, %fd303, 0dC000000000000000, %fd301;
setp.eq.f64 %p24, %fd304, 0d3FF0000000000000;
selp.f64 %fd400, 0d8000000000000000, 0d0000000000000000, %p24;
bra.uni BB44_47;
// |x| < 15. Negative inputs go to BB44_41.
BB44_35:
setp.ltu.f64 %p25, %fd397, 0d0000000000000000;
.loc 3 418 10
@%p25 bra BB44_41;
// 0 <= x < 15. %fd385 starts at 1.0 and accumulates a running product while
// the working argument is stepped down by 1.0 until it is <= 1.5.
mov.f64 %fd385, 0d3FF0000000000000;
.loc 3 418 10
setp.gt.f64 %p26, %fd397, 0d3FF8000000000000;
@%p26 bra BB44_38;
mov.f64 %fd398, %fd397;
bra.uni BB44_40;
BB44_38:
mov.f64 %fd399, %fd397;
BB44_39:
.loc 3 418 10
// product = product * t - product, i.e. product *= (t - 1); then t -= 1.
neg.f64 %fd308, %fd385;
fma.rn.f64 %fd385, %fd385, %fd399, %fd308;
add.f64 %fd399, %fd399, 0dBFF0000000000000;
setp.gt.f64 %p27, %fd399, 0d3FF8000000000000;
mov.f64 %fd398, %fd399;
@%p27 bra BB44_39;
BB44_40:
// Polynomial argument %fd310: the reduced value itself when the original
// x < 0.5 (or unordered), otherwise the reduced value minus 1.0.
add.f64 %fd309, %fd398, 0dBFF0000000000000;
setp.ltu.f64 %p28, %fd397, 0d3FE0000000000000;
selp.f64 %fd310, %fd398, %fd309, %p28;
mov.f64 %fd311, 0dBE8B338C457183B6;
mov.f64 %fd312, 0dBDFE6BDF8CC487CD;
.loc 3 418 10
fma.rn.f64 %fd313, %fd312, %fd310, %fd311;
mov.f64 %fd314, 0d3EB31831766A0388;
.loc 3 418 10
fma.rn.f64 %fd315, %fd313, %fd310, %fd314;
mov.f64 %fd316, 0dBEB4FC07FC9F1563;
.loc 3 418 10
fma.rn.f64 %fd317, %fd315, %fd310, %fd316;
mov.f64 %fd318, 0dBEF51D59DCE6A679;
.loc 3 418 10
fma.rn.f64 %fd319, %fd317, %fd310, %fd318;
mov.f64 %fd320, 0d3F20C8A6351CB1F9;
.loc 3 418 10
fma.rn.f64 %fd321, %fd319, %fd310, %fd320;
mov.f64 %fd322, 0dBF2C364D9E00D4CA;
.loc 3 418 10
fma.rn.f64 %fd323, %fd321, %fd310, %fd322;
mov.f64 %fd324, 0dBF5317112046830B;
.loc 3 418 10
fma.rn.f64 %fd325, %fd323, %fd310, %fd324;
mov.f64 %fd326, 0d3F7D919C50FF9416;
.loc 3 418 10
fma.rn.f64 %fd327, %fd325, %fd310, %fd326;
mov.f64 %fd328, 0dBF83B4AF28728BB0;
.loc 3 418 10
fma.rn.f64 %fd329, %fd327, %fd310, %fd328;
mov.f64 %fd330, 0dBFA59AF103C171DC;
.loc 3 418 10
fma.rn.f64 %fd331, %fd329, %fd310, %fd330;
mov.f64 %fd332, 0d3FC5512320B45D97;
.loc 3 418 10
fma.rn.f64 %fd333, %fd331, %fd310, %fd332;
mov.f64 %fd334, 0dBFA5815E8FA27607;
.loc 3 418 10
fma.rn.f64 %fd335, %fd333, %fd310, %fd334;
mov.f64 %fd336, 0dBFE4FCF4026AFA4B;
.loc 3 418 10
fma.rn.f64 %fd337, %fd335, %fd310, %fd336;
mov.f64 %fd338, 0d3FE2788CFC6FB619;
.loc 3 418 10
fma.rn.f64 %fd339, %fd337, %fd310, %fd338;
mov.f64 %fd340, 0d3FF0000000000000;
.loc 3 418 10
fma.rn.f64 %fd341, %fd339, %fd310, %fd340;
// For x < 0.5 the polynomial is additionally multiplied by x. The result is
// running product / polynomial.
mul.f64 %fd342, %fd341, %fd397;
setp.lt.f64 %p29, %fd397, 0d3FE0000000000000;
selp.f64 %fd343, %fd342, %fd341, %p29;
div.rn.f64 %fd400, %fd385, %fd343;
bra.uni BB44_47;
// -15 < x < 0. Integer inputs (x equals its truncation) produce NaN.
BB44_41:
.loc 3 418 10
cvt.rzi.f64.f64 %fd344, %fd397;
setp.neu.f64 %p30, %fd397, %fd344;
@%p30 bra BB44_43;
mov.f64 %fd400, 0dFFF8000000000000;
bra.uni BB44_47;
BB44_43:
.loc 3 418 10
// If x >= -0.5 no stepping is needed. Otherwise BB44_44 steps the working
// argument %fd396 up by 1.0 until it reaches >= -0.5 while accumulating a
// running product in %fd397 (which is overwritten here).
setp.geu.f64 %p31, %fd397, 0dBFE0000000000000;
mov.f64 %fd394, %fd397;
mov.f64 %fd395, %fd397;
mov.f64 %fd396, %fd397;
@%p31 bra BB44_45;
BB44_44:
// product = product * t + product, i.e. product *= (t + 1); then t += 1.
fma.rn.f64 %fd397, %fd397, %fd396, %fd397;
add.f64 %fd396, %fd396, 0d3FF0000000000000;
setp.lt.f64 %p32, %fd396, 0dBFE0000000000000;
mov.f64 %fd395, %fd397;
mov.f64 %fd394, %fd396;
@%p32 bra BB44_44;
BB44_45:
// Same polynomial coefficients as in BB44_40, evaluated at the stepped
// argument %fd394; the result is 1 / (running product * polynomial).
mov.f64 %fd345, 0dBE8B338C457183B6;
mov.f64 %fd346, 0dBDFE6BDF8CC487CD;
.loc 3 418 10
fma.rn.f64 %fd347, %fd346, %fd394, %fd345;
mov.f64 %fd348, 0d3EB31831766A0388;
.loc 3 418 10
fma.rn.f64 %fd349, %fd347, %fd394, %fd348;
mov.f64 %fd350, 0dBEB4FC07FC9F1563;
.loc 3 418 10
fma.rn.f64 %fd351, %fd349, %fd394, %fd350;
mov.f64 %fd352, 0dBEF51D59DCE6A679;
.loc 3 418 10
fma.rn.f64 %fd353, %fd351, %fd394, %fd352;
mov.f64 %fd354, 0d3F20C8A6351CB1F9;
.loc 3 418 10
fma.rn.f64 %fd355, %fd353, %fd394, %fd354;
mov.f64 %fd356, 0dBF2C364D9E00D4CA;
.loc 3 418 10
fma.rn.f64 %fd357, %fd355, %fd394, %fd356;
mov.f64 %fd358, 0dBF5317112046830B;
.loc 3 418 10
fma.rn.f64 %fd359, %fd357, %fd394, %fd358;
mov.f64 %fd360, 0d3F7D919C50FF9416;
.loc 3 418 10
fma.rn.f64 %fd361, %fd359, %fd394, %fd360;
mov.f64 %fd362, 0dBF83B4AF28728BB0;
.loc 3 418 10
fma.rn.f64 %fd363, %fd361, %fd394, %fd362;
mov.f64 %fd364, 0dBFA59AF103C171DC;
.loc 3 418 10
fma.rn.f64 %fd365, %fd363, %fd394, %fd364;
mov.f64 %fd366, 0d3FC5512320B45D97;
.loc 3 418 10
fma.rn.f64 %fd367, %fd365, %fd394, %fd366;
mov.f64 %fd368, 0dBFA5815E8FA27607;
.loc 3 418 10
fma.rn.f64 %fd369, %fd367, %fd394, %fd368;
mov.f64 %fd370, 0dBFE4FCF4026AFA4B;
.loc 3 418 10
fma.rn.f64 %fd371, %fd369, %fd394, %fd370;
mov.f64 %fd372, 0d3FE2788CFC6FB619;
.loc 3 418 10
fma.rn.f64 %fd373, %fd371, %fd394, %fd372;
mov.f64 %fd374, 0d3FF0000000000000;
.loc 3 418 10
fma.rn.f64 %fd375, %fd373, %fd394, %fd374;
mul.f64 %fd376, %fd395, %fd375;
rcp.rn.f64 %fd400, %fd376;
bra.uni BB44_47;
// NaN input: x + x propagates the NaN.
BB44_46:
.loc 3 418 10
add.f64 %fd400, %fd397, %fd397;
// Join point: store %fd400 to dst[outer * param_3 + inner].
BB44_47:
.loc 2 51 42
mad.lo.s32 %r80, %r83, %r32, %r84;
mul.wide.s32 %rd12, %r80, 8;
add.s64 %rd13, %rd11, %rd12;
st.global.f64 [%rd13], %fd400;
.loc 2 51 22
// Advance the inner index by nctaid.y * ntid.y and loop while it is in range.
add.s32 %r84, %r2, %r84;
.loc 2 51 1
setp.lt.s32 %p33, %r84, %r30;
@%p33 bra BB44_3;
BB44_48:
.loc 2 51 22
// Advance the outer index by nctaid.x * ntid.x and loop while it is in range.
mov.u32 %r81, %nctaid.x;
mad.lo.s32 %r83, %r81, %r34, %r83;
.loc 2 51 1
setp.lt.s32 %p34, %r83, %r31;
@%p34 bra BB44_2;
BB44_49:
.loc 2 51 2
ret;
}
// map_trunc_double: element-wise kernel over a 2-D index space.
// Parameters, as they are used in the body below:
//   param_0 (u32): exclusive bound for the inner index (derived from the y dimension)
//   param_1 (u32): exclusive bound for the outer index (derived from the x dimension)
//   param_2 (u64): destination pointer (converted to a global address)
//   param_3 (u32): multiplier applied to the outer index when addressing the destination
//   param_4 (u64): source pointer (converted to a global address)
//   param_5 (u32): multiplier applied to the outer index when addressing the source
// For every (outer, inner) pair the kernel loads the f64 at
// src[outer * param_5 + inner], rounds it toward zero to an integral value
// (cvt.rzi.f64.f64), and stores it to dst[outer * param_3 + inner].
.visible .entry map_trunc_double(
.param .u32 map_trunc_double_param_0,
.param .u32 map_trunc_double_param_1,
.param .u64 map_trunc_double_param_2,
.param .u32 map_trunc_double_param_3,
.param .u64 map_trunc_double_param_4,
.param .u32 map_trunc_double_param_5
)
{
.reg .pred %outer_done, %inner_done, %inner_more, %outer_more;
.reg .s32 %inner_end, %outer_end, %dst_ld, %src_ld;
.reg .s32 %bsz_x, %blk_x, %thr_x, %gsz_x;
.reg .s32 %bsz_y, %blk_y, %thr_y, %gsz_y;
.reg .s32 %outer, %inner, %inner_first, %outer_step, %inner_step;
.reg .s32 %src_base, %dst_base, %src_idx, %dst_idx;
.reg .s64 %dst_gen, %src_gen, %dst_glob, %src_glob;
.reg .s64 %src_off, %src_addr, %dst_off, %dst_addr;
.reg .f64 %x_val, %x_trunc;
// Fetch the six kernel arguments.
ld.param.u32 %inner_end, [map_trunc_double_param_0];
ld.param.u32 %outer_end, [map_trunc_double_param_1];
ld.param.u64 %dst_gen, [map_trunc_double_param_2];
ld.param.u32 %dst_ld, [map_trunc_double_param_3];
ld.param.u64 %src_gen, [map_trunc_double_param_4];
ld.param.u32 %src_ld, [map_trunc_double_param_5];
// Convert both pointers from generic to global addresses.
cvta.to.global.u64 %dst_glob, %dst_gen;
cvta.to.global.u64 %src_glob, %src_gen;
.loc 2 52 1
// outer = ntid.x * ctaid.x + tid.x
mov.u32 %bsz_x, %ntid.x;
mov.u32 %blk_x, %ctaid.x;
mov.u32 %thr_x, %tid.x;
mad.lo.s32 %outer, %bsz_x, %blk_x, %thr_x;
.loc 2 52 1
// Nothing to do when the starting outer index is already out of range.
setp.ge.s32 %outer_done, %outer, %outer_end;
@%outer_done bra BB45_EXIT;
.loc 2 52 1
// inner_first = ntid.y * ctaid.y + tid.y (same for every outer iteration)
mov.u32 %thr_y, %tid.y;
mov.u32 %bsz_y, %ntid.y;
mov.u32 %blk_y, %ctaid.y;
mad.lo.s32 %inner_first, %bsz_y, %blk_y, %thr_y;
.loc 2 52 22
// outer_step = nctaid.x * ntid.x
mov.u32 %gsz_x, %nctaid.x;
mul.lo.s32 %outer_step, %gsz_x, %bsz_x;
.loc 2 52 22
// inner_step = nctaid.y * ntid.y
mov.u32 %gsz_y, %nctaid.y;
mul.lo.s32 %inner_step, %gsz_y, %bsz_y;
BB45_OUTER:
.loc 2 52 1
// Skip the inner loop entirely when its first index is out of range.
setp.ge.s32 %inner_done, %inner_first, %inner_end;
@%inner_done bra BB45_NEXT_OUTER;
.loc 2 52 1
// Per-outer-index base offsets into source and destination.
mul.lo.s32 %src_base, %outer, %src_ld;
.loc 2 52 42
mul.lo.s32 %dst_base, %outer, %dst_ld;
mov.u32 %inner, %inner_first;
BB45_INNER:
.loc 2 52 1
// Load src[src_base + inner] (8 bytes per element).
add.s32 %src_idx, %inner, %src_base;
mul.wide.s32 %src_off, %src_idx, 8;
add.s64 %src_addr, %src_glob, %src_off;
.loc 2 52 1
ld.global.f64 %x_val, [%src_addr];
.loc 4 2800 10
// Round toward zero to an integral double.
cvt.rzi.f64.f64 %x_trunc, %x_val;
.loc 2 52 42
// Store to dst[dst_base + inner].
add.s32 %dst_idx, %inner, %dst_base;
mul.wide.s32 %dst_off, %dst_idx, 8;
add.s64 %dst_addr, %dst_glob, %dst_off;
.loc 2 52 42
st.global.f64 [%dst_addr], %x_trunc;
.loc 2 52 22
add.s32 %inner, %inner_step, %inner;
.loc 2 52 1
setp.lt.s32 %inner_more, %inner, %inner_end;
@%inner_more bra BB45_INNER;
BB45_NEXT_OUTER:
.loc 2 52 22
add.s32 %outer, %outer_step, %outer;
.loc 2 52 1
setp.lt.s32 %outer_more, %outer, %outer_end;
@%outer_more bra BB45_OUTER;
BB45_EXIT:
.loc 2 52 2
ret;
}
.visible .entry map_y0_double(
.param .u32 map_y0_double_param_0,
.param .u32 map_y0_double_param_1,
.param .u64 map_y0_double_param_2,
.param .u32 map_y0_double_param_3,
.param .u64 map_y0_double_param_4,
.param .u32 map_y0_double_param_5
)
{
.local .align 4 .b8 __local_depot46[16];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<35>;
.reg .f32 %f<13>;
.reg .s32 %r<80>;
.reg .s64 %rd<31>;
.reg .f64 %fd<549>;
mov.u64 %SPL, __local_depot46;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r31, [map_y0_double_param_0];
ld.param.u32 %r32, [map_y0_double_param_1];
ld.param.u64 %rd1, [map_y0_double_param_2];
ld.param.u32 %r33, [map_y0_double_param_3];
ld.param.u64 %rd2, [map_y0_double_param_4];
ld.param.u32 %r34, [map_y0_double_param_5];
.loc 2 53 1
mov.u32 %r35, %ntid.x;
mov.u32 %r36, %ctaid.x;
mov.u32 %r37, %tid.x;
mad.lo.s32 %r70, %r35, %r36, %r37;
.loc 2 53 1
setp.ge.s32 %p1, %r70, %r32;
@%p1 bra BB46_56;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd28, %rd1;
BB46_2:
.loc 2 53 1
mov.u32 %r38, %ctaid.y;
mov.u32 %r39, %ntid.y;
mov.u32 %r40, %tid.y;
mad.lo.s32 %r71, %r39, %r38, %r40;
.loc 2 53 1
setp.ge.s32 %p2, %r71, %r31;
@%p2 bra BB46_55;
BB46_3:
.loc 2 53 1
mad.lo.s32 %r45, %r70, %r34, %r71;
mul.wide.s32 %rd4, %r45, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd1, [%rd5];
.loc 3 343 10
abs.f64 %fd2, %fd1;
setp.gtu.f64 %p3, %fd2, 0d3FE97F4A8F9D3F28;
@%p3 bra BB46_35;
mul.f64 %fd62, %fd2, %fd2;
mov.f64 %fd63, 0dBD13098C51C18514;
mov.f64 %fd64, 0d3C8EFBD0A1B77C65;
.loc 3 343 10
fma.rn.f64 %fd65, %fd64, %fd62, %fd63;
mov.f64 %fd66, 0d3D923102D2F5F2F5;
.loc 3 343 10
fma.rn.f64 %fd67, %fd65, %fd62, %fd66;
mov.f64 %fd68, 0dBE0A5F2DEE7D526E;
.loc 3 343 10
fma.rn.f64 %fd69, %fd67, %fd62, %fd68;
mov.f64 %fd70, 0d3E7BB77E758B38AF;
.loc 3 343 10
fma.rn.f64 %fd71, %fd69, %fd62, %fd70;
mov.f64 %fd72, 0dBEE3D1A206EC4F36;
.loc 3 343 10
fma.rn.f64 %fd73, %fd71, %fd62, %fd72;
mov.f64 %fd74, 0d3F4183DCD3ED6294;
.loc 3 343 10
fma.rn.f64 %fd75, %fd73, %fd62, %fd74;
mov.f64 %fd76, 0dBF903921CF04F123;
.loc 3 343 10
fma.rn.f64 %fd77, %fd75, %fd62, %fd76;
mov.f64 %fd78, 0d3FC5DB69D7753176;
.loc 3 343 10
fma.rn.f64 %fd79, %fd77, %fd62, %fd78;
add.f64 %fd80, %fd62, 0dBFDBA96740000000;
add.f64 %fd81, %fd80, 0d3E15A30C80000000;
mul.f64 %fd3, %fd79, %fd81;
{
.reg .b32 %temp;
mov.b64 {%temp, %r72}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%r73, %temp}, %fd2;
}
setp.lt.f64 %p4, %fd2, 0d7FF0000000000000;
setp.gt.f64 %p5, %fd2, 0d0000000000000000;
and.pred %p6, %p5, %p4;
@%p6 bra BB46_10;
abs.f64 %fd538, %fd2;
setp.gtu.f64 %p7, %fd538, 0d7FF0000000000000;
@%p7 bra BB46_9;
setp.neu.f64 %p8, %fd2, 0d0000000000000000;
@%p8 bra BB46_8;
mov.f64 %fd539, 0dFFF0000000000000;
bra.uni BB46_16;
BB46_8:
.loc 3 343 10
setp.eq.f64 %p9, %fd2, 0d7FF0000000000000;
selp.f64 %fd539, %fd2, 0dFFF8000000000000, %p9;
bra.uni BB46_16;
BB46_9:
.loc 3 343 10
add.f64 %fd539, %fd2, %fd2;
bra.uni BB46_16;
BB46_10:
.loc 3 343 10
setp.lt.u32 %p10, %r72, 1048576;
@%p10 bra BB46_12;
mov.u32 %r74, -1023;
bra.uni BB46_13;
BB46_12:
.loc 3 343 10
mul.f64 %fd83, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r72}, %fd83;
}
{
.reg .b32 %temp;
mov.b64 {%r73, %temp}, %fd83;
}
mov.u32 %r74, -1077;
BB46_13:
.loc 3 343 10
shr.s32 %r48, %r72, 20;
add.s32 %r75, %r74, %r48;
and.b32 %r49, %r72, -2146435073;
or.b32 %r50, %r49, 1072693248;
mov.b64 %fd537, {%r73, %r50};
setp.lt.u32 %p11, %r50, 1073127583;
@%p11 bra BB46_15;
{
.reg .b32 %temp;
mov.b64 {%r51, %temp}, %fd537;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r52}, %fd537;
}
add.s32 %r53, %r52, -1048576;
mov.b64 %fd537, {%r51, %r53};
add.s32 %r75, %r75, 1;
BB46_15:
add.f64 %fd84, %fd537, 0d3FF0000000000000;
mov.f64 %fd86, 0d3FF0000000000000;
.loc 3 343 10
// inline asm
cvt.rn.f32.f64 %f1,%fd84;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd85,%f2;
// inline asm
neg.f64 %fd87, %fd84;
fma.rn.f64 %fd88, %fd87, %fd85, %fd86;
fma.rn.f64 %fd89, %fd88, %fd88, %fd88;
fma.rn.f64 %fd90, %fd89, %fd85, %fd85;
add.f64 %fd91, %fd537, 0dBFF0000000000000;
mul.f64 %fd92, %fd91, %fd90;
fma.rn.f64 %fd93, %fd91, %fd90, %fd92;
mul.f64 %fd94, %fd93, %fd93;
mov.f64 %fd95, 0d3ED0EE258B7A8B04;
mov.f64 %fd96, 0d3EB1380B3AE80F1E;
.loc 3 343 10
fma.rn.f64 %fd97, %fd96, %fd94, %fd95;
mov.f64 %fd98, 0d3EF3B2669F02676F;
.loc 3 343 10
fma.rn.f64 %fd99, %fd97, %fd94, %fd98;
mov.f64 %fd100, 0d3F1745CBA9AB0956;
.loc 3 343 10
fma.rn.f64 %fd101, %fd99, %fd94, %fd100;
mov.f64 %fd102, 0d3F3C71C72D1B5154;
.loc 3 343 10
fma.rn.f64 %fd103, %fd101, %fd94, %fd102;
mov.f64 %fd104, 0d3F624924923BE72D;
.loc 3 343 10
fma.rn.f64 %fd105, %fd103, %fd94, %fd104;
mov.f64 %fd106, 0d3F8999999999A3C4;
.loc 3 343 10
fma.rn.f64 %fd107, %fd105, %fd94, %fd106;
mov.f64 %fd108, 0d3FB5555555555554;
.loc 3 343 10
fma.rn.f64 %fd109, %fd107, %fd94, %fd108;
sub.f64 %fd110, %fd91, %fd93;
add.f64 %fd111, %fd110, %fd110;
neg.f64 %fd112, %fd93;
fma.rn.f64 %fd113, %fd112, %fd91, %fd111;
mul.f64 %fd114, %fd90, %fd113;
mul.f64 %fd115, %fd109, %fd94;
fma.rn.f64 %fd116, %fd115, %fd93, %fd114;
cvt.rn.f64.s32 %fd117, %r75;
mov.f64 %fd118, 0d3FE62E42FEFA39EF;
.loc 3 343 10
fma.rn.f64 %fd119, %fd117, %fd118, %fd93;
neg.s32 %r54, %r75;
cvt.rn.f64.s32 %fd120, %r54;
fma.rn.f64 %fd121, %fd120, %fd118, %fd119;
sub.f64 %fd122, %fd121, %fd93;
sub.f64 %fd123, %fd116, %fd122;
mov.f64 %fd124, 0d3C7ABC9E3B39803F;
.loc 3 343 10
fma.rn.f64 %fd125, %fd117, %fd124, %fd123;
add.f64 %fd539, %fd119, %fd125;
abs.f64 %fd538, %fd2;
BB46_16:
mul.f64 %fd14, %fd539, 0d3FE45F306DC9C883;
setp.gtu.f64 %p12, %fd538, 0d400FB319F277BBE5;
@%p12 bra BB46_18;
add.f64 %fd126, %fd538, 0dC0033D152E971B40;
add.f64 %fd127, %fd126, 0d3CA0F539D7DA258E;
mov.f64 %fd128, 0dBCFCF8F9A8C294BC;
mov.f64 %fd129, 0dBCC0D18564C48C61;
.loc 3 343 10
fma.rn.f64 %fd130, %fd129, %fd127, %fd128;
mov.f64 %fd131, 0d3D3FAB983CAE498B;
.loc 3 343 10
fma.rn.f64 %fd132, %fd130, %fd127, %fd131;
mov.f64 %fd133, 0d3D7CD7C018579B88;
.loc 3 343 10
fma.rn.f64 %fd134, %fd132, %fd127, %fd133;
mov.f64 %fd135, 0dBDBBDD2342D64FDD;
.loc 3 343 10
fma.rn.f64 %fd136, %fd134, %fd127, %fd135;
mov.f64 %fd137, 0dBDF5C2D9416B1E2B;
.loc 3 343 10
fma.rn.f64 %fd138, %fd136, %fd127, %fd137;
mov.f64 %fd139, 0d3E32951D73174DD5;
.loc 3 343 10
fma.rn.f64 %fd140, %fd138, %fd127, %fd139;
mov.f64 %fd141, 0d3E67FF99802CAEB5;
.loc 3 343 10
fma.rn.f64 %fd142, %fd140, %fd127, %fd141;
mov.f64 %fd143, 0dBEA1CCE305C4C9F7;
.loc 3 343 10
fma.rn.f64 %fd144, %fd142, %fd127, %fd143;
mov.f64 %fd145, 0dBED232C77E29E1BB;
.loc 3 343 10
fma.rn.f64 %fd146, %fd144, %fd127, %fd145;
mov.f64 %fd147, 0d3F06ED3B9F0EF757;
.loc 3 343 10
fma.rn.f64 %fd148, %fd146, %fd127, %fd147;
mov.f64 %fd149, 0d3F315382BA096A62;
.loc 3 343 10
fma.rn.f64 %fd150, %fd148, %fd127, %fd149;
mov.f64 %fd151, 0dBF61F992590D1AE4;
.loc 3 343 10
fma.rn.f64 %fd152, %fd150, %fd127, %fd151;
mov.f64 %fd153, 0dBF81BB1CBE1A465F;
.loc 3 343 10
fma.rn.f64 %fd154, %fd152, %fd127, %fd153;
mov.f64 %fd155, 0d3FACFAE864368D84;
.loc 3 343 10
fma.rn.f64 %fd156, %fd154, %fd127, %fd155;
mov.f64 %fd157, 0d3FBBA1DEEA0294A3;
.loc 3 343 10
fma.rn.f64 %fd158, %fd156, %fd127, %fd157;
mov.f64 %fd159, 0dBFE09CDB36551280;
.loc 3 343 10
fma.rn.f64 %fd160, %fd158, %fd127, %fd159;
mul.f64 %fd15, %fd160, %fd127;
fma.rn.f64 %fd548, %fd14, %fd15, %fd3;
bra.uni BB46_54;
BB46_18:
.loc 3 343 10
setp.gtu.f64 %p13, %fd538, 0d401C58FD1A62F5EC;
@%p13 bra BB46_20;
add.f64 %fd161, %fd538, 0dC016148F5B2C2E45;
add.f64 %fd162, %fd161, 0dBC975054CD60A517;
mov.f64 %fd163, 0d3CF83FD1F333EB61;
mov.f64 %fd164, 0d3CBCB0A8F126B343;
.loc 3 343 10
fma.rn.f64 %fd165, %fd164, %fd162, %fd163;
mov.f64 %fd166, 0dBD4100E33E3FB413;
.loc 3 343 10
fma.rn.f64 %fd167, %fd165, %fd162, %fd166;
mov.f64 %fd168, 0dBD7846076D004627;
.loc 3 343 10
fma.rn.f64 %fd169, %fd167, %fd162, %fd168;
mov.f64 %fd170, 0d3DBE2F1D4F90720D;
.loc 3 343 10
fma.rn.f64 %fd171, %fd169, %fd162, %fd170;
mov.f64 %fd172, 0d3DF1D03B1E4A119B;
.loc 3 343 10
fma.rn.f64 %fd173, %fd171, %fd162, %fd172;
mov.f64 %fd174, 0dBE341D72B1B3BCE9;
.loc 3 343 10
fma.rn.f64 %fd175, %fd173, %fd162, %fd174;
mov.f64 %fd176, 0dBE62DA37CE2A9EF8;
.loc 3 343 10
fma.rn.f64 %fd177, %fd175, %fd162, %fd176;
mov.f64 %fd178, 0d3EA32E6D9974F763;
.loc 3 343 10
fma.rn.f64 %fd179, %fd177, %fd162, %fd178;
mov.f64 %fd180, 0d3ECAD77D744A1879;
.loc 3 343 10
fma.rn.f64 %fd181, %fd179, %fd162, %fd180;
mov.f64 %fd182, 0dBF0863F481A37337;
.loc 3 343 10
fma.rn.f64 %fd183, %fd181, %fd162, %fd182;
mov.f64 %fd184, 0dBF26F641F418F0F4;
.loc 3 343 10
fma.rn.f64 %fd185, %fd183, %fd162, %fd184;
mov.f64 %fd186, 0d3F627E31FE9A969E;
.loc 3 343 10
fma.rn.f64 %fd187, %fd185, %fd162, %fd186;
mov.f64 %fd188, 0d3F72F7FFE9025628;
.loc 3 343 10
fma.rn.f64 %fd189, %fd187, %fd162, %fd188;
mov.f64 %fd190, 0dBFAB2150CB41E8BF;
.loc 3 343 10
fma.rn.f64 %fd191, %fd189, %fd162, %fd190;
mov.f64 %fd192, 0dBF9F8F72E7A848DE;
.loc 3 343 10
fma.rn.f64 %fd193, %fd191, %fd162, %fd192;
mov.f64 %fd194, 0d3FD5C6E60A097823;
.loc 3 343 10
fma.rn.f64 %fd195, %fd193, %fd162, %fd194;
mul.f64 %fd16, %fd195, %fd162;
fma.rn.f64 %fd548, %fd14, %fd16, %fd3;
bra.uni BB46_54;
BB46_20:
.loc 3 343 10
setp.gtu.f64 %p14, %fd538, 0d402471FCB6A7A8C0;
@%p14 bra BB46_22;
add.f64 %fd196, %fd538, 0dC0214EB56CCCDECA;
add.f64 %fd197, %fd196, 0d3CB51970714C7C25;
mov.f64 %fd198, 0dBCF4B3A71AAAC629;
mov.f64 %fd199, 0dBCBDB7FFCF659E24;
.loc 3 343 10
fma.rn.f64 %fd200, %fd199, %fd197, %fd198;
mov.f64 %fd201, 0d3D417EC150ECDCE7;
.loc 3 343 10
fma.rn.f64 %fd202, %fd200, %fd197, %fd201;
mov.f64 %fd203, 0d3D7438F5EA1D10B2;
.loc 3 343 10
fma.rn.f64 %fd204, %fd202, %fd197, %fd203;
mov.f64 %fd205, 0dBDBEDAE7EC2C9E87;
.loc 3 343 10
fma.rn.f64 %fd206, %fd204, %fd197, %fd205;
mov.f64 %fd207, 0dBDECADD2C4B91F58;
.loc 3 343 10
fma.rn.f64 %fd208, %fd206, %fd197, %fd207;
mov.f64 %fd209, 0d3E34582C8EE12204;
.loc 3 343 10
fma.rn.f64 %fd210, %fd208, %fd197, %fd209;
mov.f64 %fd211, 0d3E5CEDA451DD20F8;
.loc 3 343 10
fma.rn.f64 %fd212, %fd210, %fd197, %fd211;
mov.f64 %fd213, 0dBEA30E8CC3165E2F;
.loc 3 343 10
fma.rn.f64 %fd214, %fd212, %fd197, %fd213;
mov.f64 %fd215, 0dBEC3324842BB1A2E;
.loc 3 343 10
fma.rn.f64 %fd216, %fd214, %fd197, %fd215;
mov.f64 %fd217, 0d3F07800BC54FBDDB;
.loc 3 343 10
fma.rn.f64 %fd218, %fd216, %fd197, %fd217;
mov.f64 %fd219, 0d3F1D79605276949A;
.loc 3 343 10
fma.rn.f64 %fd220, %fd218, %fd197, %fd219;
mov.f64 %fd221, 0dBF60E0D60385A629;
.loc 3 343 10
fma.rn.f64 %fd222, %fd220, %fd197, %fd221;
mov.f64 %fd223, 0dBF648E63600D82F3;
.loc 3 343 10
fma.rn.f64 %fd224, %fd222, %fd197, %fd223;
mov.f64 %fd225, 0d3FA68B984EC6493A;
.loc 3 343 10
fma.rn.f64 %fd226, %fd224, %fd197, %fd225;
mov.f64 %fd227, 0d3F900F7FCF183E0B;
.loc 3 343 10
fma.rn.f64 %fd228, %fd226, %fd197, %fd227;
mov.f64 %fd229, 0dBFD15F7977A772D4;
.loc 3 343 10
fma.rn.f64 %fd230, %fd228, %fd197, %fd229;
mul.f64 %fd17, %fd230, %fd197;
fma.rn.f64 %fd548, %fd14, %fd17, %fd3;
bra.uni BB46_54;
BB46_22:
.loc 3 343 10
abs.f64 %fd231, %fd538;
setp.neu.f64 %p15, %fd231, 0d7FF0000000000000;
@%p15 bra BB46_24;
mov.f64 %fd232, 0d0000000000000000;
.loc 3 343 10
fma.rn.f64 %fd548, %fd14, %fd232, %fd3;
bra.uni BB46_54;
BB46_24:
add.u64 %rd6, %SP, 4;
.loc 3 343 10
// inline asm
cvt.rn.f32.f64 %f5,%fd538;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd234,%f6;
// inline asm
neg.f64 %fd235, %fd538;
mov.f64 %fd236, 0d3FF0000000000000;
.loc 3 343 10
fma.rn.f64 %fd237, %fd235, %fd234, %fd236;
fma.rn.f64 %fd238, %fd237, %fd237, %fd237;
fma.rn.f64 %fd239, %fd238, %fd234, %fd234;
mul.f64 %fd240, %fd239, %fd239;
mov.f64 %fd241, 0d409927467A655012;
mov.f64 %fd242, 0dC0D115CB8C11A9DC;
.loc 3 343 10
fma.rn.f64 %fd243, %fd242, %fd240, %fd241;
mov.f64 %fd244, 0dC05751787E247BD4;
.loc 3 343 10
fma.rn.f64 %fd245, %fd243, %fd240, %fd244;
mov.f64 %fd246, 0d401704C4E5FC36B2;
.loc 3 343 10
fma.rn.f64 %fd247, %fd245, %fd240, %fd246;
mov.f64 %fd248, 0dBFE15B747A2FD531;
.loc 3 343 10
fma.rn.f64 %fd249, %fd247, %fd240, %fd248;
mov.f64 %fd250, 0d3FBA7FEACF6CB79B;
.loc 3 343 10
fma.rn.f64 %fd251, %fd249, %fd240, %fd250;
mov.f64 %fd252, 0dBFAFFFFFEDDCF548;
.loc 3 343 10
fma.rn.f64 %fd253, %fd251, %fd240, %fd252;
mov.f64 %fd254, 0d3FEFFFFFFFFFC9E5;
.loc 3 343 10
fma.rn.f64 %fd255, %fd253, %fd240, %fd254;
mov.f64 %fd256, 0d410ECD4523B12B84;
mov.f64 %fd257, 0dC14602FE1C34685E;
.loc 3 343 10
fma.rn.f64 %fd258, %fd257, %fd240, %fd256;
mov.f64 %fd259, 0dC0C7A2FC1972F05A;
.loc 3 343 10
fma.rn.f64 %fd260, %fd258, %fd240, %fd259;
mov.f64 %fd261, 0d407EBA131F7E5BEB;
.loc 3 343 10
fma.rn.f64 %fd262, %fd260, %fd240, %fd261;
mov.f64 %fd263, 0dC0373B92E6E7CC7D;
.loc 3 343 10
fma.rn.f64 %fd264, %fd262, %fd240, %fd263;
mov.f64 %fd265, 0d3FFA31BEE63A2F08;
.loc 3 343 10
fma.rn.f64 %fd266, %fd264, %fd240, %fd265;
mov.f64 %fd267, 0dBFCAD320104D5D05;
.loc 3 343 10
fma.rn.f64 %fd268, %fd266, %fd240, %fd267;
mov.f64 %fd269, 0d3FB0AAAA9C76D07E;
.loc 3 343 10
fma.rn.f64 %fd270, %fd268, %fd240, %fd269;
mov.f64 %fd271, 0dBFBFFFFFFFFDACEC;
.loc 3 343 10
fma.rn.f64 %fd272, %fd270, %fd240, %fd271;
fma.rn.f64 %fd18, %fd272, %fd239, %fd538;
rsqrt.approx.f64 %fd273, %fd538;
mul.f64 %fd274, %fd273, 0d3FE9884533D43651;
mul.f64 %fd19, %fd274, %fd255;
mul.f64 %fd275, %fd18, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r76, %fd275;
cvta.to.local.u64 %rd7, %rd6;
.loc 3 343 10
st.local.u32 [%rd7], %r76;
cvt.rn.f64.s32 %fd276, %r76;
neg.f64 %fd277, %fd276;
mov.f64 %fd278, 0d3FF921FB54442D18;
.loc 3 343 10
fma.rn.f64 %fd279, %fd277, %fd278, %fd18;
mov.f64 %fd280, 0d3C91A62633145C00;
.loc 3 343 10
fma.rn.f64 %fd281, %fd277, %fd280, %fd279;
mov.f64 %fd282, 0d397B839A252049C0;
.loc 3 343 10
fma.rn.f64 %fd540, %fd277, %fd282, %fd281;
abs.f64 %fd283, %fd18;
setp.leu.f64 %p16, %fd283, 0d41E0000000000000;
@%p16 bra BB46_26;
// Callseq Start 8
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd18;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 3 343 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd540, [retval0+0];
}
// Callseq End 8
ld.local.u32 %r76, [%rd7];
BB46_26:
and.b32 %r55, %r76, 3;
cvt.rn.f64.s32 %fd284, %r55;
add.f64 %fd285, %fd540, 0dBFE921FB54442D18;
fma.rn.f64 %fd541, %fd284, 0d3FF921FB54442D18, %fd285;
abs.f64 %fd286, %fd541;
setp.neu.f64 %p17, %fd286, 0d7FF0000000000000;
@%p17 bra BB46_28;
mov.f64 %fd287, 0d0000000000000000;
.loc 3 343 10
mul.rn.f64 %fd541, %fd541, %fd287;
BB46_28:
add.u64 %rd10, %SP, 0;
.loc 3 343 10
mul.f64 %fd288, %fd541, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r77, %fd288;
cvta.to.local.u64 %rd11, %rd10;
.loc 3 343 10
st.local.u32 [%rd11], %r77;
cvt.rn.f64.s32 %fd289, %r77;
neg.f64 %fd290, %fd289;
fma.rn.f64 %fd292, %fd290, %fd278, %fd541;
fma.rn.f64 %fd294, %fd290, %fd280, %fd292;
fma.rn.f64 %fd542, %fd290, %fd282, %fd294;
abs.f64 %fd296, %fd541;
setp.leu.f64 %p18, %fd296, 0d41E0000000000000;
@%p18 bra BB46_30;
// Callseq Start 9
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd541;
.param .b64 param1;
st.param.b64 [param1+0], %rd10;
.param .b64 retval0;
.loc 3 343 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd542, [retval0+0];
}
// Callseq End 9
ld.local.u32 %r77, [%rd11];
BB46_30:
add.s32 %r21, %r77, 1;
shl.b32 %r56, %r21, 3;
and.b32 %r57, %r56, 8;
and.b32 %r58, %r21, 1;
setp.eq.b32 %p19, %r58, 1;
not.pred %p20, %p19;
selp.f64 %fd297, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p20;
mul.wide.u32 %rd14, %r57, 8;
mov.u64 %rd15, __cudart_sin_cos_coeffs;
add.s64 %rd16, %rd15, %rd14;
.loc 3 343 10
ld.const.f64 %fd298, [%rd16+8];
mul.rn.f64 %fd29, %fd542, %fd542;
fma.rn.f64 %fd299, %fd297, %fd29, %fd298;
ld.const.f64 %fd300, [%rd16+16];
fma.rn.f64 %fd301, %fd299, %fd29, %fd300;
ld.const.f64 %fd302, [%rd16+24];
fma.rn.f64 %fd303, %fd301, %fd29, %fd302;
ld.const.f64 %fd304, [%rd16+32];
fma.rn.f64 %fd305, %fd303, %fd29, %fd304;
ld.const.f64 %fd306, [%rd16+40];
fma.rn.f64 %fd307, %fd305, %fd29, %fd306;
ld.const.f64 %fd308, [%rd16+48];
fma.rn.f64 %fd30, %fd307, %fd29, %fd308;
fma.rn.f64 %fd543, %fd30, %fd542, %fd542;
@%p20 bra BB46_32;
fma.rn.f64 %fd543, %fd30, %fd29, %fd236;
BB46_32:
and.b32 %r59, %r21, 2;
setp.eq.s32 %p21, %r59, 0;
@%p21 bra BB46_34;
mov.f64 %fd310, 0d0000000000000000;
mov.f64 %fd311, 0dBFF0000000000000;
.loc 3 343 10
fma.rn.f64 %fd543, %fd543, %fd311, %fd310;
BB46_34:
mul.f64 %fd36, %fd19, %fd543;
fma.rn.f64 %fd548, %fd14, %fd36, %fd3;
bra.uni BB46_54;
BB46_35:
.loc 3 343 10
setp.gtu.f64 %p22, %fd2, 0d4000347C4AB37B18;
@%p22 bra BB46_37;
add.f64 %fd312, %fd2, 0dBFEC982EB8D417EA;
add.f64 %fd313, %fd312, 0dBC7EA9D270347F83;
mov.f64 %fd314, 0d3F3D054B05D3C52D;
mov.f64 %fd315, 0dBF01630132D75FC3;
.loc 3 343 10
fma.rn.f64 %fd316, %fd315, %fd313, %fd314;
mov.f64 %fd317, 0dBF66DAC0B314B2E5;
.loc 3 343 10
fma.rn.f64 %fd318, %fd316, %fd313, %fd317;
mov.f64 %fd319, 0d3F86A5D1DE76263F;
.loc 3 343 10
fma.rn.f64 %fd320, %fd318, %fd313, %fd319;
mov.f64 %fd321, 0dBF9FD16652824592;
.loc 3 343 10
fma.rn.f64 %fd322, %fd320, %fd313, %fd321;
mov.f64 %fd323, 0d3FB0F69A9CC79FBD;
.loc 3 343 10
fma.rn.f64 %fd324, %fd322, %fd313, %fd323;
mov.f64 %fd325, 0dBFBCCE40EF15583E;
.loc 3 343 10
fma.rn.f64 %fd326, %fd324, %fd313, %fd325;
mov.f64 %fd327, 0d3FC446B11780E4FC;
.loc 3 343 10
fma.rn.f64 %fd328, %fd326, %fd313, %fd327;
mov.f64 %fd329, 0dBFC89AE7E19621F7;
.loc 3 343 10
fma.rn.f64 %fd330, %fd328, %fd313, %fd329;
mov.f64 %fd331, 0d3FCACBA1B38EF7B8;
.loc 3 343 10
fma.rn.f64 %fd332, %fd330, %fd313, %fd331;
mov.f64 %fd333, 0dBFCB4166A03BBFA5;
.loc 3 343 10
fma.rn.f64 %fd334, %fd332, %fd313, %fd333;
mov.f64 %fd335, 0d3FCACCA4D5D4889A;
.loc 3 343 10
fma.rn.f64 %fd336, %fd334, %fd313, %fd335;
mov.f64 %fd337, 0dBFCA1455932B9392;
.loc 3 343 10
fma.rn.f64 %fd338, %fd336, %fd313, %fd337;
mov.f64 %fd339, 0d3FC96D8DB8D844EC;
.loc 3 343 10
fma.rn.f64 %fd340, %fd338, %fd313, %fd339;
mov.f64 %fd341, 0dBFC8F7FB77522EDF;
.loc 3 343 10
fma.rn.f64 %fd342, %fd340, %fd313, %fd341;
mov.f64 %fd343, 0d3FC8C0926ABC9AB0;
.loc 3 343 10
fma.rn.f64 %fd344, %fd342, %fd313, %fd343;
mov.f64 %fd345, 0dBFC8D35B8FEA468C;
.loc 3 343 10
fma.rn.f64 %fd346, %fd344, %fd313, %fd345;
mov.f64 %fd347, 0d3FC9424B8A0C8F94;
.loc 3 343 10
fma.rn.f64 %fd348, %fd346, %fd313, %fd347;
mov.f64 %fd349, 0dBFCA396A7F3403EF;
.loc 3 343 10
fma.rn.f64 %fd350, %fd348, %fd313, %fd349;
mov.f64 %fd351, 0d3FCC068086C37055;
.loc 3 343 10
fma.rn.f64 %fd352, %fd350, %fd313, %fd351;
mov.f64 %fd353, 0dBFCCF18E6A4C5C4E;
.loc 3 343 10
fma.rn.f64 %fd354, %fd352, %fd313, %fd353;
mov.f64 %fd355, 0d3FCC3B1338AF4239;
.loc 3 343 10
fma.rn.f64 %fd356, %fd354, %fd313, %fd355;
mov.f64 %fd357, 0dBFDF7E38A46D70DB;
.loc 3 343 10
fma.rn.f64 %fd358, %fd356, %fd313, %fd357;
mov.f64 %fd359, 0d3FEC24371844B88A;
.loc 3 343 10
fma.rn.f64 %fd360, %fd358, %fd313, %fd359;
mul.f64 %fd548, %fd360, %fd313;
bra.uni BB46_54;
BB46_37:
.loc 3 343 10
setp.gtu.f64 %p23, %fd2, 0d40161663B5D9A628;
@%p23 bra BB46_39;
add.f64 %fd361, %fd2, 0dC00FA9534D98569C;
add.f64 %fd362, %fd361, 0d3C9F06AE7804384E;
mov.f64 %fd363, 0dBCD2434958151AC7;
mov.f64 %fd364, 0dBCDAEA62AC8BDA68;
.loc 3 343 10
fma.rn.f64 %fd365, %fd364, %fd362, %fd363;
mov.f64 %fd366, 0d3D11C24A40D33FE1;
.loc 3 343 10
fma.rn.f64 %fd367, %fd365, %fd362, %fd366;
mov.f64 %fd368, 0d3D237CD62FA08CA4;
.loc 3 343 10
fma.rn.f64 %fd369, %fd367, %fd362, %fd368;
mov.f64 %fd370, 0dBD43902E0298C52A;
.loc 3 343 10
fma.rn.f64 %fd371, %fd369, %fd362, %fd370;
mov.f64 %fd372, 0dBD1DDAAD11CAB40F;
.loc 3 343 10
fma.rn.f64 %fd373, %fd371, %fd362, %fd372;
mov.f64 %fd374, 0dBD5209D9F06D7DE4;
.loc 3 343 10
fma.rn.f64 %fd375, %fd373, %fd362, %fd374;
mov.f64 %fd376, 0d3D8BB9F464468E1A;
.loc 3 343 10
fma.rn.f64 %fd377, %fd375, %fd362, %fd376;
mov.f64 %fd378, 0dBDA8F67B07D1B440;
.loc 3 343 10
fma.rn.f64 %fd379, %fd377, %fd362, %fd378;
mov.f64 %fd380, 0d3DC7C8D60F9EAECF;
.loc 3 343 10
fma.rn.f64 %fd381, %fd379, %fd362, %fd380;
mov.f64 %fd382, 0dBDE9703405B49A8D;
.loc 3 343 10
fma.rn.f64 %fd383, %fd381, %fd362, %fd382;
mov.f64 %fd384, 0d3E0A6B64E76417E4;
.loc 3 343 10
fma.rn.f64 %fd385, %fd383, %fd362, %fd384;
mov.f64 %fd386, 0dBE2F6B5AFB2F1359;
.loc 3 343 10
fma.rn.f64 %fd387, %fd385, %fd362, %fd386;
mov.f64 %fd388, 0d3E54526B71C21EC1;
.loc 3 343 10
fma.rn.f64 %fd389, %fd387, %fd362, %fd388;
mov.f64 %fd390, 0dBE5776DBCBBC8E1D;
.loc 3 343 10
fma.rn.f64 %fd391, %fd389, %fd362, %fd390;
mov.f64 %fd392, 0dBE93B211FC2DF90E;
.loc 3 343 10
fma.rn.f64 %fd393, %fd391, %fd362, %fd392;
mov.f64 %fd394, 0dBED486372E8562DC;
.loc 3 343 10
fma.rn.f64 %fd395, %fd393, %fd362, %fd394;
mov.f64 %fd396, 0d3F0AB2C1FBC3A254;
.loc 3 343 10
fma.rn.f64 %fd397, %fd395, %fd362, %fd396;
mov.f64 %fd398, 0d3F299827653353B8;
.loc 3 343 10
fma.rn.f64 %fd399, %fd397, %fd362, %fd398;
mov.f64 %fd400, 0dBF61E32BC4ED7084;
.loc 3 343 10
fma.rn.f64 %fd401, %fd399, %fd362, %fd400;
mov.f64 %fd402, 0dBF7C116FDC599A09;
.loc 3 343 10
fma.rn.f64 %fd403, %fd401, %fd362, %fd402;
mov.f64 %fd404, 0d3FADF6D59BF50C77;
.loc 3 343 10
fma.rn.f64 %fd405, %fd403, %fd362, %fd404;
mov.f64 %fd406, 0d3FAA09C92903680B;
.loc 3 343 10
fma.rn.f64 %fd407, %fd405, %fd362, %fd406;
mov.f64 %fd408, 0dBFD9C34256A12A0B;
.loc 3 343 10
fma.rn.f64 %fd409, %fd407, %fd362, %fd408;
mul.f64 %fd548, %fd409, %fd362;
bra.uni BB46_54;
BB46_39:
.loc 3 343 10
setp.gtu.f64 %p24, %fd2, 0d40214EF30C0C06ED;
@%p24 bra BB46_41;
add.f64 %fd410, %fd2, 0dC01C581DC4E72103;
add.f64 %fd411, %fd410, 0d3C99774A495F56CF;
mov.f64 %fd412, 0dBD3F443BB4F53D75;
mov.f64 %fd413, 0d3CF1CB3ABA718B8E;
.loc 3 343 10
fma.rn.f64 %fd414, %fd413, %fd411, %fd412;
mov.f64 %fd415, 0dBD770F737BD6A786;
.loc 3 343 10
fma.rn.f64 %fd416, %fd414, %fd411, %fd415;
mov.f64 %fd417, 0d3DBF0E9A20459E14;
.loc 3 343 10
fma.rn.f64 %fd418, %fd416, %fd411, %fd417;
mov.f64 %fd419, 0d3DEFA6B137D5E108;
.loc 3 343 10
fma.rn.f64 %fd420, %fd418, %fd411, %fd419;
mov.f64 %fd421, 0dBE344296729FB7FA;
.loc 3 343 10
fma.rn.f64 %fd422, %fd420, %fd411, %fd421;
mov.f64 %fd423, 0dBE60A2813A80DFAA;
.loc 3 343 10
fma.rn.f64 %fd424, %fd422, %fd411, %fd423;
mov.f64 %fd425, 0d3EA34AA737A83EB4;
.loc 3 343 10
fma.rn.f64 %fd426, %fd424, %fd411, %fd425;
mov.f64 %fd427, 0d3EC6A9227332D03C;
.loc 3 343 10
fma.rn.f64 %fd428, %fd426, %fd411, %fd427;
mov.f64 %fd429, 0dBF08177E4F93C81E;
.loc 3 343 10
fma.rn.f64 %fd430, %fd428, %fd411, %fd429;
mov.f64 %fd431, 0dBF226DD71E391775;
.loc 3 343 10
fma.rn.f64 %fd432, %fd430, %fd411, %fd431;
mov.f64 %fd433, 0d3F61D35E85FD7B22;
.loc 3 343 10
fma.rn.f64 %fd434, %fd432, %fd411, %fd433;
mov.f64 %fd435, 0d3F6B2F14A955285C;
.loc 3 343 10
fma.rn.f64 %fd436, %fd434, %fd411, %fd435;
mov.f64 %fd437, 0dBFA8969C64CBF388;
.loc 3 343 10
fma.rn.f64 %fd438, %fd436, %fd411, %fd437;
mov.f64 %fd439, 0dBF95AEF611FC4D5A;
.loc 3 343 10
fma.rn.f64 %fd440, %fd438, %fd411, %fd439;
mov.f64 %fd441, 0d3FD334CCA0697A5A;
.loc 3 343 10
fma.rn.f64 %fd442, %fd440, %fd411, %fd441;
mul.f64 %fd548, %fd442, %fd411;
bra.uni BB46_54;
BB46_41:
.loc 3 343 10
abs.f64 %fd443, %fd2;
setp.neu.f64 %p25, %fd443, 0d7FF0000000000000;
@%p25 bra BB46_43;
mov.f64 %fd548, 0d0000000000000000;
bra.uni BB46_54;
BB46_43:
add.u64 %rd17, %SP, 12;
.loc 3 343 10
// inline asm
cvt.rn.f32.f64 %f9,%fd2;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f10,%f9;
// inline asm
// inline asm
cvt.f64.f32 %fd446,%f10;
// inline asm
neg.f64 %fd447, %fd2;
mov.f64 %fd448, 0d3FF0000000000000;
.loc 3 343 10
fma.rn.f64 %fd449, %fd447, %fd446, %fd448;
fma.rn.f64 %fd450, %fd449, %fd449, %fd449;
fma.rn.f64 %fd451, %fd450, %fd446, %fd446;
mul.f64 %fd452, %fd451, %fd451;
mov.f64 %fd453, 0d4093F56A049CDDE7;
mov.f64 %fd454, 0dC0C5E91E6AC3AD03;
.loc 3 343 10
fma.rn.f64 %fd455, %fd454, %fd452, %fd453;
mov.f64 %fd456, 0dC05572D39DFB8433;
.loc 3 343 10
fma.rn.f64 %fd457, %fd455, %fd452, %fd456;
mov.f64 %fd458, 0d4016A6041CAA59E5;
.loc 3 343 10
fma.rn.f64 %fd459, %fd457, %fd452, %fd458;
mov.f64 %fd460, 0dBFE155E3A0493880;
.loc 3 343 10
fma.rn.f64 %fd461, %fd459, %fd452, %fd460;
mov.f64 %fd462, 0d3FBA7FB92F417F7F;
.loc 3 343 10
fma.rn.f64 %fd463, %fd461, %fd452, %fd462;
mov.f64 %fd464, 0dBFAFFFFFB12E32F5;
.loc 3 343 10
fma.rn.f64 %fd465, %fd463, %fd452, %fd464;
mov.f64 %fd466, 0d3FEFFFFFFFFECED5;
.loc 3 343 10
fma.rn.f64 %fd467, %fd465, %fd452, %fd466;
mov.f64 %fd468, 0dC15709C79AAC5813;
mov.f64 %fd469, 0d418A86A64BE101DC;
.loc 3 343 10
fma.rn.f64 %fd470, %fd469, %fd452, %fd468;
mov.f64 %fd471, 0d41142A31C980A287;
.loc 3 343 10
fma.rn.f64 %fd472, %fd470, %fd452, %fd471;
mov.f64 %fd473, 0dC0C9CBE68930485D;
.loc 3 343 10
fma.rn.f64 %fd474, %fd472, %fd452, %fd473;
mov.f64 %fd475, 0d407F583E14E8A4E8;
.loc 3 343 10
fma.rn.f64 %fd476, %fd474, %fd452, %fd475;
mov.f64 %fd477, 0dC0374A629C650680;
.loc 3 343 10
fma.rn.f64 %fd478, %fd476, %fd452, %fd477;
mov.f64 %fd479, 0d3FFA32A7AF17FAE9;
.loc 3 343 10
fma.rn.f64 %fd480, %fd478, %fd452, %fd479;
mov.f64 %fd481, 0dBFCAD32497785CD6;
.loc 3 343 10
fma.rn.f64 %fd482, %fd480, %fd452, %fd481;
mov.f64 %fd483, 0d3FB0AAAA9FB75F7B;
.loc 3 343 10
fma.rn.f64 %fd484, %fd482, %fd452, %fd483;
mov.f64 %fd485, 0dBFBFFFFFFFFE320F;
.loc 3 343 10
fma.rn.f64 %fd486, %fd484, %fd452, %fd485;
fma.rn.f64 %fd42, %fd486, %fd451, %fd2;
rsqrt.approx.f64 %fd487, %fd2;
mul.f64 %fd488, %fd487, 0d3FE9884533D43651;
mul.f64 %fd43, %fd488, %fd467;
mul.f64 %fd489, %fd42, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r78, %fd489;
cvta.to.local.u64 %rd18, %rd17;
.loc 3 343 10
st.local.u32 [%rd18], %r78;
cvt.rn.f64.s32 %fd490, %r78;
neg.f64 %fd491, %fd490;
mov.f64 %fd492, 0d3FF921FB54442D18;
.loc 3 343 10
fma.rn.f64 %fd493, %fd491, %fd492, %fd42;
mov.f64 %fd494, 0d3C91A62633145C00;
.loc 3 343 10
fma.rn.f64 %fd495, %fd491, %fd494, %fd493;
mov.f64 %fd496, 0d397B839A252049C0;
.loc 3 343 10
fma.rn.f64 %fd544, %fd491, %fd496, %fd495;
abs.f64 %fd497, %fd42;
setp.leu.f64 %p26, %fd497, 0d41E0000000000000;
@%p26 bra BB46_45;
// Callseq Start 10
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd42;
.param .b64 param1;
st.param.b64 [param1+0], %rd17;
.param .b64 retval0;
.loc 3 343 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd544, [retval0+0];
}
// Callseq End 10
ld.local.u32 %r78, [%rd18];
BB46_45:
and.b32 %r60, %r78, 3;
cvt.rn.f64.s32 %fd498, %r60;
add.f64 %fd499, %fd544, 0dC002D97C7F3321D2;
fma.rn.f64 %fd545, %fd498, 0d3FF921FB54442D18, %fd499;
abs.f64 %fd500, %fd545;
setp.neu.f64 %p27, %fd500, 0d7FF0000000000000;
@%p27 bra BB46_47;
mov.f64 %fd501, 0d0000000000000000;
.loc 3 343 10
mul.rn.f64 %fd545, %fd545, %fd501;
BB46_47:
add.u64 %rd21, %SP, 8;
.loc 3 343 10
mul.f64 %fd502, %fd545, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r79, %fd502;
cvta.to.local.u64 %rd22, %rd21;
.loc 3 343 10
st.local.u32 [%rd22], %r79;
cvt.rn.f64.s32 %fd503, %r79;
neg.f64 %fd504, %fd503;
fma.rn.f64 %fd506, %fd504, %fd492, %fd545;
fma.rn.f64 %fd508, %fd504, %fd494, %fd506;
fma.rn.f64 %fd546, %fd504, %fd496, %fd508;
abs.f64 %fd510, %fd545;
setp.leu.f64 %p28, %fd510, 0d41E0000000000000;
@%p28 bra BB46_49;
// Callseq Start 11
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd545;
.param .b64 param1;
st.param.b64 [param1+0], %rd21;
.param .b64 retval0;
.loc 3 343 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd546, [retval0+0];
}
// Callseq End 11
ld.local.u32 %r79, [%rd22];
BB46_49:
add.s32 %r28, %r79, 1;
shl.b32 %r61, %r28, 3;
and.b32 %r62, %r61, 8;
and.b32 %r63, %r28, 1;
setp.eq.b32 %p29, %r63, 1;
not.pred %p30, %p29;
selp.f64 %fd511, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p30;
mul.wide.u32 %rd25, %r62, 8;
mov.u64 %rd26, __cudart_sin_cos_coeffs;
add.s64 %rd27, %rd26, %rd25;
.loc 3 343 10
ld.const.f64 %fd512, [%rd27+8];
mul.rn.f64 %fd53, %fd546, %fd546;
fma.rn.f64 %fd513, %fd511, %fd53, %fd512;
ld.const.f64 %fd514, [%rd27+16];
fma.rn.f64 %fd515, %fd513, %fd53, %fd514;
ld.const.f64 %fd516, [%rd27+24];
fma.rn.f64 %fd517, %fd515, %fd53, %fd516;
ld.const.f64 %fd518, [%rd27+32];
fma.rn.f64 %fd519, %fd517, %fd53, %fd518;
ld.const.f64 %fd520, [%rd27+40];
fma.rn.f64 %fd521, %fd519, %fd53, %fd520;
ld.const.f64 %fd522, [%rd27+48];
fma.rn.f64 %fd54, %fd521, %fd53, %fd522;
fma.rn.f64 %fd547, %fd54, %fd546, %fd546;
@%p30 bra BB46_51;
fma.rn.f64 %fd547, %fd54, %fd53, %fd448;
BB46_51:
and.b32 %r64, %r28, 2;
setp.eq.s32 %p31, %r64, 0;
@%p31 bra BB46_53;
mov.f64 %fd524, 0d0000000000000000;
mov.f64 %fd525, 0dBFF0000000000000;
.loc 3 343 10
fma.rn.f64 %fd547, %fd547, %fd525, %fd524;
BB46_53:
mul.f64 %fd548, %fd43, %fd547;
BB46_54:
setp.lt.f64 %p32, %fd1, 0d0000000000000000;
selp.f64 %fd526, 0dFFF8000000000000, %fd548, %p32;
.loc 2 53 42
mad.lo.s32 %r65, %r70, %r33, %r71;
mul.wide.s32 %rd29, %r65, 8;
add.s64 %rd30, %rd28, %rd29;
st.global.f64 [%rd30], %fd526;
.loc 2 53 22
mov.u32 %r67, %nctaid.y;
mad.lo.s32 %r71, %r67, %r39, %r71;
.loc 2 53 1
setp.lt.s32 %p33, %r71, %r31;
@%p33 bra BB46_3;
BB46_55:
.loc 2 53 22
mov.u32 %r68, %nctaid.x;
mad.lo.s32 %r70, %r68, %r35, %r70;
.loc 2 53 1
setp.lt.s32 %p34, %r70, %r32;
@%p34 bra BB46_2;
BB46_56:
.loc 2 53 2
ret;
}
.visible .entry map_y1_double(
.param .u32 map_y1_double_param_0,
.param .u32 map_y1_double_param_1,
.param .u64 map_y1_double_param_2,
.param .u32 map_y1_double_param_3,
.param .u64 map_y1_double_param_4,
.param .u32 map_y1_double_param_5
)
{
.local .align 4 .b8 __local_depot47[16];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<39>;
.reg .f32 %f<13>;
.reg .s32 %r<80>;
.reg .s64 %rd<31>;
.reg .f64 %fd<533>;
mov.u64 %SPL, __local_depot47;
cvta.local.u64 %SP, %SPL;
ld.param.u32 %r31, [map_y1_double_param_0];
ld.param.u32 %r32, [map_y1_double_param_1];
ld.param.u64 %rd1, [map_y1_double_param_2];
ld.param.u32 %r33, [map_y1_double_param_3];
ld.param.u64 %rd2, [map_y1_double_param_4];
ld.param.u32 %r34, [map_y1_double_param_5];
.loc 2 54 1
mov.u32 %r35, %ntid.x;
mov.u32 %r36, %ctaid.x;
mov.u32 %r37, %tid.x;
mad.lo.s32 %r70, %r35, %r36, %r37;
.loc 2 54 1
setp.ge.s32 %p1, %r70, %r32;
@%p1 bra BB47_61;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd28, %rd1;
BB47_2:
.loc 2 54 1
mov.u32 %r38, %ctaid.y;
mov.u32 %r39, %ntid.y;
mov.u32 %r40, %tid.y;
mad.lo.s32 %r71, %r39, %r38, %r40;
.loc 2 54 1
setp.ge.s32 %p2, %r71, %r31;
@%p2 bra BB47_60;
BB47_3:
.loc 2 54 1
mad.lo.s32 %r45, %r70, %r34, %r71;
mul.wide.s32 %rd4, %r45, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd1, [%rd5];
.loc 3 348 10
abs.f64 %fd2, %fd1;
setp.lt.f64 %p3, %fd2, 0d000730D67819E8D2;
@%p3 bra BB47_56;
setp.gtu.f64 %p4, %fd2, 0d3FF4C6F208132576;
@%p4 bra BB47_37;
mul.f64 %fd64, %fd2, %fd2;
mov.f64 %fd65, 0dBDCF0B5B1FB7B95E;
mov.f64 %fd66, 0d3D5249F90687428C;
.loc 3 348 10
fma.rn.f64 %fd67, %fd66, %fd64, %fd65;
mov.f64 %fd68, 0d3E432E589311FA14;
.loc 3 348 10
fma.rn.f64 %fd69, %fd67, %fd64, %fd68;
mov.f64 %fd70, 0dBEB0A780AA4A92E9;
.loc 3 348 10
fma.rn.f64 %fd71, %fd69, %fd64, %fd70;
mov.f64 %fd72, 0d3F12C7DBFFCAEC2B;
.loc 3 348 10
fma.rn.f64 %fd73, %fd71, %fd64, %fd72;
mov.f64 %fd74, 0dBF6835B97894BA4A;
.loc 3 348 10
fma.rn.f64 %fd75, %fd73, %fd64, %fd74;
mov.f64 %fd76, 0d3FABD3975C75B4A3;
.loc 3 348 10
fma.rn.f64 %fd77, %fd75, %fd64, %fd76;
mov.f64 %fd78, 0dBFC91866143CBC8A;
.loc 3 348 10
fma.rn.f64 %fd79, %fd77, %fd64, %fd78;
mul.f64 %fd3, %fd79, %fd2;
{
.reg .b32 %temp;
mov.b64 {%temp, %r72}, %fd2;
}
{
.reg .b32 %temp;
mov.b64 {%r73, %temp}, %fd2;
}
setp.lt.f64 %p5, %fd2, 0d7FF0000000000000;
setp.gt.f64 %p6, %fd2, 0d0000000000000000;
and.pred %p7, %p6, %p5;
@%p7 bra BB47_11;
abs.f64 %fd521, %fd2;
setp.gtu.f64 %p8, %fd521, 0d7FF0000000000000;
@%p8 bra BB47_10;
setp.neu.f64 %p9, %fd2, 0d0000000000000000;
@%p9 bra BB47_9;
mov.f64 %fd522, 0dFFF0000000000000;
bra.uni BB47_17;
BB47_9:
.loc 3 348 10
setp.eq.f64 %p10, %fd2, 0d7FF0000000000000;
selp.f64 %fd522, %fd2, 0dFFF8000000000000, %p10;
bra.uni BB47_17;
BB47_10:
.loc 3 348 10
add.f64 %fd522, %fd2, %fd2;
bra.uni BB47_17;
BB47_11:
.loc 3 348 10
setp.lt.u32 %p11, %r72, 1048576;
@%p11 bra BB47_13;
mov.u32 %r74, -1023;
bra.uni BB47_14;
BB47_13:
.loc 3 348 10
mul.f64 %fd81, %fd2, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r72}, %fd81;
}
{
.reg .b32 %temp;
mov.b64 {%r73, %temp}, %fd81;
}
mov.u32 %r74, -1077;
BB47_14:
.loc 3 348 10
shr.s32 %r48, %r72, 20;
add.s32 %r75, %r74, %r48;
and.b32 %r49, %r72, -2146435073;
or.b32 %r50, %r49, 1072693248;
mov.b64 %fd520, {%r73, %r50};
setp.lt.u32 %p12, %r50, 1073127583;
@%p12 bra BB47_16;
{
.reg .b32 %temp;
mov.b64 {%r51, %temp}, %fd520;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r52}, %fd520;
}
add.s32 %r53, %r52, -1048576;
mov.b64 %fd520, {%r51, %r53};
add.s32 %r75, %r75, 1;
BB47_16:
add.f64 %fd82, %fd520, 0d3FF0000000000000;
mov.f64 %fd84, 0d3FF0000000000000;
.loc 3 348 10
// inline asm
cvt.rn.f32.f64 %f1,%fd82;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd83,%f2;
// inline asm
neg.f64 %fd85, %fd82;
fma.rn.f64 %fd86, %fd85, %fd83, %fd84;
fma.rn.f64 %fd87, %fd86, %fd86, %fd86;
fma.rn.f64 %fd88, %fd87, %fd83, %fd83;
add.f64 %fd89, %fd520, 0dBFF0000000000000;
mul.f64 %fd90, %fd89, %fd88;
fma.rn.f64 %fd91, %fd89, %fd88, %fd90;
mul.f64 %fd92, %fd91, %fd91;
mov.f64 %fd93, 0d3ED0EE258B7A8B04;
mov.f64 %fd94, 0d3EB1380B3AE80F1E;
.loc 3 348 10
fma.rn.f64 %fd95, %fd94, %fd92, %fd93;
mov.f64 %fd96, 0d3EF3B2669F02676F;
.loc 3 348 10
fma.rn.f64 %fd97, %fd95, %fd92, %fd96;
mov.f64 %fd98, 0d3F1745CBA9AB0956;
.loc 3 348 10
fma.rn.f64 %fd99, %fd97, %fd92, %fd98;
mov.f64 %fd100, 0d3F3C71C72D1B5154;
.loc 3 348 10
fma.rn.f64 %fd101, %fd99, %fd92, %fd100;
mov.f64 %fd102, 0d3F624924923BE72D;
.loc 3 348 10
fma.rn.f64 %fd103, %fd101, %fd92, %fd102;
mov.f64 %fd104, 0d3F8999999999A3C4;
.loc 3 348 10
fma.rn.f64 %fd105, %fd103, %fd92, %fd104;
mov.f64 %fd106, 0d3FB5555555555554;
.loc 3 348 10
fma.rn.f64 %fd107, %fd105, %fd92, %fd106;
sub.f64 %fd108, %fd89, %fd91;
add.f64 %fd109, %fd108, %fd108;
neg.f64 %fd110, %fd91;
fma.rn.f64 %fd111, %fd110, %fd89, %fd109;
mul.f64 %fd112, %fd88, %fd111;
mul.f64 %fd113, %fd107, %fd92;
fma.rn.f64 %fd114, %fd113, %fd91, %fd112;
cvt.rn.f64.s32 %fd115, %r75;
mov.f64 %fd116, 0d3FE62E42FEFA39EF;
.loc 3 348 10
fma.rn.f64 %fd117, %fd115, %fd116, %fd91;
neg.s32 %r54, %r75;
cvt.rn.f64.s32 %fd118, %r54;
fma.rn.f64 %fd119, %fd118, %fd116, %fd117;
sub.f64 %fd120, %fd119, %fd91;
sub.f64 %fd121, %fd114, %fd120;
mov.f64 %fd122, 0d3C7ABC9E3B39803F;
.loc 3 348 10
fma.rn.f64 %fd123, %fd115, %fd122, %fd121;
add.f64 %fd522, %fd117, %fd123;
abs.f64 %fd521, %fd2;
BB47_17:
setp.gtu.f64 %p13, %fd521, 0d400353AABAD7B784;
@%p13 bra BB47_19;
mov.f64 %fd124, 0dBD4DD167A0DC3F55;
mov.f64 %fd125, 0d3D020E4ADCDE2AD3;
.loc 3 348 10
fma.rn.f64 %fd126, %fd125, %fd521, %fd124;
mov.f64 %fd127, 0d3D5503F5A491E487;
.loc 3 348 10
fma.rn.f64 %fd128, %fd126, %fd521, %fd127;
mov.f64 %fd129, 0d3DC1F29940C2403A;
.loc 3 348 10
fma.rn.f64 %fd130, %fd128, %fd521, %fd129;
mov.f64 %fd131, 0d3D84CF9302EACDEF;
.loc 3 348 10
fma.rn.f64 %fd132, %fd130, %fd521, %fd131;
mov.f64 %fd133, 0dBE384A53DBBCA436;
.loc 3 348 10
fma.rn.f64 %fd134, %fd132, %fd521, %fd133;
mov.f64 %fd135, 0d3D9779BEE4F63BCC;
.loc 3 348 10
fma.rn.f64 %fd136, %fd134, %fd521, %fd135;
mov.f64 %fd137, 0d3EA6C160E414F3F0;
.loc 3 348 10
fma.rn.f64 %fd138, %fd136, %fd521, %fd137;
mov.f64 %fd139, 0d3D8F3D2F12430699;
.loc 3 348 10
fma.rn.f64 %fd140, %fd138, %fd521, %fd139;
mov.f64 %fd141, 0dBF0C71C72C0CED04;
.loc 3 348 10
fma.rn.f64 %fd142, %fd140, %fd521, %fd141;
mov.f64 %fd143, 0d3D659BCA506F1128;
.loc 3 348 10
fma.rn.f64 %fd144, %fd142, %fd521, %fd143;
mov.f64 %fd145, 0d3F65555555506982;
.loc 3 348 10
fma.rn.f64 %fd146, %fd144, %fd521, %fd145;
mov.f64 %fd147, 0d3D15BA0B425F1BFB;
.loc 3 348 10
fma.rn.f64 %fd148, %fd146, %fd521, %fd147;
mov.f64 %fd149, 0dBFB0000000000065;
.loc 3 348 10
fma.rn.f64 %fd150, %fd148, %fd521, %fd149;
mov.f64 %fd151, 0d3C8729A7253FB679;
.loc 3 348 10
fma.rn.f64 %fd152, %fd150, %fd521, %fd151;
mov.f64 %fd153, 0d3FE0000000000000;
.loc 3 348 10
fma.rn.f64 %fd154, %fd152, %fd521, %fd153;
mul.f64 %fd527, %fd154, %fd521;
bra.uni BB47_36;
BB47_19:
.loc 3 348 10
setp.gtu.f64 %p14, %fd521, 0d4015B1D0574614EA;
@%p14 bra BB47_21;
add.f64 %fd155, %fd521, 0dC00EA75575AF6F09;
add.f64 %fd156, %fd155, 0d3CA60155A9D1B256;
mov.f64 %fd157, 0d3D41011A1DF02DAD;
mov.f64 %fd158, 0dBCF8D3CDBB60175E;
.loc 3 348 10
fma.rn.f64 %fd159, %fd158, %fd156, %fd157;
mov.f64 %fd160, 0d3D76013AC1E5E222;
.loc 3 348 10
fma.rn.f64 %fd161, %fd159, %fd156, %fd160;
mov.f64 %fd162, 0dBDBEC315D96D5F03;
.loc 3 348 10
fma.rn.f64 %fd163, %fd161, %fd156, %fd162;
mov.f64 %fd164, 0dBDF03BE1B4B57207;
.loc 3 348 10
fma.rn.f64 %fd165, %fd163, %fd156, %fd164;
mov.f64 %fd166, 0d3E345695F8B660F7;
.loc 3 348 10
fma.rn.f64 %fd167, %fd165, %fd156, %fd166;
mov.f64 %fd168, 0d3E617069FCFCFFF4;
.loc 3 348 10
fma.rn.f64 %fd169, %fd167, %fd156, %fd168;
mov.f64 %fd170, 0dBEA33825C36745EB;
.loc 3 348 10
fma.rn.f64 %fd171, %fd169, %fd156, %fd170;
mov.f64 %fd172, 0dBEC9799D4F90931B;
.loc 3 348 10
fma.rn.f64 %fd173, %fd171, %fd156, %fd172;
mov.f64 %fd174, 0d3F083A06E2F7DF13;
.loc 3 348 10
fma.rn.f64 %fd175, %fd173, %fd156, %fd174;
mov.f64 %fd176, 0d3F26E4C2D53A7CF6;
.loc 3 348 10
fma.rn.f64 %fd177, %fd175, %fd156, %fd176;
mov.f64 %fd178, 0dBF624B3409957B1C;
.loc 3 348 10
fma.rn.f64 %fd179, %fd177, %fd156, %fd178;
mov.f64 %fd180, 0dBF7537544C3325DF;
.loc 3 348 10
fma.rn.f64 %fd181, %fd179, %fd156, %fd180;
mov.f64 %fd182, 0d3FAB589D1DA138E2;
.loc 3 348 10
fma.rn.f64 %fd183, %fd181, %fd156, %fd182;
mov.f64 %fd184, 0d3FAAE8A39F51AD13;
.loc 3 348 10
fma.rn.f64 %fd185, %fd183, %fd156, %fd184;
mov.f64 %fd186, 0dBFD9C6CF582CBF7F;
.loc 3 348 10
fma.rn.f64 %fd187, %fd185, %fd156, %fd186;
mul.f64 %fd527, %fd187, %fd156;
bra.uni BB47_36;
BB47_21:
.loc 3 348 10
setp.gtu.f64 %p15, %fd521, 0d40213065E54C1AA9;
@%p15 bra BB47_23;
add.f64 %fd188, %fd521, 0dC01C0FF5F3B47250;
add.f64 %fd189, %fd188, 0d3C9B226D9D243827;
mov.f64 %fd190, 0dBD40E8363DB649A9;
mov.f64 %fd191, 0d3CF3EB867515FAD6;
.loc 3 348 10
fma.rn.f64 %fd192, %fd191, %fd189, %fd190;
mov.f64 %fd193, 0dBD73B7DD4A6608FB;
.loc 3 348 10
fma.rn.f64 %fd194, %fd192, %fd189, %fd193;
mov.f64 %fd195, 0d3DBEC5E01482C750;
.loc 3 348 10
fma.rn.f64 %fd196, %fd194, %fd189, %fd195;
mov.f64 %fd197, 0d3DEC62BB9E882103;
.loc 3 348 10
fma.rn.f64 %fd198, %fd196, %fd189, %fd197;
mov.f64 %fd199, 0dBE34462EED732A23;
.loc 3 348 10
fma.rn.f64 %fd200, %fd198, %fd189, %fd199;
mov.f64 %fd201, 0dBE5D48DCAD7DC59B;
.loc 3 348 10
fma.rn.f64 %fd202, %fd200, %fd189, %fd201;
mov.f64 %fd203, 0d3EA3026DF29167E9;
.loc 3 348 10
fma.rn.f64 %fd204, %fd202, %fd189, %fd203;
mov.f64 %fd205, 0d3EC4255B0119666C;
.loc 3 348 10
fma.rn.f64 %fd206, %fd204, %fd189, %fd205;
mov.f64 %fd207, 0dBF0796A751B32693;
.loc 3 348 10
fma.rn.f64 %fd208, %fd206, %fd189, %fd207;
mov.f64 %fd209, 0dBF207358BBDBA284;
.loc 3 348 10
fma.rn.f64 %fd210, %fd208, %fd189, %fd209;
mov.f64 %fd211, 0d3F613FBC7D6927B1;
.loc 3 348 10
fma.rn.f64 %fd212, %fd210, %fd189, %fd211;
mov.f64 %fd213, 0d3F69A4B292E3DD75;
.loc 3 348 10
fma.rn.f64 %fd214, %fd212, %fd189, %fd213;
mov.f64 %fd215, 0dBFA80C83BDEEE4FB;
.loc 3 348 10
fma.rn.f64 %fd216, %fd214, %fd189, %fd215;
mov.f64 %fd217, 0dBF95E70DC60362BF;
.loc 3 348 10
fma.rn.f64 %fd218, %fd216, %fd189, %fd217;
mov.f64 %fd219, 0d3FD33518B3874E8A;
.loc 3 348 10
fma.rn.f64 %fd220, %fd218, %fd189, %fd219;
mul.f64 %fd527, %fd220, %fd189;
bra.uni BB47_36;
BB47_23:
.loc 3 348 10
abs.f64 %fd221, %fd521;
setp.neu.f64 %p16, %fd221, 0d7FF0000000000000;
@%p16 bra BB47_25;
mov.f64 %fd527, 0d0000000000000000;
bra.uni BB47_36;
BB47_25:
add.u64 %rd6, %SP, 4;
.loc 3 348 10
// inline asm
cvt.rn.f32.f64 %f5,%fd521;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f6,%f5;
// inline asm
// inline asm
cvt.f64.f32 %fd224,%f6;
// inline asm
neg.f64 %fd225, %fd521;
mov.f64 %fd226, 0d3FF0000000000000;
.loc 3 348 10
fma.rn.f64 %fd227, %fd225, %fd224, %fd226;
fma.rn.f64 %fd228, %fd227, %fd227, %fd227;
fma.rn.f64 %fd229, %fd228, %fd224, %fd224;
mul.f64 %fd230, %fd229, %fd229;
mov.f64 %fd231, 0dC099C06322A3F8BE;
mov.f64 %fd232, 0d40CD02EA3F2F6751;
.loc 3 348 10
fma.rn.f64 %fd233, %fd232, %fd230, %fd231;
mov.f64 %fd234, 0d405B89354DA77324;
.loc 3 348 10
fma.rn.f64 %fd235, %fd233, %fd230, %fd234;
mov.f64 %fd236, 0dC01E352294653188;
.loc 3 348 10
fma.rn.f64 %fd237, %fd235, %fd230, %fd236;
mov.f64 %fd238, 0d3FE9BC7DB16BD7A7;
.loc 3 348 10
fma.rn.f64 %fd239, %fd237, %fd230, %fd238;
mov.f64 %fd240, 0dBFC8BFE1C3A4F741;
.loc 3 348 10
fma.rn.f64 %fd241, %fd239, %fd230, %fd240;
mov.f64 %fd242, 0d3FC7FFFFF0D00BE2;
.loc 3 348 10
fma.rn.f64 %fd243, %fd241, %fd230, %fd242;
mov.f64 %fd244, 0d3FF00000000068CC;
.loc 3 348 10
fma.rn.f64 %fd245, %fd243, %fd230, %fd244;
mov.f64 %fd246, 0d415A30AC6857BEE0;
mov.f64 %fd247, 0dC18DA26B212FDC9A;
.loc 3 348 10
fma.rn.f64 %fd248, %fd247, %fd230, %fd246;
mov.f64 %fd249, 0dC11764222AD7C910;
.loc 3 348 10
fma.rn.f64 %fd250, %fd248, %fd230, %fd249;
mov.f64 %fd251, 0d40CEB02E0C306857;
.loc 3 348 10
fma.rn.f64 %fd252, %fd250, %fd230, %fd251;
mov.f64 %fd253, 0dC08351859FA2B23B;
.loc 3 348 10
fma.rn.f64 %fd254, %fd252, %fd230, %fd253;
mov.f64 %fd255, 0d403E65A07AF51F42;
.loc 3 348 10
fma.rn.f64 %fd256, %fd254, %fd230, %fd255;
mov.f64 %fd257, 0dC002F2B817F77A57;
.loc 3 348 10
fma.rn.f64 %fd258, %fd256, %fd230, %fd257;
mov.f64 %fd259, 0d3FD7BCC34DA069FD;
.loc 3 348 10
fma.rn.f64 %fd260, %fd258, %fd230, %fd259;
mov.f64 %fd261, 0dBFC4FFFFF8A44463;
.loc 3 348 10
fma.rn.f64 %fd262, %fd260, %fd230, %fd261;
mov.f64 %fd263, 0d3FD7FFFFFFFF5CD7;
.loc 3 348 10
fma.rn.f64 %fd264, %fd262, %fd230, %fd263;
fma.rn.f64 %fd17, %fd264, %fd229, %fd521;
rsqrt.approx.f64 %fd265, %fd521;
mul.f64 %fd266, %fd265, 0d3FE9884533D43651;
mul.f64 %fd18, %fd266, %fd245;
mul.f64 %fd267, %fd17, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r76, %fd267;
cvta.to.local.u64 %rd7, %rd6;
.loc 3 348 10
st.local.u32 [%rd7], %r76;
cvt.rn.f64.s32 %fd268, %r76;
neg.f64 %fd269, %fd268;
mov.f64 %fd270, 0d3FF921FB54442D18;
.loc 3 348 10
fma.rn.f64 %fd271, %fd269, %fd270, %fd17;
mov.f64 %fd272, 0d3C91A62633145C00;
.loc 3 348 10
fma.rn.f64 %fd273, %fd269, %fd272, %fd271;
mov.f64 %fd274, 0d397B839A252049C0;
.loc 3 348 10
fma.rn.f64 %fd523, %fd269, %fd274, %fd273;
abs.f64 %fd275, %fd17;
setp.leu.f64 %p17, %fd275, 0d41E0000000000000;
@%p17 bra BB47_27;
// Callseq Start 12
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd17;
.param .b64 param1;
st.param.b64 [param1+0], %rd6;
.param .b64 retval0;
.loc 3 348 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd523, [retval0+0];
}
// Callseq End 12
ld.local.u32 %r76, [%rd7];
BB47_27:
and.b32 %r55, %r76, 3;
cvt.rn.f64.s32 %fd276, %r55;
add.f64 %fd277, %fd523, 0dC002D97C7F3321D2;
fma.rn.f64 %fd524, %fd276, 0d3FF921FB54442D18, %fd277;
abs.f64 %fd278, %fd524;
setp.neu.f64 %p18, %fd278, 0d7FF0000000000000;
@%p18 bra BB47_29;
mov.f64 %fd279, 0d0000000000000000;
.loc 3 348 10
mul.rn.f64 %fd524, %fd524, %fd279;
BB47_29:
add.u64 %rd10, %SP, 0;
.loc 3 348 10
mul.f64 %fd280, %fd524, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r77, %fd280;
cvta.to.local.u64 %rd11, %rd10;
.loc 3 348 10
st.local.u32 [%rd11], %r77;
cvt.rn.f64.s32 %fd281, %r77;
neg.f64 %fd282, %fd281;
fma.rn.f64 %fd284, %fd282, %fd270, %fd524;
fma.rn.f64 %fd286, %fd282, %fd272, %fd284;
fma.rn.f64 %fd525, %fd282, %fd274, %fd286;
abs.f64 %fd288, %fd524;
setp.leu.f64 %p19, %fd288, 0d41E0000000000000;
@%p19 bra BB47_31;
// Callseq Start 13
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd524;
.param .b64 param1;
st.param.b64 [param1+0], %rd10;
.param .b64 retval0;
.loc 3 348 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd525, [retval0+0];
}
// Callseq End 13
ld.local.u32 %r77, [%rd11];
BB47_31:
add.s32 %r21, %r77, 1;
shl.b32 %r56, %r21, 3;
and.b32 %r57, %r56, 8;
and.b32 %r58, %r21, 1;
setp.eq.b32 %p20, %r58, 1;
not.pred %p21, %p20;
selp.f64 %fd289, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p21;
mul.wide.u32 %rd14, %r57, 8;
mov.u64 %rd15, __cudart_sin_cos_coeffs;
add.s64 %rd16, %rd15, %rd14;
.loc 3 348 10
ld.const.f64 %fd290, [%rd16+8];
mul.rn.f64 %fd28, %fd525, %fd525;
fma.rn.f64 %fd291, %fd289, %fd28, %fd290;
ld.const.f64 %fd292, [%rd16+16];
fma.rn.f64 %fd293, %fd291, %fd28, %fd292;
ld.const.f64 %fd294, [%rd16+24];
fma.rn.f64 %fd295, %fd293, %fd28, %fd294;
ld.const.f64 %fd296, [%rd16+32];
fma.rn.f64 %fd297, %fd295, %fd28, %fd296;
ld.const.f64 %fd298, [%rd16+40];
fma.rn.f64 %fd299, %fd297, %fd28, %fd298;
ld.const.f64 %fd300, [%rd16+48];
fma.rn.f64 %fd29, %fd299, %fd28, %fd300;
fma.rn.f64 %fd526, %fd29, %fd525, %fd525;
@%p21 bra BB47_33;
fma.rn.f64 %fd526, %fd29, %fd28, %fd226;
BB47_33:
and.b32 %r59, %r21, 2;
setp.eq.s32 %p22, %r59, 0;
@%p22 bra BB47_35;
mov.f64 %fd302, 0d0000000000000000;
mov.f64 %fd303, 0dBFF0000000000000;
.loc 3 348 10
fma.rn.f64 %fd526, %fd526, %fd303, %fd302;
BB47_35:
mul.f64 %fd527, %fd18, %fd526;
BB47_36:
neg.f64 %fd304, %fd527;
setp.lt.f64 %p23, %fd2, 0d0000000000000000;
selp.f64 %fd305, %fd304, %fd527, %p23;
mul.f64 %fd306, %fd2, 0d3FE0000000000000;
setp.lt.f64 %p24, %fd521, 0d39B4484BFEEBC2A0;
selp.f64 %fd307, %fd306, %fd305, %p24;
mov.f64 %fd308, 0dBFF0000000000000;
.loc 3 348 10
div.rn.f64 %fd309, %fd308, %fd2;
fma.rn.f64 %fd310, %fd522, %fd307, %fd309;
fma.rn.f64 %fd532, %fd310, 0d3FE45F306DC9C883, %fd3;
bra.uni BB47_57;
BB47_37:
.loc 3 348 10
setp.gtu.f64 %p25, %fd2, 0d4009B510EC2ADC83;
@%p25 bra BB47_39;
add.f64 %fd311, %fd2, 0dC00193BED4DFF243;
add.f64 %fd312, %fd311, 0d3C8BD1E50D219BFD;
mov.f64 %fd313, 0d3E4833AAE4D8B975;
mov.f64 %fd314, 0dBE2B87B0BE2AA150;
.loc 3 348 10
fma.rn.f64 %fd315, %fd314, %fd312, %fd313;
mov.f64 %fd316, 0dBE44E279B423E68F;
.loc 3 348 10
fma.rn.f64 %fd317, %fd315, %fd312, %fd316;
mov.f64 %fd318, 0d3E129DC6A747EB4F;
.loc 3 348 10
fma.rn.f64 %fd319, %fd317, %fd312, %fd318;
mov.f64 %fd320, 0dBE61D15534496CD8;
.loc 3 348 10
fma.rn.f64 %fd321, %fd319, %fd312, %fd320;
mov.f64 %fd322, 0d3E7EEC8D48FECE00;
.loc 3 348 10
fma.rn.f64 %fd323, %fd321, %fd312, %fd322;
mov.f64 %fd324, 0dBE8D1180AF70A134;
.loc 3 348 10
fma.rn.f64 %fd325, %fd323, %fd312, %fd324;
mov.f64 %fd326, 0d3E9C8386A0EA1388;
.loc 3 348 10
fma.rn.f64 %fd327, %fd325, %fd312, %fd326;
mov.f64 %fd328, 0dBEB01A014E7F3250;
.loc 3 348 10
fma.rn.f64 %fd329, %fd327, %fd312, %fd328;
mov.f64 %fd330, 0d3EC1FB752010A320;
.loc 3 348 10
fma.rn.f64 %fd331, %fd329, %fd312, %fd330;
mov.f64 %fd332, 0dBED3AA0AFF4E332B;
.loc 3 348 10
fma.rn.f64 %fd333, %fd331, %fd312, %fd332;
mov.f64 %fd334, 0d3EE584A6C77F6700;
.loc 3 348 10
fma.rn.f64 %fd335, %fd333, %fd312, %fd334;
mov.f64 %fd336, 0dBEF794C520FC2EBB;
.loc 3 348 10
fma.rn.f64 %fd337, %fd335, %fd312, %fd336;
mov.f64 %fd338, 0d3F09D18D2D35CC71;
.loc 3 348 10
fma.rn.f64 %fd339, %fd337, %fd312, %fd338;
mov.f64 %fd340, 0dBF1C3FB7315C4599;
.loc 3 348 10
fma.rn.f64 %fd341, %fd339, %fd312, %fd340;
mov.f64 %fd342, 0d3F2EEA7ADECCE927;
.loc 3 348 10
fma.rn.f64 %fd343, %fd341, %fd312, %fd342;
mov.f64 %fd344, 0dBF40B2D85257446F;
.loc 3 348 10
fma.rn.f64 %fd345, %fd343, %fd312, %fd344;
mov.f64 %fd346, 0d3F517AB4B1FE5D5B;
.loc 3 348 10
fma.rn.f64 %fd347, %fd345, %fd312, %fd346;
mov.f64 %fd348, 0dBF65429DC6516C0D;
.loc 3 348 10
fma.rn.f64 %fd349, %fd347, %fd312, %fd348;
mov.f64 %fd350, 0d3F7E671C7D0B090B;
.loc 3 348 10
fma.rn.f64 %fd351, %fd349, %fd312, %fd350;
mov.f64 %fd352, 0dBF73A6DEC36FB27C;
.loc 3 348 10
fma.rn.f64 %fd353, %fd351, %fd312, %fd352;
mov.f64 %fd354, 0dBFA0D2AF4E931FD1;
.loc 3 348 10
fma.rn.f64 %fd355, %fd353, %fd312, %fd354;
mov.f64 %fd356, 0dBFBE56F82217B964;
.loc 3 348 10
fma.rn.f64 %fd357, %fd355, %fd312, %fd356;
mov.f64 %fd358, 0d3FE0AA48442F014B;
.loc 3 348 10
fma.rn.f64 %fd359, %fd357, %fd312, %fd358;
mul.f64 %fd532, %fd359, %fd312;
bra.uni BB47_57;
BB47_39:
.loc 3 348 10
setp.gtu.f64 %p26, %fd2, 0d401C0D26D5A541CB;
@%p26 bra BB47_41;
add.f64 %fd360, %fd2, 0dC015B7FE4E87B02E;
add.f64 %fd361, %fd360, 0dBCBDFE7BAC228E8C;
mov.f64 %fd362, 0d3CC69A30996793E2;
mov.f64 %fd363, 0d3CBA3C76069F1D8C;
.loc 3 348 10
fma.rn.f64 %fd364, %fd363, %fd361, %fd362;
mov.f64 %fd365, 0dBCDDD8432FE756E7;
.loc 3 348 10
fma.rn.f64 %fd366, %fd364, %fd361, %fd365;
mov.f64 %fd367, 0dBD143158EEE220F7;
.loc 3 348 10
fma.rn.f64 %fd368, %fd366, %fd361, %fd367;
mov.f64 %fd369, 0d3D28D44491230F5A;
.loc 3 348 10
fma.rn.f64 %fd370, %fd368, %fd361, %fd369;
mov.f64 %fd371, 0dBD438842EAF4EDBC;
.loc 3 348 10
fma.rn.f64 %fd372, %fd370, %fd361, %fd371;
mov.f64 %fd373, 0d3D74958DAFBFAF5A;
.loc 3 348 10
fma.rn.f64 %fd374, %fd372, %fd361, %fd373;
mov.f64 %fd375, 0dBD9449A60E664848;
.loc 3 348 10
fma.rn.f64 %fd376, %fd374, %fd361, %fd375;
mov.f64 %fd377, 0d3D838BC8CD594A76;
.loc 3 348 10
fma.rn.f64 %fd378, %fd376, %fd361, %fd377;
mov.f64 %fd379, 0dBDFA798002141323;
.loc 3 348 10
fma.rn.f64 %fd380, %fd378, %fd361, %fd379;
mov.f64 %fd381, 0d3E380B4198956AAA;
.loc 3 348 10
fma.rn.f64 %fd382, %fd380, %fd361, %fd381;
mov.f64 %fd383, 0d3E5B62B5F21BACD4;
.loc 3 348 10
fma.rn.f64 %fd384, %fd382, %fd361, %fd383;
mov.f64 %fd385, 0dBEA255E729FB6AAE;
.loc 3 348 10
fma.rn.f64 %fd386, %fd384, %fd361, %fd385;
mov.f64 %fd387, 0dBEC80618F6BAE5AA;
.loc 3 348 10
fma.rn.f64 %fd388, %fd386, %fd361, %fd387;
mov.f64 %fd389, 0d3F085B940F8E8D36;
.loc 3 348 10
fma.rn.f64 %fd390, %fd388, %fd361, %fd389;
mov.f64 %fd391, 0d3F2337C7E10E14E8;
.loc 3 348 10
fma.rn.f64 %fd392, %fd390, %fd361, %fd391;
mov.f64 %fd393, 0dBF61BE6DB99332CA;
.loc 3 348 10
fma.rn.f64 %fd394, %fd392, %fd361, %fd393;
mov.f64 %fd395, 0dBF710A329E2BE9B8;
.loc 3 348 10
fma.rn.f64 %fd396, %fd394, %fd361, %fd395;
mov.f64 %fd397, 0d3FAA15D92DFE3FCF;
.loc 3 348 10
fma.rn.f64 %fd398, %fd396, %fd361, %fd397;
mov.f64 %fd399, 0d3FA00B9F8571C9BE;
.loc 3 348 10
fma.rn.f64 %fd400, %fd398, %fd361, %fd399;
mov.f64 %fd401, 0dBFD5C7C556F0C19A;
.loc 3 348 10
fma.rn.f64 %fd402, %fd400, %fd361, %fd401;
mul.f64 %fd532, %fd402, %fd361;
bra.uni BB47_57;
BB47_41:
.loc 3 348 10
setp.gtu.f64 %p27, %fd2, 0d4022585C739ACDDD;
@%p27 bra BB47_43;
add.f64 %fd403, %fd2, 0dC0213127AE6169B4;
add.f64 %fd404, %fd403, 0dBCB479CC068D9046;
mov.f64 %fd405, 0dBD43515F67644276;
mov.f64 %fd406, 0d3CB09CCC22945996;
.loc 3 348 10
fma.rn.f64 %fd407, %fd406, %fd404, %fd405;
mov.f64 %fd408, 0dBD72C5B978E9F5C7;
.loc 3 348 10
fma.rn.f64 %fd409, %fd407, %fd404, %fd408;
mov.f64 %fd410, 0d3DBEC1151613913C;
.loc 3 348 10
fma.rn.f64 %fd411, %fd409, %fd404, %fd410;
mov.f64 %fd412, 0d3DE9E38D13C4A824;
.loc 3 348 10
fma.rn.f64 %fd413, %fd411, %fd404, %fd412;
mov.f64 %fd414, 0dBE341E75E1088EB5;
.loc 3 348 10
fma.rn.f64 %fd415, %fd413, %fd404, %fd414;
mov.f64 %fd416, 0dBE5A384EBB13CFE1;
.loc 3 348 10
fma.rn.f64 %fd417, %fd415, %fd404, %fd416;
mov.f64 %fd418, 0d3EA2BECB27F8C8F8;
.loc 3 348 10
fma.rn.f64 %fd419, %fd417, %fd404, %fd418;
mov.f64 %fd420, 0d3EC176E72B989FD8;
.loc 3 348 10
fma.rn.f64 %fd421, %fd419, %fd404, %fd420;
mov.f64 %fd422, 0dBF06F7BAB102F822;
.loc 3 348 10
fma.rn.f64 %fd423, %fd421, %fd404, %fd422;
mov.f64 %fd424, 0dBF1B50D7E1D278E1;
.loc 3 348 10
fma.rn.f64 %fd425, %fd423, %fd404, %fd424;
mov.f64 %fd426, 0d3F607A678D60004F;
.loc 3 348 10
fma.rn.f64 %fd427, %fd425, %fd404, %fd426;
mov.f64 %fd428, 0d3F63CED2A2E69115;
.loc 3 348 10
fma.rn.f64 %fd429, %fd427, %fd404, %fd428;
mov.f64 %fd430, 0dBFA6395DFE49FCD4;
.loc 3 348 10
fma.rn.f64 %fd431, %fd429, %fd404, %fd430;
mov.f64 %fd432, 0dBF902B3933CF21B1;
.loc 3 348 10
fma.rn.f64 %fd433, %fd431, %fd404, %fd432;
mov.f64 %fd434, 0d3FD15F993FCEAB5C;
.loc 3 348 10
fma.rn.f64 %fd435, %fd433, %fd404, %fd434;
mul.f64 %fd532, %fd435, %fd404;
bra.uni BB47_57;
BB47_43:
.loc 3 348 10
abs.f64 %fd436, %fd2;
setp.neu.f64 %p28, %fd436, 0d7FF0000000000000;
@%p28 bra BB47_45;
mov.f64 %fd532, 0d0000000000000000;
bra.uni BB47_57;
BB47_45:
add.u64 %rd17, %SP, 12;
.loc 3 348 10
// inline asm
cvt.rn.f32.f64 %f9,%fd2;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f10,%f9;
// inline asm
// inline asm
cvt.f64.f32 %fd439,%f10;
// inline asm
neg.f64 %fd440, %fd2;
mov.f64 %fd441, 0d3FF0000000000000;
.loc 3 348 10
fma.rn.f64 %fd442, %fd440, %fd439, %fd441;
fma.rn.f64 %fd443, %fd442, %fd442, %fd442;
fma.rn.f64 %fd444, %fd443, %fd439, %fd439;
mul.f64 %fd445, %fd444, %fd444;
mov.f64 %fd446, 0dC09C26E89385D5B1;
mov.f64 %fd447, 0d40D13DB326ECEBFE;
.loc 3 348 10
fma.rn.f64 %fd448, %fd447, %fd445, %fd446;
mov.f64 %fd449, 0d405C6AB923C6F55E;
.loc 3 348 10
fma.rn.f64 %fd450, %fd448, %fd445, %fd449;
mov.f64 %fd451, 0dC01E61EAF3BD2FA1;
.loc 3 348 10
fma.rn.f64 %fd452, %fd450, %fd445, %fd451;
mov.f64 %fd453, 0d3FE9BF15D9B97DD1;
.loc 3 348 10
fma.rn.f64 %fd454, %fd452, %fd445, %fd453;
mov.f64 %fd455, 0dBFC8BFECF93D7D19;
.loc 3 348 10
fma.rn.f64 %fd456, %fd454, %fd445, %fd455;
mov.f64 %fd457, 0d3FC7FFFFF756AA6C;
.loc 3 348 10
fma.rn.f64 %fd458, %fd456, %fd445, %fd457;
mov.f64 %fd459, 0d3FF0000000003646;
.loc 3 348 10
fma.rn.f64 %fd460, %fd458, %fd445, %fd459;
mov.f64 %fd461, 0d416024E99BA46E7B;
mov.f64 %fd462, 0dC1943281A050209C;
.loc 3 348 10
fma.rn.f64 %fd463, %fd462, %fd445, %fd461;
mov.f64 %fd464, 0dC11A6875D7DFBD65;
.loc 3 348 10
fma.rn.f64 %fd465, %fd463, %fd445, %fd464;
mov.f64 %fd466, 0d40D032C041790233;
.loc 3 348 10
fma.rn.f64 %fd467, %fd465, %fd445, %fd466;
mov.f64 %fd468, 0dC0839F895BC22946;
.loc 3 348 10
fma.rn.f64 %fd469, %fd467, %fd445, %fd468;
mov.f64 %fd470, 0d403E77CC78ECD2D8;
.loc 3 348 10
fma.rn.f64 %fd471, %fd469, %fd445, %fd470;
mov.f64 %fd472, 0dC002F368D0117BE9;
.loc 3 348 10
fma.rn.f64 %fd473, %fd471, %fd445, %fd472;
mov.f64 %fd474, 0d3FD7BCC786009A25;
.loc 3 348 10
fma.rn.f64 %fd475, %fd473, %fd445, %fd474;
mov.f64 %fd476, 0dBFC4FFFFFC51BC7A;
.loc 3 348 10
fma.rn.f64 %fd477, %fd475, %fd445, %fd476;
mov.f64 %fd478, 0d3FD7FFFFFFFFB5EA;
.loc 3 348 10
fma.rn.f64 %fd479, %fd477, %fd445, %fd478;
fma.rn.f64 %fd41, %fd479, %fd444, %fd2;
rsqrt.approx.f64 %fd480, %fd2;
mul.f64 %fd481, %fd480, 0d3FE9884533D43651;
mul.f64 %fd42, %fd481, %fd460;
mul.f64 %fd482, %fd41, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r78, %fd482;
cvta.to.local.u64 %rd18, %rd17;
.loc 3 348 10
st.local.u32 [%rd18], %r78;
cvt.rn.f64.s32 %fd483, %r78;
neg.f64 %fd484, %fd483;
mov.f64 %fd485, 0d3FF921FB54442D18;
.loc 3 348 10
fma.rn.f64 %fd486, %fd484, %fd485, %fd41;
mov.f64 %fd487, 0d3C91A62633145C00;
.loc 3 348 10
fma.rn.f64 %fd488, %fd484, %fd487, %fd486;
mov.f64 %fd489, 0d397B839A252049C0;
.loc 3 348 10
fma.rn.f64 %fd528, %fd484, %fd489, %fd488;
abs.f64 %fd490, %fd41;
setp.leu.f64 %p29, %fd490, 0d41E0000000000000;
@%p29 bra BB47_47;
// Callseq Start 14
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd41;
.param .b64 param1;
st.param.b64 [param1+0], %rd17;
.param .b64 retval0;
.loc 3 348 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd528, [retval0+0];
}
// Callseq End 14
ld.local.u32 %r78, [%rd18];
BB47_47:
and.b32 %r60, %r78, 3;
cvt.rn.f64.s32 %fd491, %r60;
add.f64 %fd492, %fd528, 0dC00F6A7A2955385E;
fma.rn.f64 %fd529, %fd491, 0d3FF921FB54442D18, %fd492;
abs.f64 %fd493, %fd529;
setp.neu.f64 %p30, %fd493, 0d7FF0000000000000;
@%p30 bra BB47_49;
mov.f64 %fd494, 0d0000000000000000;
.loc 3 348 10
mul.rn.f64 %fd529, %fd529, %fd494;
BB47_49:
add.u64 %rd21, %SP, 8;
.loc 3 348 10
mul.f64 %fd495, %fd529, 0d3FE45F306DC9C883;
cvt.rni.s32.f64 %r79, %fd495;
cvta.to.local.u64 %rd22, %rd21;
.loc 3 348 10
st.local.u32 [%rd22], %r79;
cvt.rn.f64.s32 %fd496, %r79;
neg.f64 %fd497, %fd496;
fma.rn.f64 %fd499, %fd497, %fd485, %fd529;
fma.rn.f64 %fd501, %fd497, %fd487, %fd499;
fma.rn.f64 %fd530, %fd497, %fd489, %fd501;
abs.f64 %fd503, %fd529;
setp.leu.f64 %p31, %fd503, 0d41E0000000000000;
@%p31 bra BB47_51;
// Callseq Start 15
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd529;
.param .b64 param1;
st.param.b64 [param1+0], %rd21;
.param .b64 retval0;
.loc 3 348 10
call.uni (retval0),
__internal_trig_reduction_slowpathd,
(
param0,
param1
);
ld.param.f64 %fd530, [retval0+0];
}
// Callseq End 15
ld.local.u32 %r79, [%rd22];
BB47_51:
add.s32 %r28, %r79, 1;
shl.b32 %r61, %r28, 3;
and.b32 %r62, %r61, 8;
and.b32 %r63, %r28, 1;
setp.eq.b32 %p32, %r63, 1;
not.pred %p33, %p32;
selp.f64 %fd504, 0d3DE5DB65F9785EBA, 0dBDA8FF8320FD8164, %p33;
mul.wide.u32 %rd25, %r62, 8;
mov.u64 %rd26, __cudart_sin_cos_coeffs;
add.s64 %rd27, %rd26, %rd25;
.loc 3 348 10
ld.const.f64 %fd505, [%rd27+8];
mul.rn.f64 %fd52, %fd530, %fd530;
fma.rn.f64 %fd506, %fd504, %fd52, %fd505;
ld.const.f64 %fd507, [%rd27+16];
fma.rn.f64 %fd508, %fd506, %fd52, %fd507;
ld.const.f64 %fd509, [%rd27+24];
fma.rn.f64 %fd510, %fd508, %fd52, %fd509;
ld.const.f64 %fd511, [%rd27+32];
fma.rn.f64 %fd512, %fd510, %fd52, %fd511;
ld.const.f64 %fd513, [%rd27+40];
fma.rn.f64 %fd514, %fd512, %fd52, %fd513;
ld.const.f64 %fd515, [%rd27+48];
fma.rn.f64 %fd53, %fd514, %fd52, %fd515;
fma.rn.f64 %fd531, %fd53, %fd530, %fd530;
@%p33 bra BB47_53;
fma.rn.f64 %fd531, %fd53, %fd52, %fd441;
BB47_53:
and.b32 %r64, %r28, 2;
setp.eq.s32 %p34, %r64, 0;
@%p34 bra BB47_55;
mov.f64 %fd517, 0d0000000000000000;
mov.f64 %fd518, 0dBFF0000000000000;
.loc 3 348 10
fma.rn.f64 %fd531, %fd531, %fd518, %fd517;
BB47_55:
mul.f64 %fd532, %fd42, %fd531;
bra.uni BB47_57;
BB47_56:
mov.f64 %fd519, 0dBFE45F306DC9C883;
.loc 3 348 10
div.rn.f64 %fd532, %fd519, %fd2;
BB47_57:
setp.gtu.f64 %p35, %fd1, 0d0000000000000000;
@%p35 bra BB47_59;
setp.eq.f64 %p36, %fd1, 0d0000000000000000;
selp.f64 %fd532, 0dFFF0000000000000, 0dFFF8000000000000, %p36;
BB47_59:
.loc 2 54 42
mad.lo.s32 %r65, %r70, %r33, %r71;
mul.wide.s32 %rd29, %r65, 8;
add.s64 %rd30, %rd28, %rd29;
st.global.f64 [%rd30], %fd532;
.loc 2 54 22
mov.u32 %r67, %nctaid.y;
mad.lo.s32 %r71, %r67, %r39, %r71;
.loc 2 54 1
setp.lt.s32 %p37, %r71, %r31;
@%p37 bra BB47_3;
BB47_60:
.loc 2 54 22
mov.u32 %r68, %nctaid.x;
mad.lo.s32 %r70, %r68, %r35, %r70;
.loc 2 54 1
setp.lt.s32 %p38, %r70, %r32;
@%p38 bra BB47_2;
BB47_61:
.loc 2 54 2
ret;
}
// map2_add_double: element-wise f64 addition, out[i] = a[i] + b[i], over a 2-D index space.
//
// Parameters (as used by the code below):
//   param_0 (u32): exclusive upper bound of the index derived from the thread-y axis
//   param_1 (u32): exclusive upper bound of the index derived from the thread-x axis
//   param_2 (u64): output pointer (converted to the global space, written with st.global.f64)
//   param_3 (u32): multiplier applied to the x index when addressing the output
//   param_4 (u64): first input pointer
//   param_5 (u32): multiplier applied to the x index when addressing the first input
//   param_6 (u64): second input pointer
//   param_7 (u32): multiplier applied to the x index when addressing the second input
// presumably param_0/param_1 are rows/cols and the multipliers are leading dimensions
// of column-major matrices -- TODO confirm against the host-side caller.
//
// Every element index is formed as (y + x * multiplier) in 32-bit arithmetic and only
// then widened to a 64-bit byte offset. The bounds are declared .u32 but compared with
// signed (.s32) predicates.
// The ".loc 2 ..." directives are debug line info pointing into .file 2 of this module.
.visible .entry map2_add_double(
.param .u32 map2_add_double_param_0,
.param .u32 map2_add_double_param_1,
.param .u64 map2_add_double_param_2,
.param .u32 map2_add_double_param_3,
.param .u64 map2_add_double_param_4,
.param .u32 map2_add_double_param_5,
.param .u64 map2_add_double_param_6,
.param .u32 map2_add_double_param_7
)
{
.reg .pred %p<5>;
.reg .s32 %r<30>;
.reg .s64 %rd<13>;
.reg .f64 %fd<4>;
// Load all kernel arguments into registers.
ld.param.u32 %r13, [map2_add_double_param_0];
ld.param.u32 %r14, [map2_add_double_param_1];
ld.param.u64 %rd4, [map2_add_double_param_2];
ld.param.u32 %r15, [map2_add_double_param_3];
ld.param.u64 %rd5, [map2_add_double_param_4];
ld.param.u32 %r16, [map2_add_double_param_5];
ld.param.u64 %rd6, [map2_add_double_param_6];
ld.param.u32 %r17, [map2_add_double_param_7];
// %rd1 = output, %rd2 = second input, %rd3 = first input (global-space addresses).
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd6;
cvta.to.global.u64 %rd3, %rd5;
.loc 2 64 1
// %r28 = x index = ntid.x * ctaid.x + tid.x (global thread id along x).
mov.u32 %r1, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r28, %r1, %r18, %r19;
.loc 2 64 1
// Threads whose starting x index is already out of range exit immediately.
setp.ge.s32 %p1, %r28, %r14;
@%p1 bra BB48_6;
.loc 2 64 1
// %r3 = starting y index = ntid.y * ctaid.y + tid.y.
mov.u32 %r20, %tid.y;
mov.u32 %r21, %ntid.y;
mov.u32 %r22, %ctaid.y;
mad.lo.s32 %r3, %r21, %r22, %r20;
.loc 2 64 22
// %r4 = x step = nctaid.x * ntid.x (total threads along x in the grid).
mov.u32 %r23, %nctaid.x;
mul.lo.s32 %r4, %r23, %r1;
.loc 2 64 22
// %r5 = y step = nctaid.y * ntid.y (total threads along y in the grid).
mov.u32 %r24, %nctaid.y;
mul.lo.s32 %r5, %r24, %r21;
// Outer grid-stride loop over the x index (%r28).
BB48_2:
.loc 2 64 1
// Skip the inner loop entirely when the starting y index is out of range.
setp.ge.s32 %p2, %r3, %r13;
@%p2 bra BB48_5;
.loc 2 64 1
// Hoisted per-x base offsets (in elements): first input, second input, output.
mul.lo.s32 %r7, %r28, %r16;
mul.lo.s32 %r8, %r28, %r17;
mul.lo.s32 %r9, %r28, %r15;
mov.u32 %r29, %r3;
// Inner grid-stride loop over the y index (%r29 / %r10).
BB48_4:
.loc 2 64 1
mov.u32 %r10, %r29;
// Address of the first-input element: base + 8 * (y + x * param_5).
add.s32 %r25, %r10, %r7;
mul.wide.s32 %rd7, %r25, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 2 64 1
// Address of the second-input element: base + 8 * (y + x * param_7).
add.s32 %r26, %r10, %r8;
mul.wide.s32 %rd9, %r26, 8;
add.s64 %rd10, %rd2, %rd9;
.loc 2 64 1
// %fd3 = first input + second input.
ld.global.f64 %fd1, [%rd10];
ld.global.f64 %fd2, [%rd8];
add.f64 %fd3, %fd2, %fd1;
// Address of the output element: base + 8 * (y + x * param_3).
add.s32 %r27, %r10, %r9;
mul.wide.s32 %rd11, %r27, 8;
add.s64 %rd12, %rd1, %rd11;
.loc 2 64 1
st.global.f64 [%rd12], %fd3;
.loc 2 64 22
// Advance y by the grid-wide y step and repeat while it is below param_0.
add.s32 %r11, %r5, %r10;
.loc 2 64 1
setp.lt.s32 %p3, %r11, %r13;
mov.u32 %r29, %r11;
@%p3 bra BB48_4;
BB48_5:
.loc 2 64 22
// Advance x by the grid-wide x step and repeat while it is below param_1.
add.s32 %r28, %r4, %r28;
.loc 2 64 1
setp.lt.s32 %p4, %r28, %r14;
@%p4 bra BB48_2;
BB48_6:
.loc 2 64 2
ret;
}
// map2_v_s_add_double: adds an f64 scalar to every element of an input array,
// out[i] = a[i] + s, over a 2-D index space.
//
// Parameters (as used by the code below):
//   param_0 (u32): exclusive upper bound of the index derived from the thread-y axis
//   param_1 (u32): exclusive upper bound of the index derived from the thread-x axis
//   param_2 (u64): output pointer (converted to the global space, written with st.global.f64)
//   param_3 (u32): multiplier applied to the x index when addressing the output
//   param_4 (u64): input pointer
//   param_5 (u32): multiplier applied to the x index when addressing the input
//   param_6 (f64): scalar operand
// presumably param_0/param_1 are rows/cols and the multipliers are leading dimensions
// of column-major matrices -- TODO confirm against the host-side caller.
//
// Element indices are formed as (y + x * multiplier) in 32-bit arithmetic before being
// widened to 64-bit byte offsets; bounds are compared with signed (.s32) predicates.
.visible .entry map2_v_s_add_double(
.param .u32 map2_v_s_add_double_param_0,
.param .u32 map2_v_s_add_double_param_1,
.param .u64 map2_v_s_add_double_param_2,
.param .u32 map2_v_s_add_double_param_3,
.param .u64 map2_v_s_add_double_param_4,
.param .u32 map2_v_s_add_double_param_5,
.param .f64 map2_v_s_add_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
// Load all kernel arguments; %fd1 holds the scalar for the whole kernel.
ld.param.u32 %r12, [map2_v_s_add_double_param_0];
ld.param.u32 %r13, [map2_v_s_add_double_param_1];
ld.param.u64 %rd3, [map2_v_s_add_double_param_2];
ld.param.u32 %r14, [map2_v_s_add_double_param_3];
ld.param.u64 %rd4, [map2_v_s_add_double_param_4];
ld.param.u32 %r15, [map2_v_s_add_double_param_5];
ld.param.f64 %fd1, [map2_v_s_add_double_param_6];
// %rd1 = output, %rd2 = input (global-space addresses).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 64 1
// %r25 = x index = ntid.x * ctaid.x + tid.x (global thread id along x).
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 64 1
// Threads whose starting x index is already out of range exit immediately.
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB49_6;
.loc 2 64 1
// %r3 = starting y index = ntid.y * ctaid.y + tid.y.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 64 22
// %r4 = x step = nctaid.x * ntid.x (total threads along x in the grid).
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 64 22
// %r5 = y step = nctaid.y * ntid.y (total threads along y in the grid).
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer grid-stride loop over the x index (%r25).
BB49_2:
.loc 2 64 1
// Skip the inner loop entirely when the starting y index is out of range.
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB49_5;
.loc 2 64 1
// Hoisted per-x base offsets (in elements): input, then output.
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner grid-stride loop over the y index (%r26 / %r9).
BB49_4:
.loc 2 64 1
mov.u32 %r9, %r26;
// Address of the input element: base + 8 * (y + x * param_5).
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 64 1
// %fd3 = input element + scalar.
ld.global.f64 %fd2, [%rd6];
add.f64 %fd3, %fd2, %fd1;
// Address of the output element: base + 8 * (y + x * param_3).
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 64 1
st.global.f64 [%rd8], %fd3;
.loc 2 64 22
// Advance y by the grid-wide y step and repeat while it is below param_0.
add.s32 %r10, %r5, %r9;
.loc 2 64 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB49_4;
BB49_5:
.loc 2 64 22
// Advance x by the grid-wide x step and repeat while it is below param_1.
add.s32 %r25, %r4, %r25;
.loc 2 64 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB49_2;
BB49_6:
.loc 2 64 2
ret;
}
// map2_s_v_add_double: scalar-first variant of the scalar/array add,
// out[i] = s + b[i], over a 2-D index space.
//
// Parameters (as used by the code below):
//   param_0 (u32): exclusive upper bound of the index derived from the thread-y axis
//   param_1 (u32): exclusive upper bound of the index derived from the thread-x axis
//   param_2 (u64): output pointer (converted to the global space, written with st.global.f64)
//   param_3 (u32): multiplier applied to the x index when addressing the output
//   param_4 (f64): scalar operand
//   param_5 (u64): input pointer
//   param_6 (u32): multiplier applied to the x index when addressing the input
// presumably param_0/param_1 are rows/cols and the multipliers are leading dimensions
// of column-major matrices -- TODO confirm against the host-side caller.
//
// Apart from the parameter order, the instruction stream matches map2_v_s_add_double:
// the add is emitted as (element + scalar).
// Element indices are formed as (y + x * multiplier) in 32-bit arithmetic before being
// widened to 64-bit byte offsets; bounds are compared with signed (.s32) predicates.
.visible .entry map2_s_v_add_double(
.param .u32 map2_s_v_add_double_param_0,
.param .u32 map2_s_v_add_double_param_1,
.param .u64 map2_s_v_add_double_param_2,
.param .u32 map2_s_v_add_double_param_3,
.param .f64 map2_s_v_add_double_param_4,
.param .u64 map2_s_v_add_double_param_5,
.param .u32 map2_s_v_add_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
// Load all kernel arguments; %fd1 holds the scalar for the whole kernel.
ld.param.u32 %r12, [map2_s_v_add_double_param_0];
ld.param.u32 %r13, [map2_s_v_add_double_param_1];
ld.param.u64 %rd3, [map2_s_v_add_double_param_2];
ld.param.u32 %r14, [map2_s_v_add_double_param_3];
ld.param.f64 %fd1, [map2_s_v_add_double_param_4];
ld.param.u64 %rd4, [map2_s_v_add_double_param_5];
ld.param.u32 %r15, [map2_s_v_add_double_param_6];
// %rd1 = output, %rd2 = input (global-space addresses).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 64 1
// %r25 = x index = ntid.x * ctaid.x + tid.x (global thread id along x).
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 64 1
// Threads whose starting x index is already out of range exit immediately.
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB50_6;
.loc 2 64 1
// %r3 = starting y index = ntid.y * ctaid.y + tid.y.
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 64 22
// %r4 = x step = nctaid.x * ntid.x (total threads along x in the grid).
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 64 22
// %r5 = y step = nctaid.y * ntid.y (total threads along y in the grid).
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer grid-stride loop over the x index (%r25).
BB50_2:
.loc 2 64 1
// Skip the inner loop entirely when the starting y index is out of range.
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB50_5;
.loc 2 64 1
// Hoisted per-x base offsets (in elements): input, then output.
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner grid-stride loop over the y index (%r26 / %r9).
BB50_4:
.loc 2 64 1
mov.u32 %r9, %r26;
// Address of the input element: base + 8 * (y + x * param_6).
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 64 1
// %fd3 = input element + scalar.
ld.global.f64 %fd2, [%rd6];
add.f64 %fd3, %fd2, %fd1;
// Address of the output element: base + 8 * (y + x * param_3).
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 64 1
st.global.f64 [%rd8], %fd3;
.loc 2 64 22
// Advance y by the grid-wide y step and repeat while it is below param_0.
add.s32 %r10, %r5, %r9;
.loc 2 64 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB50_4;
BB50_5:
.loc 2 64 22
// Advance x by the grid-wide x step and repeat while it is below param_1.
add.s32 %r25, %r4, %r25;
.loc 2 64 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB50_2;
BB50_6:
.loc 2 64 2
ret;
}
// map2_transpose_add_double: f64 addition where the second input is read with its two
// indices swapped relative to the first input and the output. The swap is staged
// through a shared-memory tile.
//
// Parameters (as used by the code below):
//   param_0 (u32): exclusive bound for the index that is the minor (unscaled) index of
//                  the output and first input, and the scaled index of the second input
//   param_1 (u32): exclusive bound for the other index
//   param_2 (u64): output pointer
//   param_3 (u32): multiplier for the scaled index of the output
//   param_4 (u64): first input pointer (read directly from global memory)
//   param_5 (u32): multiplier for the scaled index of the first input
//   param_6 (u64): second input pointer (staged through the shared tile)
//   param_7 (u32): multiplier for the scaled index of the second input
// presumably param_0/param_1 are rows/cols of the output and the multipliers are
// leading dimensions -- TODO confirm against the host-side caller.
//
// Shared tile: 8448 bytes addressed with a 264-byte row pitch and 8-byte elements,
// i.e. 32 rows of 33 doubles. Only 32 entries per row are addressed below; the extra
// column is presumably padding against shared-memory bank conflicts.
//
// NOTE(review): tile extents are hard-coded to 32 (the "+ 32" / min below) while tile
// origins start at ctaid * ntid and advance by nctaid * ntid. This looks like it
// assumes ntid.x == ntid.y == 32 -- confirm the launch configuration.
//
// Both bar.sync instructions sit in loops whose trip conditions depend only on
// %ctaid, %ntid, %nctaid and kernel parameters, not on %tid.
// All element indices are computed in 32-bit arithmetic before being widened.
.visible .entry map2_transpose_add_double(
.param .u32 map2_transpose_add_double_param_0,
.param .u32 map2_transpose_add_double_param_1,
.param .u64 map2_transpose_add_double_param_2,
.param .u32 map2_transpose_add_double_param_3,
.param .u64 map2_transpose_add_double_param_4,
.param .u32 map2_transpose_add_double_param_5,
.param .u64 map2_transpose_add_double_param_6,
.param .u32 map2_transpose_add_double_param_7
)
{
.reg .pred %p<13>;
.reg .s32 %r<68>;
.reg .s64 %rd<23>;
.reg .f64 %fd<5>;
// demoted variable
.shared .align 8 .b8 map2_transpose_add_double$__cuda_local_var_32580_1747_non_const_tile[8448];
// Load all kernel arguments into registers.
ld.param.u32 %r27, [map2_transpose_add_double_param_0];
ld.param.u32 %r28, [map2_transpose_add_double_param_1];
ld.param.u64 %rd4, [map2_transpose_add_double_param_2];
ld.param.u32 %r29, [map2_transpose_add_double_param_3];
ld.param.u64 %rd5, [map2_transpose_add_double_param_4];
ld.param.u32 %r30, [map2_transpose_add_double_param_5];
ld.param.u64 %rd6, [map2_transpose_add_double_param_6];
ld.param.u32 %r31, [map2_transpose_add_double_param_7];
// %rd1 = output, %rd2 = first input, %rd3 = second input (global-space addresses).
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
cvta.to.global.u64 %rd3, %rd6;
.loc 2 64 1
// %r62 = tile origin along the param_1 axis = ctaid.y * ntid.y.
mov.u32 %r1, %ntid.x;
mov.u32 %r32, %ctaid.y;
mov.u32 %r33, %ntid.y;
mul.lo.s32 %r62, %r32, %r33;
.loc 2 64 1
setp.ge.s32 %p2, %r62, %r28;
@%p2 bra BB51_15;
// Outer tile loop: %r62 walks the param_1 axis.
BB51_1:
.loc 2 64 1
// %r63 = tile origin along the param_0 axis = ctaid.x * ntid.x.
mov.u32 %r35, %ctaid.x;
mul.lo.s32 %r63, %r35, %r1;
.loc 2 64 1
setp.ge.s32 %p3, %r63, %r27;
@%p3 bra BB51_14;
.loc 2 64 1
// %r4 = tile end along the param_1 axis = min(param_1, %r62 + 32).
add.s32 %r37, %r62, 32;
.loc 4 2621 10
min.s32 %r4, %r28, %r37;
// Inner tile loop: %r63 walks the param_0 axis.
BB51_3:
.loc 2 64 1
// %r7 = tile end along the param_0 axis = min(param_0, %r63 + 32).
add.s32 %r40, %r63, 32;
.loc 4 2621 10
min.s32 %r7, %r27, %r40;
.loc 2 64 1
// Phase 1: copy the second input's tile from global into shared memory.
// %r64 = tid.y + %r63 walks the param_0 axis in steps of ntid.y.
mov.u32 %r41, %tid.y;
add.s32 %r64, %r41, %r63;
.loc 2 64 1
setp.ge.s32 %p4, %r64, %r7;
@%p4 bra BB51_8;
BB51_4:
.loc 2 64 1
mov.u32 %r42, %tid.x;
.loc 2 64 1
// %r65 = tid.x + %r62 walks the param_1 axis in steps of ntid.x.
add.s32 %r65, %r42, %r62;
.loc 2 64 1
setp.ge.s32 %p5, %r65, %r4;
@%p5 bra BB51_7;
.loc 2 64 1
// %r10 = scaled part of the second-input index; %r11 = tile row (%r64 - %r63).
mul.lo.s32 %r10, %r64, %r31;
sub.s32 %r11, %r64, %r63;
BB51_6:
// Global address of the second-input element: base + 8 * (%r65 + %r64 * param_7).
add.s32 %r45, %r65, %r10;
mul.wide.s32 %rd7, %r45, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 2 64 1
// Shared address: tile + 264 * (%r64 - %r63) + 8 * (%r65 - %r62).
sub.s32 %r46, %r65, %r62;
mul.wide.s32 %rd9, %r11, 264;
mov.u64 %rd10, map2_transpose_add_double$__cuda_local_var_32580_1747_non_const_tile;
add.s64 %rd11, %rd10, %rd9;
mul.wide.s32 %rd12, %r46, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 2 64 1
ld.global.f64 %fd1, [%rd8];
st.shared.f64 [%rd13], %fd1;
.loc 2 64 58
add.s32 %r65, %r1, %r65;
.loc 2 64 1
setp.lt.s32 %p6, %r65, %r4;
@%p6 bra BB51_6;
BB51_7:
.loc 2 64 58
add.s32 %r64, %r33, %r64;
.loc 2 64 1
setp.lt.s32 %p7, %r64, %r7;
@%p7 bra BB51_4;
BB51_8:
.loc 2 64 1
// Phase 2 setup: %r66 = tid.y + %r62 now walks the param_1 axis.
add.s32 %r66, %r41, %r62;
setp.lt.s32 %p1, %r66, %r4;
.loc 2 64 1
// Barrier: every tile write from phase 1 must complete before any phase 2 read.
bar.sync 0;
.loc 2 64 1
@!%p1 bra BB51_13;
bra.uni BB51_9;
BB51_9:
.loc 2 64 1
// %r67 = tid.x + %r63 walks the param_0 axis in steps of ntid.x.
mov.u32 %r51, %tid.x;
add.s32 %r67, %r51, %r63;
.loc 2 64 1
setp.ge.s32 %p8, %r67, %r7;
@%p8 bra BB51_12;
.loc 2 64 1
// %r18 / %r20 = scaled parts of the first-input / output indices;
// %r19 = tile column (%r66 - %r62).
mul.lo.s32 %r18, %r66, %r30;
sub.s32 %r19, %r66, %r62;
mul.lo.s32 %r20, %r66, %r29;
BB51_11:
// Global address of the first-input element: base + 8 * (%r67 + %r66 * param_5).
add.s32 %r54, %r67, %r18;
mul.wide.s32 %rd14, %r54, 8;
add.s64 %rd15, %rd2, %rd14;
.loc 2 64 1
// Shared address with the tile indices swapped relative to phase 1:
// tile + 264 * (%r67 - %r63) + 8 * (%r66 - %r62).
sub.s32 %r55, %r67, %r63;
mul.wide.s32 %rd16, %r55, 264;
mov.u64 %rd17, map2_transpose_add_double$__cuda_local_var_32580_1747_non_const_tile;
add.s64 %rd18, %rd17, %rd16;
mul.wide.s32 %rd19, %r19, 8;
add.s64 %rd20, %rd18, %rd19;
.loc 2 64 1
// %fd4 = first-input element + staged second-input element.
ld.shared.f64 %fd2, [%rd20];
ld.global.f64 %fd3, [%rd15];
add.f64 %fd4, %fd3, %fd2;
// Global address of the output element: base + 8 * (%r67 + %r66 * param_3).
add.s32 %r56, %r67, %r20;
mul.wide.s32 %rd21, %r56, 8;
add.s64 %rd22, %rd1, %rd21;
.loc 2 64 1
st.global.f64 [%rd22], %fd4;
.loc 2 64 58
add.s32 %r67, %r1, %r67;
.loc 2 64 1
setp.lt.s32 %p9, %r67, %r7;
@%p9 bra BB51_11;
BB51_12:
.loc 2 64 58
add.s32 %r66, %r33, %r66;
.loc 2 64 1
setp.lt.s32 %p10, %r66, %r4;
@%p10 bra BB51_9;
BB51_13:
.loc 2 64 1
// Barrier: all phase 2 reads must finish before the next iteration overwrites the tile.
bar.sync 0;
.loc 2 64 1
mov.u32 %r59, %nctaid.x;
.loc 2 64 21
// Advance the param_0-axis tile origin by nctaid.x * ntid.x.
mad.lo.s32 %r63, %r59, %r1, %r63;
.loc 2 64 1
setp.lt.s32 %p11, %r63, %r27;
@%p11 bra BB51_3;
BB51_14:
.loc 2 64 1
mov.u32 %r60, %nctaid.y;
.loc 2 64 21
// Advance the param_1-axis tile origin by nctaid.y * ntid.y.
mad.lo.s32 %r62, %r60, %r33, %r62;
.loc 2 64 1
setp.lt.s32 %p12, %r62, %r28;
@%p12 bra BB51_1;
BB51_15:
.loc 2 64 2
ret;
}
// map2_sub_double: element-wise double-precision subtraction of two arrays.
// For every i < param_0 and j < param_1 (signed 32-bit compares) it stores
//   param_2[j*param_3 + i] = param_4[j*param_5 + i] - param_6[j*param_7 + i]
// where indices are in 8-byte elements.
// Parameters:
//   param_0 (u32) - bound of the inner index i (driven by the y thread/block ids)
//   param_1 (u32) - bound of the outer index j (driven by the x thread/block ids)
//   param_2 (u64) - destination pointer, param_3 (u32) - its per-j element stride
//   param_4 (u64) - left operand pointer, param_5 (u32) - its per-j element stride
//   param_6 (u64) - right operand pointer, param_7 (u32) - its per-j element stride
// Both loops advance by (number of blocks * block size) in their dimension,
// so any grid size covers the whole index range.
.visible .entry map2_sub_double(
.param .u32 map2_sub_double_param_0,
.param .u32 map2_sub_double_param_1,
.param .u64 map2_sub_double_param_2,
.param .u32 map2_sub_double_param_3,
.param .u64 map2_sub_double_param_4,
.param .u32 map2_sub_double_param_5,
.param .u64 map2_sub_double_param_6,
.param .u32 map2_sub_double_param_7
)
{
.reg .pred %p<5>;
.reg .s32 %r<30>;
.reg .s64 %rd<13>;
.reg .f64 %fd<4>;
ld.param.u32 %r13, [map2_sub_double_param_0];
ld.param.u32 %r14, [map2_sub_double_param_1];
ld.param.u64 %rd4, [map2_sub_double_param_2];
ld.param.u32 %r15, [map2_sub_double_param_3];
ld.param.u64 %rd5, [map2_sub_double_param_4];
ld.param.u32 %r16, [map2_sub_double_param_5];
ld.param.u64 %rd6, [map2_sub_double_param_6];
ld.param.u32 %r17, [map2_sub_double_param_7];
// %rd1 = destination, %rd3 = left operand, %rd2 = right operand (global space).
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd6;
cvta.to.global.u64 %rd3, %rd5;
.loc 2 65 1
// %r28 = outer index j = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r28, %r1, %r18, %r19;
.loc 2 65 1
// Nothing to do for this thread when j starts at or beyond param_1.
setp.ge.s32 %p1, %r28, %r14;
@%p1 bra BB52_6;
.loc 2 65 1
// %r3 = starting inner index i = ntid.y * ctaid.y + tid.y
mov.u32 %r20, %tid.y;
mov.u32 %r21, %ntid.y;
mov.u32 %r22, %ctaid.y;
mad.lo.s32 %r3, %r21, %r22, %r20;
.loc 2 65 22
// %r4 = outer loop step = nctaid.x * ntid.x
mov.u32 %r23, %nctaid.x;
mul.lo.s32 %r4, %r23, %r1;
.loc 2 65 22
// %r5 = inner loop step = nctaid.y * ntid.y
mov.u32 %r24, %nctaid.y;
mul.lo.s32 %r5, %r24, %r21;
// Outer loop over j.
BB52_2:
.loc 2 65 1
setp.ge.s32 %p2, %r3, %r13;
@%p2 bra BB52_5;
.loc 2 65 1
// Per-j base offsets: left (%r7), right (%r8) and destination (%r9).
mul.lo.s32 %r7, %r28, %r16;
mul.lo.s32 %r8, %r28, %r17;
mul.lo.s32 %r9, %r28, %r15;
// Restart the inner index at %r3 for every j.
mov.u32 %r29, %r3;
// Inner loop over i.
BB52_4:
.loc 2 65 1
mov.u32 %r10, %r29;
// %rd8 = &left[j*param_5 + i]
add.s32 %r25, %r10, %r7;
mul.wide.s32 %rd7, %r25, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 2 65 1
// %rd10 = &right[j*param_7 + i]
add.s32 %r26, %r10, %r8;
mul.wide.s32 %rd9, %r26, 8;
add.s64 %rd10, %rd2, %rd9;
.loc 2 65 1
ld.global.f64 %fd1, [%rd10];
ld.global.f64 %fd2, [%rd8];
// result = left - right
sub.f64 %fd3, %fd2, %fd1;
// %rd12 = &dest[j*param_3 + i]
add.s32 %r27, %r10, %r9;
mul.wide.s32 %rd11, %r27, 8;
add.s64 %rd12, %rd1, %rd11;
.loc 2 65 1
st.global.f64 [%rd12], %fd3;
.loc 2 65 22
// i += nctaid.y * ntid.y; continue while i < param_0.
add.s32 %r11, %r5, %r10;
.loc 2 65 1
setp.lt.s32 %p3, %r11, %r13;
mov.u32 %r29, %r11;
@%p3 bra BB52_4;
BB52_5:
.loc 2 65 22
// j += nctaid.x * ntid.x; continue while j < param_1.
add.s32 %r28, %r4, %r28;
.loc 2 65 1
setp.lt.s32 %p4, %r28, %r14;
@%p4 bra BB52_2;
BB52_6:
.loc 2 65 2
ret;
}
// map2_v_s_sub_double: subtracts a scalar from every element of an array.
// For every i < param_0 and j < param_1 (signed 32-bit compares) it stores
//   param_2[j*param_3 + i] = param_4[j*param_5 + i] - param_6
// where indices are in 8-byte elements.
// Parameters:
//   param_0 (u32) - bound of the inner index i (driven by the y thread/block ids)
//   param_1 (u32) - bound of the outer index j (driven by the x thread/block ids)
//   param_2 (u64) - destination pointer, param_3 (u32) - its per-j element stride
//   param_4 (u64) - input pointer, param_5 (u32) - its per-j element stride
//   param_6 (f64) - scalar right operand
.visible .entry map2_v_s_sub_double(
.param .u32 map2_v_s_sub_double_param_0,
.param .u32 map2_v_s_sub_double_param_1,
.param .u64 map2_v_s_sub_double_param_2,
.param .u32 map2_v_s_sub_double_param_3,
.param .u64 map2_v_s_sub_double_param_4,
.param .u32 map2_v_s_sub_double_param_5,
.param .f64 map2_v_s_sub_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_v_s_sub_double_param_0];
ld.param.u32 %r13, [map2_v_s_sub_double_param_1];
ld.param.u64 %rd3, [map2_v_s_sub_double_param_2];
ld.param.u32 %r14, [map2_v_s_sub_double_param_3];
ld.param.u64 %rd4, [map2_v_s_sub_double_param_4];
ld.param.u32 %r15, [map2_v_s_sub_double_param_5];
// %fd1 = scalar operand, loaded once and reused for every element.
ld.param.f64 %fd1, [map2_v_s_sub_double_param_6];
// %rd1 = destination, %rd2 = input (global space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 65 1
// %r25 = outer index j = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 65 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB53_6;
.loc 2 65 1
// %r3 = starting inner index i = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 65 22
// %r4 = outer loop step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 65 22
// %r5 = inner loop step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over j.
BB53_2:
.loc 2 65 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB53_5;
.loc 2 65 1
// Per-j base offsets: input (%r7) and destination (%r8).
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over i.
BB53_4:
.loc 2 65 1
mov.u32 %r9, %r26;
// %rd6 = &input[j*param_5 + i]
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 65 1
ld.global.f64 %fd2, [%rd6];
// result = element - scalar
sub.f64 %fd3, %fd2, %fd1;
// %rd8 = &dest[j*param_3 + i]
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 65 1
st.global.f64 [%rd8], %fd3;
.loc 2 65 22
// i += nctaid.y * ntid.y; continue while i < param_0.
add.s32 %r10, %r5, %r9;
.loc 2 65 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB53_4;
BB53_5:
.loc 2 65 22
// j += nctaid.x * ntid.x; continue while j < param_1.
add.s32 %r25, %r4, %r25;
.loc 2 65 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB53_2;
BB53_6:
.loc 2 65 2
ret;
}
// map2_s_v_sub_double: subtracts every element of an array from a scalar.
// For every i < param_0 and j < param_1 (signed 32-bit compares) it stores
//   param_2[j*param_3 + i] = param_4 - param_5[j*param_6 + i]
// where indices are in 8-byte elements.
// Parameters:
//   param_0 (u32) - bound of the inner index i (driven by the y thread/block ids)
//   param_1 (u32) - bound of the outer index j (driven by the x thread/block ids)
//   param_2 (u64) - destination pointer, param_3 (u32) - its per-j element stride
//   param_4 (f64) - scalar left operand
//   param_5 (u64) - input pointer, param_6 (u32) - its per-j element stride
.visible .entry map2_s_v_sub_double(
.param .u32 map2_s_v_sub_double_param_0,
.param .u32 map2_s_v_sub_double_param_1,
.param .u64 map2_s_v_sub_double_param_2,
.param .u32 map2_s_v_sub_double_param_3,
.param .f64 map2_s_v_sub_double_param_4,
.param .u64 map2_s_v_sub_double_param_5,
.param .u32 map2_s_v_sub_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_s_v_sub_double_param_0];
ld.param.u32 %r13, [map2_s_v_sub_double_param_1];
ld.param.u64 %rd3, [map2_s_v_sub_double_param_2];
ld.param.u32 %r14, [map2_s_v_sub_double_param_3];
// %fd1 = scalar operand, loaded once and reused for every element.
ld.param.f64 %fd1, [map2_s_v_sub_double_param_4];
ld.param.u64 %rd4, [map2_s_v_sub_double_param_5];
ld.param.u32 %r15, [map2_s_v_sub_double_param_6];
// %rd1 = destination, %rd2 = input (global space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 65 1
// %r25 = outer index j = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 65 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB54_6;
.loc 2 65 1
// %r3 = starting inner index i = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 65 22
// %r4 = outer loop step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 65 22
// %r5 = inner loop step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over j.
BB54_2:
.loc 2 65 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB54_5;
.loc 2 65 1
// Per-j base offsets: input (%r7) and destination (%r8).
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over i.
BB54_4:
.loc 2 65 1
mov.u32 %r9, %r26;
// %rd6 = &input[j*param_6 + i]
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 65 1
ld.global.f64 %fd2, [%rd6];
// result = scalar - element (operand order is the reverse of the v_s variant)
sub.f64 %fd3, %fd1, %fd2;
// %rd8 = &dest[j*param_3 + i]
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 65 1
st.global.f64 [%rd8], %fd3;
.loc 2 65 22
// i += nctaid.y * ntid.y; continue while i < param_0.
add.s32 %r10, %r5, %r9;
.loc 2 65 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB54_4;
BB54_5:
.loc 2 65 22
// j += nctaid.x * ntid.x; continue while j < param_1.
add.s32 %r25, %r4, %r25;
.loc 2 65 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB54_2;
BB54_6:
.loc 2 65 2
ret;
}
// map2_transpose_sub_double: double-precision subtraction in which the right
// operand is read with swapped indices, staged through a shared-memory tile.
// For every r < param_1 and c < param_0 (signed 32-bit compares) it stores
//   param_2[r*param_3 + c] = param_4[r*param_5 + c] - param_6[c*param_7 + r]
// where indices are in 8-byte elements.
// Parameters:
//   param_0 (u32) - bound of c (tile bases derived from the x block ids)
//   param_1 (u32) - bound of r (tile bases derived from the y block ids)
//   param_2 (u64) - destination pointer, param_3 (u32) - its per-r element stride
//   param_4 (u64) - left operand pointer, param_5 (u32) - its per-r element stride
//   param_6 (u64) - right operand pointer, param_7 (u32) - its per-c element stride
// Work is done in chunks of at most 32 x 32 elements. Each chunk is first
// copied from param_6 into the tile, then read back with swapped tile indices.
// NOTE(review): chunk extent is fixed at 32 while chunk bases step by
// ntid.x / ntid.y; presumably launched with 32-wide blocks - confirm against
// the host launch code.
.visible .entry map2_transpose_sub_double(
.param .u32 map2_transpose_sub_double_param_0,
.param .u32 map2_transpose_sub_double_param_1,
.param .u64 map2_transpose_sub_double_param_2,
.param .u32 map2_transpose_sub_double_param_3,
.param .u64 map2_transpose_sub_double_param_4,
.param .u32 map2_transpose_sub_double_param_5,
.param .u64 map2_transpose_sub_double_param_6,
.param .u32 map2_transpose_sub_double_param_7
)
{
.reg .pred %p<13>;
.reg .s32 %r<68>;
.reg .s64 %rd<23>;
.reg .f64 %fd<5>;
// demoted variable
// Shared tile: 8448 bytes = 32 rows of 264 bytes (33 doubles per row).
// The 33rd column is presumably padding against shared-memory bank conflicts.
.shared .align 8 .b8 map2_transpose_sub_double$__cuda_local_var_32581_1747_non_const_tile[8448];
ld.param.u32 %r27, [map2_transpose_sub_double_param_0];
ld.param.u32 %r28, [map2_transpose_sub_double_param_1];
ld.param.u64 %rd4, [map2_transpose_sub_double_param_2];
ld.param.u32 %r29, [map2_transpose_sub_double_param_3];
ld.param.u64 %rd5, [map2_transpose_sub_double_param_4];
ld.param.u32 %r30, [map2_transpose_sub_double_param_5];
ld.param.u64 %rd6, [map2_transpose_sub_double_param_6];
ld.param.u32 %r31, [map2_transpose_sub_double_param_7];
// %rd1 = destination, %rd2 = left operand, %rd3 = right operand (global space).
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
cvta.to.global.u64 %rd3, %rd6;
.loc 2 65 1
// %r62 = chunk base for r = ctaid.y * ntid.y (same value for the whole block).
mov.u32 %r1, %ntid.x;
mov.u32 %r32, %ctaid.y;
mov.u32 %r33, %ntid.y;
mul.lo.s32 %r62, %r32, %r33;
.loc 2 65 1
setp.ge.s32 %p2, %r62, %r28;
@%p2 bra BB55_15;
// Outer chunk loop over r bases.
BB55_1:
.loc 2 65 1
// %r63 = chunk base for c = ctaid.x * ntid.x (same value for the whole block).
mov.u32 %r35, %ctaid.x;
mul.lo.s32 %r63, %r35, %r1;
.loc 2 65 1
setp.ge.s32 %p3, %r63, %r27;
@%p3 bra BB55_14;
.loc 2 65 1
// %r4 = exclusive end of this chunk in r = min(param_1, base + 32).
add.s32 %r37, %r62, 32;
.loc 4 2621 10
min.s32 %r4, %r28, %r37;
// Inner chunk loop over c bases.
BB55_3:
.loc 2 65 1
// %r7 = exclusive end of this chunk in c = min(param_0, base + 32).
add.s32 %r40, %r63, 32;
.loc 4 2621 10
min.s32 %r7, %r27, %r40;
.loc 2 65 1
// Load phase: %r64 walks c from base + tid.y in steps of ntid.y.
mov.u32 %r41, %tid.y;
add.s32 %r64, %r41, %r63;
.loc 2 65 1
setp.ge.s32 %p4, %r64, %r7;
@%p4 bra BB55_8;
BB55_4:
.loc 2 65 1
mov.u32 %r42, %tid.x;
.loc 2 65 1
// %r65 walks r from base + tid.x in steps of ntid.x.
add.s32 %r65, %r42, %r62;
.loc 2 65 1
setp.ge.s32 %p5, %r65, %r4;
@%p5 bra BB55_7;
.loc 2 65 1
// %r10 = c * param_7 (right operand row offset), %r11 = c - c_base (tile row).
mul.lo.s32 %r10, %r64, %r31;
sub.s32 %r11, %r64, %r63;
BB55_6:
// %rd8 = &right[c*param_7 + r]
add.s32 %r45, %r65, %r10;
mul.wide.s32 %rd7, %r45, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 2 65 1
// %rd13 = &tile[c - c_base][r - r_base] (row pitch 264 bytes).
sub.s32 %r46, %r65, %r62;
mul.wide.s32 %rd9, %r11, 264;
mov.u64 %rd10, map2_transpose_sub_double$__cuda_local_var_32581_1747_non_const_tile;
add.s64 %rd11, %rd10, %rd9;
mul.wide.s32 %rd12, %r46, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 2 65 1
ld.global.f64 %fd1, [%rd8];
st.shared.f64 [%rd13], %fd1;
.loc 2 65 58
add.s32 %r65, %r1, %r65;
.loc 2 65 1
setp.lt.s32 %p6, %r65, %r4;
@%p6 bra BB55_6;
BB55_7:
.loc 2 65 58
add.s32 %r64, %r33, %r64;
.loc 2 65 1
setp.lt.s32 %p7, %r64, %r7;
@%p7 bra BB55_4;
BB55_8:
.loc 2 65 1
// Compute phase: %r66 walks r from base + tid.y in steps of ntid.y.
add.s32 %r66, %r41, %r62;
setp.lt.s32 %p1, %r66, %r4;
.loc 2 65 1
// Barrier between filling the tile and reading it. It sits outside the
// per-thread loops; the enclosing chunk loops depend only on block-wide
// values (ctaid, ntid, nctaid, params), so every thread reaches it.
bar.sync 0;
.loc 2 65 1
@!%p1 bra BB55_13;
bra.uni BB55_9;
BB55_9:
.loc 2 65 1
// %r67 walks c from base + tid.x in steps of ntid.x.
mov.u32 %r51, %tid.x;
add.s32 %r67, %r51, %r63;
.loc 2 65 1
setp.ge.s32 %p8, %r67, %r7;
@%p8 bra BB55_12;
.loc 2 65 1
// %r18 = r * param_5 (left offset), %r19 = r - r_base (tile column),
// %r20 = r * param_3 (destination offset).
mul.lo.s32 %r18, %r66, %r30;
sub.s32 %r19, %r66, %r62;
mul.lo.s32 %r20, %r66, %r29;
BB55_11:
// %rd15 = &left[r*param_5 + c]
add.s32 %r54, %r67, %r18;
mul.wide.s32 %rd14, %r54, 8;
add.s64 %rd15, %rd2, %rd14;
.loc 2 65 1
// %rd20 = &tile[c - c_base][r - r_base], i.e. the value loaded from
// right[c*param_7 + r] during the load phase.
sub.s32 %r55, %r67, %r63;
mul.wide.s32 %rd16, %r55, 264;
mov.u64 %rd17, map2_transpose_sub_double$__cuda_local_var_32581_1747_non_const_tile;
add.s64 %rd18, %rd17, %rd16;
mul.wide.s32 %rd19, %r19, 8;
add.s64 %rd20, %rd18, %rd19;
.loc 2 65 1
ld.shared.f64 %fd2, [%rd20];
ld.global.f64 %fd3, [%rd15];
// result = left - tile value
sub.f64 %fd4, %fd3, %fd2;
// %rd22 = &dest[r*param_3 + c]
add.s32 %r56, %r67, %r20;
mul.wide.s32 %rd21, %r56, 8;
add.s64 %rd22, %rd1, %rd21;
.loc 2 65 1
st.global.f64 [%rd22], %fd4;
.loc 2 65 58
add.s32 %r67, %r1, %r67;
.loc 2 65 1
setp.lt.s32 %p9, %r67, %r7;
@%p9 bra BB55_11;
BB55_12:
.loc 2 65 58
add.s32 %r66, %r33, %r66;
.loc 2 65 1
setp.lt.s32 %p10, %r66, %r4;
@%p10 bra BB55_9;
BB55_13:
.loc 2 65 1
// Barrier before the tile is overwritten by the next chunk.
bar.sync 0;
.loc 2 65 1
mov.u32 %r59, %nctaid.x;
.loc 2 65 21
// c_base += nctaid.x * ntid.x; continue while c_base < param_0.
mad.lo.s32 %r63, %r59, %r1, %r63;
.loc 2 65 1
setp.lt.s32 %p11, %r63, %r27;
@%p11 bra BB55_3;
BB55_14:
.loc 2 65 1
mov.u32 %r60, %nctaid.y;
.loc 2 65 21
// r_base += nctaid.y * ntid.y; continue while r_base < param_1.
mad.lo.s32 %r62, %r60, %r33, %r62;
.loc 2 65 1
setp.lt.s32 %p12, %r62, %r28;
@%p12 bra BB55_1;
BB55_15:
.loc 2 65 2
ret;
}
// map2_mul_double: element-wise double-precision multiplication of two arrays.
// For every i < param_0 and j < param_1 (signed 32-bit compares) it stores
//   param_2[j*param_3 + i] = param_4[j*param_5 + i] * param_6[j*param_7 + i]
// where indices are in 8-byte elements.
// Parameters:
//   param_0 (u32) - bound of the inner index i (driven by the y thread/block ids)
//   param_1 (u32) - bound of the outer index j (driven by the x thread/block ids)
//   param_2 (u64) - destination pointer, param_3 (u32) - its per-j element stride
//   param_4 (u64) - left operand pointer, param_5 (u32) - its per-j element stride
//   param_6 (u64) - right operand pointer, param_7 (u32) - its per-j element stride
.visible .entry map2_mul_double(
.param .u32 map2_mul_double_param_0,
.param .u32 map2_mul_double_param_1,
.param .u64 map2_mul_double_param_2,
.param .u32 map2_mul_double_param_3,
.param .u64 map2_mul_double_param_4,
.param .u32 map2_mul_double_param_5,
.param .u64 map2_mul_double_param_6,
.param .u32 map2_mul_double_param_7
)
{
.reg .pred %p<5>;
.reg .s32 %r<30>;
.reg .s64 %rd<13>;
.reg .f64 %fd<4>;
ld.param.u32 %r13, [map2_mul_double_param_0];
ld.param.u32 %r14, [map2_mul_double_param_1];
ld.param.u64 %rd4, [map2_mul_double_param_2];
ld.param.u32 %r15, [map2_mul_double_param_3];
ld.param.u64 %rd5, [map2_mul_double_param_4];
ld.param.u32 %r16, [map2_mul_double_param_5];
ld.param.u64 %rd6, [map2_mul_double_param_6];
ld.param.u32 %r17, [map2_mul_double_param_7];
// %rd1 = destination, %rd3 = left operand, %rd2 = right operand (global space).
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd6;
cvta.to.global.u64 %rd3, %rd5;
.loc 2 66 1
// %r28 = outer index j = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r28, %r1, %r18, %r19;
.loc 2 66 1
setp.ge.s32 %p1, %r28, %r14;
@%p1 bra BB56_6;
.loc 2 66 1
// %r3 = starting inner index i = ntid.y * ctaid.y + tid.y
mov.u32 %r20, %tid.y;
mov.u32 %r21, %ntid.y;
mov.u32 %r22, %ctaid.y;
mad.lo.s32 %r3, %r21, %r22, %r20;
.loc 2 66 22
// %r4 = outer loop step = nctaid.x * ntid.x
mov.u32 %r23, %nctaid.x;
mul.lo.s32 %r4, %r23, %r1;
.loc 2 66 22
// %r5 = inner loop step = nctaid.y * ntid.y
mov.u32 %r24, %nctaid.y;
mul.lo.s32 %r5, %r24, %r21;
// Outer loop over j.
BB56_2:
.loc 2 66 1
setp.ge.s32 %p2, %r3, %r13;
@%p2 bra BB56_5;
.loc 2 66 1
// Per-j base offsets: left (%r7), right (%r8) and destination (%r9).
mul.lo.s32 %r7, %r28, %r16;
mul.lo.s32 %r8, %r28, %r17;
mul.lo.s32 %r9, %r28, %r15;
mov.u32 %r29, %r3;
// Inner loop over i.
BB56_4:
.loc 2 66 1
mov.u32 %r10, %r29;
// %rd8 = &left[j*param_5 + i]
add.s32 %r25, %r10, %r7;
mul.wide.s32 %rd7, %r25, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 2 66 1
// %rd10 = &right[j*param_7 + i]
add.s32 %r26, %r10, %r8;
mul.wide.s32 %rd9, %r26, 8;
add.s64 %rd10, %rd2, %rd9;
.loc 2 66 1
ld.global.f64 %fd1, [%rd10];
ld.global.f64 %fd2, [%rd8];
// result = left * right
mul.f64 %fd3, %fd2, %fd1;
// %rd12 = &dest[j*param_3 + i]
add.s32 %r27, %r10, %r9;
mul.wide.s32 %rd11, %r27, 8;
add.s64 %rd12, %rd1, %rd11;
.loc 2 66 1
st.global.f64 [%rd12], %fd3;
.loc 2 66 22
// i += nctaid.y * ntid.y; continue while i < param_0.
add.s32 %r11, %r5, %r10;
.loc 2 66 1
setp.lt.s32 %p3, %r11, %r13;
mov.u32 %r29, %r11;
@%p3 bra BB56_4;
BB56_5:
.loc 2 66 22
// j += nctaid.x * ntid.x; continue while j < param_1.
add.s32 %r28, %r4, %r28;
.loc 2 66 1
setp.lt.s32 %p4, %r28, %r14;
@%p4 bra BB56_2;
BB56_6:
.loc 2 66 2
ret;
}
// map2_v_s_mul_double: multiplies every element of an array by a scalar.
// For every i < param_0 and j < param_1 (signed 32-bit compares) it stores
//   param_2[j*param_3 + i] = param_4[j*param_5 + i] * param_6
// where indices are in 8-byte elements.
// Parameters:
//   param_0 (u32) - bound of the inner index i (driven by the y thread/block ids)
//   param_1 (u32) - bound of the outer index j (driven by the x thread/block ids)
//   param_2 (u64) - destination pointer, param_3 (u32) - its per-j element stride
//   param_4 (u64) - input pointer, param_5 (u32) - its per-j element stride
//   param_6 (f64) - scalar right operand
.visible .entry map2_v_s_mul_double(
.param .u32 map2_v_s_mul_double_param_0,
.param .u32 map2_v_s_mul_double_param_1,
.param .u64 map2_v_s_mul_double_param_2,
.param .u32 map2_v_s_mul_double_param_3,
.param .u64 map2_v_s_mul_double_param_4,
.param .u32 map2_v_s_mul_double_param_5,
.param .f64 map2_v_s_mul_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_v_s_mul_double_param_0];
ld.param.u32 %r13, [map2_v_s_mul_double_param_1];
ld.param.u64 %rd3, [map2_v_s_mul_double_param_2];
ld.param.u32 %r14, [map2_v_s_mul_double_param_3];
ld.param.u64 %rd4, [map2_v_s_mul_double_param_4];
ld.param.u32 %r15, [map2_v_s_mul_double_param_5];
// %fd1 = scalar operand, loaded once and reused for every element.
ld.param.f64 %fd1, [map2_v_s_mul_double_param_6];
// %rd1 = destination, %rd2 = input (global space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 66 1
// %r25 = outer index j = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 66 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB57_6;
.loc 2 66 1
// %r3 = starting inner index i = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 66 22
// %r4 = outer loop step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 66 22
// %r5 = inner loop step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over j.
BB57_2:
.loc 2 66 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB57_5;
.loc 2 66 1
// Per-j base offsets: input (%r7) and destination (%r8).
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over i.
BB57_4:
.loc 2 66 1
mov.u32 %r9, %r26;
// %rd6 = &input[j*param_5 + i]
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 66 1
ld.global.f64 %fd2, [%rd6];
// result = element * scalar
mul.f64 %fd3, %fd2, %fd1;
// %rd8 = &dest[j*param_3 + i]
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 66 1
st.global.f64 [%rd8], %fd3;
.loc 2 66 22
// i += nctaid.y * ntid.y; continue while i < param_0.
add.s32 %r10, %r5, %r9;
.loc 2 66 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB57_4;
BB57_5:
.loc 2 66 22
// j += nctaid.x * ntid.x; continue while j < param_1.
add.s32 %r25, %r4, %r25;
.loc 2 66 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB57_2;
BB57_6:
.loc 2 66 2
ret;
}
// map2_s_v_mul_double: multiplies a scalar with every element of an array.
// For every i < param_0 and j < param_1 (signed 32-bit compares) it stores
//   param_2[j*param_3 + i] = param_5[j*param_6 + i] * param_4
// where indices are in 8-byte elements.
// Parameters:
//   param_0 (u32) - bound of the inner index i (driven by the y thread/block ids)
//   param_1 (u32) - bound of the outer index j (driven by the x thread/block ids)
//   param_2 (u64) - destination pointer, param_3 (u32) - its per-j element stride
//   param_4 (f64) - scalar operand
//   param_5 (u64) - input pointer, param_6 (u32) - its per-j element stride
.visible .entry map2_s_v_mul_double(
.param .u32 map2_s_v_mul_double_param_0,
.param .u32 map2_s_v_mul_double_param_1,
.param .u64 map2_s_v_mul_double_param_2,
.param .u32 map2_s_v_mul_double_param_3,
.param .f64 map2_s_v_mul_double_param_4,
.param .u64 map2_s_v_mul_double_param_5,
.param .u32 map2_s_v_mul_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_s_v_mul_double_param_0];
ld.param.u32 %r13, [map2_s_v_mul_double_param_1];
ld.param.u64 %rd3, [map2_s_v_mul_double_param_2];
ld.param.u32 %r14, [map2_s_v_mul_double_param_3];
// %fd1 = scalar operand, loaded once and reused for every element.
ld.param.f64 %fd1, [map2_s_v_mul_double_param_4];
ld.param.u64 %rd4, [map2_s_v_mul_double_param_5];
ld.param.u32 %r15, [map2_s_v_mul_double_param_6];
// %rd1 = destination, %rd2 = input (global space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 66 1
// %r25 = outer index j = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 66 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB58_6;
.loc 2 66 1
// %r3 = starting inner index i = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 66 22
// %r4 = outer loop step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 66 22
// %r5 = inner loop step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over j.
BB58_2:
.loc 2 66 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB58_5;
.loc 2 66 1
// Per-j base offsets: input (%r7) and destination (%r8).
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over i.
BB58_4:
.loc 2 66 1
mov.u32 %r9, %r26;
// %rd6 = &input[j*param_6 + i]
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 66 1
ld.global.f64 %fd2, [%rd6];
// The instruction lists the element first and the scalar second, the same
// operand order as the v_s variant.
mul.f64 %fd3, %fd2, %fd1;
// %rd8 = &dest[j*param_3 + i]
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 66 1
st.global.f64 [%rd8], %fd3;
.loc 2 66 22
// i += nctaid.y * ntid.y; continue while i < param_0.
add.s32 %r10, %r5, %r9;
.loc 2 66 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB58_4;
BB58_5:
.loc 2 66 22
// j += nctaid.x * ntid.x; continue while j < param_1.
add.s32 %r25, %r4, %r25;
.loc 2 66 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB58_2;
BB58_6:
.loc 2 66 2
ret;
}
// map2_transpose_mul_double: double-precision multiplication in which the
// right operand is read with swapped indices, staged through a shared tile.
// For every r < param_1 and c < param_0 (signed 32-bit compares) it stores
//   param_2[r*param_3 + c] = param_4[r*param_5 + c] * param_6[c*param_7 + r]
// where indices are in 8-byte elements.
// Parameters:
//   param_0 (u32) - bound of c (tile bases derived from the x block ids)
//   param_1 (u32) - bound of r (tile bases derived from the y block ids)
//   param_2 (u64) - destination pointer, param_3 (u32) - its per-r element stride
//   param_4 (u64) - left operand pointer, param_5 (u32) - its per-r element stride
//   param_6 (u64) - right operand pointer, param_7 (u32) - its per-c element stride
// Work is done in chunks of at most 32 x 32 elements. Each chunk is first
// copied from param_6 into the tile, then read back with swapped tile indices.
// NOTE(review): chunk extent is fixed at 32 while chunk bases step by
// ntid.x / ntid.y; presumably launched with 32-wide blocks - confirm against
// the host launch code.
.visible .entry map2_transpose_mul_double(
.param .u32 map2_transpose_mul_double_param_0,
.param .u32 map2_transpose_mul_double_param_1,
.param .u64 map2_transpose_mul_double_param_2,
.param .u32 map2_transpose_mul_double_param_3,
.param .u64 map2_transpose_mul_double_param_4,
.param .u32 map2_transpose_mul_double_param_5,
.param .u64 map2_transpose_mul_double_param_6,
.param .u32 map2_transpose_mul_double_param_7
)
{
.reg .pred %p<13>;
.reg .s32 %r<68>;
.reg .s64 %rd<23>;
.reg .f64 %fd<5>;
// demoted variable
// Shared tile: 8448 bytes = 32 rows of 264 bytes (33 doubles per row).
// The 33rd column is presumably padding against shared-memory bank conflicts.
.shared .align 8 .b8 map2_transpose_mul_double$__cuda_local_var_32582_1747_non_const_tile[8448];
ld.param.u32 %r27, [map2_transpose_mul_double_param_0];
ld.param.u32 %r28, [map2_transpose_mul_double_param_1];
ld.param.u64 %rd4, [map2_transpose_mul_double_param_2];
ld.param.u32 %r29, [map2_transpose_mul_double_param_3];
ld.param.u64 %rd5, [map2_transpose_mul_double_param_4];
ld.param.u32 %r30, [map2_transpose_mul_double_param_5];
ld.param.u64 %rd6, [map2_transpose_mul_double_param_6];
ld.param.u32 %r31, [map2_transpose_mul_double_param_7];
// %rd1 = destination, %rd2 = left operand, %rd3 = right operand (global space).
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
cvta.to.global.u64 %rd3, %rd6;
.loc 2 66 1
// %r62 = chunk base for r = ctaid.y * ntid.y (same value for the whole block).
mov.u32 %r1, %ntid.x;
mov.u32 %r32, %ctaid.y;
mov.u32 %r33, %ntid.y;
mul.lo.s32 %r62, %r32, %r33;
.loc 2 66 1
setp.ge.s32 %p2, %r62, %r28;
@%p2 bra BB59_15;
// Outer chunk loop over r bases.
BB59_1:
.loc 2 66 1
// %r63 = chunk base for c = ctaid.x * ntid.x (same value for the whole block).
mov.u32 %r35, %ctaid.x;
mul.lo.s32 %r63, %r35, %r1;
.loc 2 66 1
setp.ge.s32 %p3, %r63, %r27;
@%p3 bra BB59_14;
.loc 2 66 1
// %r4 = exclusive end of this chunk in r = min(param_1, base + 32).
add.s32 %r37, %r62, 32;
.loc 4 2621 10
min.s32 %r4, %r28, %r37;
// Inner chunk loop over c bases.
BB59_3:
.loc 2 66 1
// %r7 = exclusive end of this chunk in c = min(param_0, base + 32).
add.s32 %r40, %r63, 32;
.loc 4 2621 10
min.s32 %r7, %r27, %r40;
.loc 2 66 1
// Load phase: %r64 walks c from base + tid.y in steps of ntid.y.
mov.u32 %r41, %tid.y;
add.s32 %r64, %r41, %r63;
.loc 2 66 1
setp.ge.s32 %p4, %r64, %r7;
@%p4 bra BB59_8;
BB59_4:
.loc 2 66 1
mov.u32 %r42, %tid.x;
.loc 2 66 1
// %r65 walks r from base + tid.x in steps of ntid.x.
add.s32 %r65, %r42, %r62;
.loc 2 66 1
setp.ge.s32 %p5, %r65, %r4;
@%p5 bra BB59_7;
.loc 2 66 1
// %r10 = c * param_7 (right operand row offset), %r11 = c - c_base (tile row).
mul.lo.s32 %r10, %r64, %r31;
sub.s32 %r11, %r64, %r63;
BB59_6:
// %rd8 = &right[c*param_7 + r]
add.s32 %r45, %r65, %r10;
mul.wide.s32 %rd7, %r45, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 2 66 1
// %rd13 = &tile[c - c_base][r - r_base] (row pitch 264 bytes).
sub.s32 %r46, %r65, %r62;
mul.wide.s32 %rd9, %r11, 264;
mov.u64 %rd10, map2_transpose_mul_double$__cuda_local_var_32582_1747_non_const_tile;
add.s64 %rd11, %rd10, %rd9;
mul.wide.s32 %rd12, %r46, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 2 66 1
ld.global.f64 %fd1, [%rd8];
st.shared.f64 [%rd13], %fd1;
.loc 2 66 58
add.s32 %r65, %r1, %r65;
.loc 2 66 1
setp.lt.s32 %p6, %r65, %r4;
@%p6 bra BB59_6;
BB59_7:
.loc 2 66 58
add.s32 %r64, %r33, %r64;
.loc 2 66 1
setp.lt.s32 %p7, %r64, %r7;
@%p7 bra BB59_4;
BB59_8:
.loc 2 66 1
// Compute phase: %r66 walks r from base + tid.y in steps of ntid.y.
add.s32 %r66, %r41, %r62;
setp.lt.s32 %p1, %r66, %r4;
.loc 2 66 1
// Barrier between filling the tile and reading it. It sits outside the
// per-thread loops; the enclosing chunk loops depend only on block-wide
// values (ctaid, ntid, nctaid, params), so every thread reaches it.
bar.sync 0;
.loc 2 66 1
@!%p1 bra BB59_13;
bra.uni BB59_9;
BB59_9:
.loc 2 66 1
// %r67 walks c from base + tid.x in steps of ntid.x.
mov.u32 %r51, %tid.x;
add.s32 %r67, %r51, %r63;
.loc 2 66 1
setp.ge.s32 %p8, %r67, %r7;
@%p8 bra BB59_12;
.loc 2 66 1
// %r18 = r * param_5 (left offset), %r19 = r - r_base (tile column),
// %r20 = r * param_3 (destination offset).
mul.lo.s32 %r18, %r66, %r30;
sub.s32 %r19, %r66, %r62;
mul.lo.s32 %r20, %r66, %r29;
BB59_11:
// %rd15 = &left[r*param_5 + c]
add.s32 %r54, %r67, %r18;
mul.wide.s32 %rd14, %r54, 8;
add.s64 %rd15, %rd2, %rd14;
.loc 2 66 1
// %rd20 = &tile[c - c_base][r - r_base], i.e. the value loaded from
// right[c*param_7 + r] during the load phase.
sub.s32 %r55, %r67, %r63;
mul.wide.s32 %rd16, %r55, 264;
mov.u64 %rd17, map2_transpose_mul_double$__cuda_local_var_32582_1747_non_const_tile;
add.s64 %rd18, %rd17, %rd16;
mul.wide.s32 %rd19, %r19, 8;
add.s64 %rd20, %rd18, %rd19;
.loc 2 66 1
ld.shared.f64 %fd2, [%rd20];
ld.global.f64 %fd3, [%rd15];
// result = left * tile value
mul.f64 %fd4, %fd3, %fd2;
// %rd22 = &dest[r*param_3 + c]
add.s32 %r56, %r67, %r20;
mul.wide.s32 %rd21, %r56, 8;
add.s64 %rd22, %rd1, %rd21;
.loc 2 66 1
st.global.f64 [%rd22], %fd4;
.loc 2 66 58
add.s32 %r67, %r1, %r67;
.loc 2 66 1
setp.lt.s32 %p9, %r67, %r7;
@%p9 bra BB59_11;
BB59_12:
.loc 2 66 58
add.s32 %r66, %r33, %r66;
.loc 2 66 1
setp.lt.s32 %p10, %r66, %r4;
@%p10 bra BB59_9;
BB59_13:
.loc 2 66 1
// Barrier before the tile is overwritten by the next chunk.
bar.sync 0;
.loc 2 66 1
mov.u32 %r59, %nctaid.x;
.loc 2 66 21
// c_base += nctaid.x * ntid.x; continue while c_base < param_0.
mad.lo.s32 %r63, %r59, %r1, %r63;
.loc 2 66 1
setp.lt.s32 %p11, %r63, %r27;
@%p11 bra BB59_3;
BB59_14:
.loc 2 66 1
mov.u32 %r60, %nctaid.y;
.loc 2 66 21
// r_base += nctaid.y * ntid.y; continue while r_base < param_1.
mad.lo.s32 %r62, %r60, %r33, %r62;
.loc 2 66 1
setp.lt.s32 %p12, %r62, %r28;
@%p12 bra BB59_1;
BB59_15:
.loc 2 66 2
ret;
}
// map2_div_double: element-wise double-precision division of two arrays.
// For every i < param_0 and j < param_1 (signed 32-bit compares) it stores
//   param_2[j*param_3 + i] = param_4[j*param_5 + i] / param_6[j*param_7 + i]
// where indices are in 8-byte elements. The quotient uses div.rn.f64
// (round-to-nearest-even); there is no explicit check for a zero divisor.
// Parameters:
//   param_0 (u32) - bound of the inner index i (driven by the y thread/block ids)
//   param_1 (u32) - bound of the outer index j (driven by the x thread/block ids)
//   param_2 (u64) - destination pointer, param_3 (u32) - its per-j element stride
//   param_4 (u64) - dividend pointer, param_5 (u32) - its per-j element stride
//   param_6 (u64) - divisor pointer, param_7 (u32) - its per-j element stride
.visible .entry map2_div_double(
.param .u32 map2_div_double_param_0,
.param .u32 map2_div_double_param_1,
.param .u64 map2_div_double_param_2,
.param .u32 map2_div_double_param_3,
.param .u64 map2_div_double_param_4,
.param .u32 map2_div_double_param_5,
.param .u64 map2_div_double_param_6,
.param .u32 map2_div_double_param_7
)
{
.reg .pred %p<5>;
.reg .s32 %r<30>;
.reg .s64 %rd<13>;
.reg .f64 %fd<4>;
ld.param.u32 %r13, [map2_div_double_param_0];
ld.param.u32 %r14, [map2_div_double_param_1];
ld.param.u64 %rd4, [map2_div_double_param_2];
ld.param.u32 %r15, [map2_div_double_param_3];
ld.param.u64 %rd5, [map2_div_double_param_4];
ld.param.u32 %r16, [map2_div_double_param_5];
ld.param.u64 %rd6, [map2_div_double_param_6];
ld.param.u32 %r17, [map2_div_double_param_7];
// %rd1 = destination, %rd3 = dividend, %rd2 = divisor (global space).
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd6;
cvta.to.global.u64 %rd3, %rd5;
.loc 2 67 1
// %r28 = outer index j = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
mad.lo.s32 %r28, %r1, %r18, %r19;
.loc 2 67 1
setp.ge.s32 %p1, %r28, %r14;
@%p1 bra BB60_6;
.loc 2 67 1
// %r3 = starting inner index i = ntid.y * ctaid.y + tid.y
mov.u32 %r20, %tid.y;
mov.u32 %r21, %ntid.y;
mov.u32 %r22, %ctaid.y;
mad.lo.s32 %r3, %r21, %r22, %r20;
.loc 2 67 22
// %r4 = outer loop step = nctaid.x * ntid.x
mov.u32 %r23, %nctaid.x;
mul.lo.s32 %r4, %r23, %r1;
.loc 2 67 22
// %r5 = inner loop step = nctaid.y * ntid.y
mov.u32 %r24, %nctaid.y;
mul.lo.s32 %r5, %r24, %r21;
// Outer loop over j.
BB60_2:
.loc 2 67 1
setp.ge.s32 %p2, %r3, %r13;
@%p2 bra BB60_5;
.loc 2 67 1
// Per-j base offsets: dividend (%r7), divisor (%r8) and destination (%r9).
mul.lo.s32 %r7, %r28, %r16;
mul.lo.s32 %r8, %r28, %r17;
.loc 2 67 51
mul.lo.s32 %r9, %r28, %r15;
mov.u32 %r29, %r3;
// Inner loop over i.
BB60_4:
.loc 2 67 1
mov.u32 %r10, %r29;
// %rd8 = &dividend[j*param_5 + i]
add.s32 %r25, %r10, %r7;
mul.wide.s32 %rd7, %r25, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 2 67 1
// %rd10 = &divisor[j*param_7 + i]
add.s32 %r26, %r10, %r8;
mul.wide.s32 %rd9, %r26, 8;
add.s64 %rd10, %rd2, %rd9;
.loc 2 67 1
ld.global.f64 %fd1, [%rd10];
ld.global.f64 %fd2, [%rd8];
.loc 4 3614 3
// result = dividend / divisor
div.rn.f64 %fd3, %fd2, %fd1;
.loc 2 67 51
// %rd12 = &dest[j*param_3 + i]
add.s32 %r27, %r10, %r9;
mul.wide.s32 %rd11, %r27, 8;
add.s64 %rd12, %rd1, %rd11;
.loc 2 67 51
st.global.f64 [%rd12], %fd3;
.loc 2 67 22
// i += nctaid.y * ntid.y; continue while i < param_0.
add.s32 %r11, %r5, %r10;
.loc 2 67 1
setp.lt.s32 %p3, %r11, %r13;
mov.u32 %r29, %r11;
@%p3 bra BB60_4;
BB60_5:
.loc 2 67 22
// j += nctaid.x * ntid.x; continue while j < param_1.
add.s32 %r28, %r4, %r28;
.loc 2 67 1
setp.lt.s32 %p4, %r28, %r14;
@%p4 bra BB60_2;
BB60_6:
.loc 2 67 2
ret;
}
// map2_v_s_div_double: divides every element of an array by a scalar.
// For every i < param_0 and j < param_1 (signed 32-bit compares) it stores
//   param_2[j*param_3 + i] = param_4[j*param_5 + i] / param_6
// where indices are in 8-byte elements. The quotient uses div.rn.f64
// (round-to-nearest-even); there is no explicit check for a zero divisor.
// Parameters:
//   param_0 (u32) - bound of the inner index i (driven by the y thread/block ids)
//   param_1 (u32) - bound of the outer index j (driven by the x thread/block ids)
//   param_2 (u64) - destination pointer, param_3 (u32) - its per-j element stride
//   param_4 (u64) - dividend pointer, param_5 (u32) - its per-j element stride
//   param_6 (f64) - scalar divisor
.visible .entry map2_v_s_div_double(
.param .u32 map2_v_s_div_double_param_0,
.param .u32 map2_v_s_div_double_param_1,
.param .u64 map2_v_s_div_double_param_2,
.param .u32 map2_v_s_div_double_param_3,
.param .u64 map2_v_s_div_double_param_4,
.param .u32 map2_v_s_div_double_param_5,
.param .f64 map2_v_s_div_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_v_s_div_double_param_0];
ld.param.u32 %r13, [map2_v_s_div_double_param_1];
ld.param.u64 %rd3, [map2_v_s_div_double_param_2];
ld.param.u32 %r14, [map2_v_s_div_double_param_3];
ld.param.u64 %rd4, [map2_v_s_div_double_param_4];
ld.param.u32 %r15, [map2_v_s_div_double_param_5];
// %fd1 = scalar divisor, loaded once and reused for every element.
ld.param.f64 %fd1, [map2_v_s_div_double_param_6];
// %rd1 = destination, %rd2 = input (global space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 67 1
// %r25 = outer index j = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 67 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB61_6;
.loc 2 67 1
// %r3 = starting inner index i = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 67 22
// %r4 = outer loop step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 67 22
// %r5 = inner loop step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over j.
BB61_2:
.loc 2 67 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB61_5;
.loc 2 67 1
// Per-j base offsets: input (%r7) and destination (%r8).
mul.lo.s32 %r7, %r25, %r15;
.loc 2 67 51
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over i.
BB61_4:
.loc 2 67 1
mov.u32 %r9, %r26;
// %rd6 = &input[j*param_5 + i]
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 67 1
ld.global.f64 %fd2, [%rd6];
.loc 4 3614 3
// result = element / scalar
div.rn.f64 %fd3, %fd2, %fd1;
.loc 2 67 51
// %rd8 = &dest[j*param_3 + i]
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 67 51
st.global.f64 [%rd8], %fd3;
.loc 2 67 22
// i += nctaid.y * ntid.y; continue while i < param_0.
add.s32 %r10, %r5, %r9;
.loc 2 67 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB61_4;
BB61_5:
.loc 2 67 22
// j += nctaid.x * ntid.x; continue while j < param_1.
add.s32 %r25, %r4, %r25;
.loc 2 67 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB61_2;
BB61_6:
.loc 2 67 2
ret;
}
// map2_s_v_div_double: divides a scalar by every element of an array.
// For every i < param_0 and j < param_1 (signed 32-bit compares) it stores
//   param_2[j*param_3 + i] = param_4 / param_5[j*param_6 + i]
// where indices are in 8-byte elements. The quotient uses div.rn.f64
// (round-to-nearest-even); there is no explicit check for a zero divisor.
// Parameters:
//   param_0 (u32) - bound of the inner index i (driven by the y thread/block ids)
//   param_1 (u32) - bound of the outer index j (driven by the x thread/block ids)
//   param_2 (u64) - destination pointer, param_3 (u32) - its per-j element stride
//   param_4 (f64) - scalar dividend
//   param_5 (u64) - divisor pointer, param_6 (u32) - its per-j element stride
.visible .entry map2_s_v_div_double(
.param .u32 map2_s_v_div_double_param_0,
.param .u32 map2_s_v_div_double_param_1,
.param .u64 map2_s_v_div_double_param_2,
.param .u32 map2_s_v_div_double_param_3,
.param .f64 map2_s_v_div_double_param_4,
.param .u64 map2_s_v_div_double_param_5,
.param .u32 map2_s_v_div_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_s_v_div_double_param_0];
ld.param.u32 %r13, [map2_s_v_div_double_param_1];
ld.param.u64 %rd3, [map2_s_v_div_double_param_2];
ld.param.u32 %r14, [map2_s_v_div_double_param_3];
// %fd1 = scalar dividend, loaded once and reused for every element.
ld.param.f64 %fd1, [map2_s_v_div_double_param_4];
ld.param.u64 %rd4, [map2_s_v_div_double_param_5];
ld.param.u32 %r15, [map2_s_v_div_double_param_6];
// %rd1 = destination, %rd2 = input (global space).
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 67 1
// %r25 = outer index j = ntid.x * ctaid.x + tid.x
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 67 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB62_6;
.loc 2 67 1
// %r3 = starting inner index i = ntid.y * ctaid.y + tid.y
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 67 22
// %r4 = outer loop step = nctaid.x * ntid.x
mov.u32 %r21, %nctaid.x;
mul.lo.s32 %r4, %r21, %r1;
.loc 2 67 22
// %r5 = inner loop step = nctaid.y * ntid.y
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over j.
BB62_2:
.loc 2 67 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB62_5;
.loc 2 67 1
// Per-j base offsets: input (%r7) and destination (%r8).
mul.lo.s32 %r7, %r25, %r15;
.loc 2 67 51
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over i.
BB62_4:
.loc 2 67 1
mov.u32 %r9, %r26;
// %rd6 = &input[j*param_6 + i]
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 67 1
ld.global.f64 %fd2, [%rd6];
.loc 4 3614 3
// result = scalar / element (operand order is the reverse of the v_s variant)
div.rn.f64 %fd3, %fd1, %fd2;
.loc 2 67 51
// %rd8 = &dest[j*param_3 + i]
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 67 51
st.global.f64 [%rd8], %fd3;
.loc 2 67 22
// i += nctaid.y * ntid.y; continue while i < param_0.
add.s32 %r10, %r5, %r9;
.loc 2 67 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB62_4;
BB62_5:
.loc 2 67 22
// j += nctaid.x * ntid.x; continue while j < param_1.
add.s32 %r25, %r4, %r25;
.loc 2 67 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB62_2;
BB62_6:
.loc 2 67 2
ret;
}
// map2_transpose_div_double: double-precision division in which the divisor
// is read with swapped indices, staged through a shared-memory tile.
// For every r < param_1 and c < param_0 (signed 32-bit compares) it stores
//   param_2[r*param_3 + c] = param_4[r*param_5 + c] / param_6[c*param_7 + r]
// where indices are in 8-byte elements. The quotient uses div.rn.f64
// (round-to-nearest-even); there is no explicit check for a zero divisor.
// Parameters:
//   param_0 (u32) - bound of c (tile bases derived from the x block ids)
//   param_1 (u32) - bound of r (tile bases derived from the y block ids)
//   param_2 (u64) - destination pointer, param_3 (u32) - its per-r element stride
//   param_4 (u64) - dividend pointer, param_5 (u32) - its per-r element stride
//   param_6 (u64) - divisor pointer, param_7 (u32) - its per-c element stride
// Work is done in chunks of at most 32 x 32 elements. Each chunk is first
// copied from param_6 into the tile, then read back with swapped tile indices.
// NOTE(review): chunk extent is fixed at 32 while chunk bases step by
// ntid.x / ntid.y; presumably launched with 32-wide blocks - confirm against
// the host launch code.
.visible .entry map2_transpose_div_double(
.param .u32 map2_transpose_div_double_param_0,
.param .u32 map2_transpose_div_double_param_1,
.param .u64 map2_transpose_div_double_param_2,
.param .u32 map2_transpose_div_double_param_3,
.param .u64 map2_transpose_div_double_param_4,
.param .u32 map2_transpose_div_double_param_5,
.param .u64 map2_transpose_div_double_param_6,
.param .u32 map2_transpose_div_double_param_7
)
{
.reg .pred %p<13>;
.reg .s32 %r<68>;
.reg .s64 %rd<23>;
.reg .f64 %fd<5>;
// demoted variable
// Shared tile: 8448 bytes = 32 rows of 264 bytes (33 doubles per row).
// The 33rd column is presumably padding against shared-memory bank conflicts.
.shared .align 8 .b8 map2_transpose_div_double$__cuda_local_var_32583_1747_non_const_tile[8448];
ld.param.u32 %r27, [map2_transpose_div_double_param_0];
ld.param.u32 %r28, [map2_transpose_div_double_param_1];
ld.param.u64 %rd4, [map2_transpose_div_double_param_2];
ld.param.u32 %r29, [map2_transpose_div_double_param_3];
ld.param.u64 %rd5, [map2_transpose_div_double_param_4];
ld.param.u32 %r30, [map2_transpose_div_double_param_5];
ld.param.u64 %rd6, [map2_transpose_div_double_param_6];
ld.param.u32 %r31, [map2_transpose_div_double_param_7];
// %rd1 = destination, %rd2 = dividend, %rd3 = divisor (global space).
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
cvta.to.global.u64 %rd3, %rd6;
.loc 2 67 1
// %r62 = chunk base for r = ctaid.y * ntid.y (same value for the whole block).
mov.u32 %r1, %ntid.x;
mov.u32 %r32, %ctaid.y;
mov.u32 %r33, %ntid.y;
mul.lo.s32 %r62, %r32, %r33;
.loc 2 67 1
setp.ge.s32 %p2, %r62, %r28;
@%p2 bra BB63_15;
// Outer chunk loop over r bases.
BB63_1:
.loc 2 67 1
// %r63 = chunk base for c = ctaid.x * ntid.x (same value for the whole block).
mov.u32 %r35, %ctaid.x;
mul.lo.s32 %r63, %r35, %r1;
.loc 2 67 1
setp.ge.s32 %p3, %r63, %r27;
@%p3 bra BB63_14;
.loc 2 67 1
// %r4 = exclusive end of this chunk in r = min(param_1, base + 32).
add.s32 %r37, %r62, 32;
.loc 4 2621 10
min.s32 %r4, %r28, %r37;
// Inner chunk loop over c bases.
BB63_3:
.loc 2 67 1
// %r7 = exclusive end of this chunk in c = min(param_0, base + 32).
add.s32 %r40, %r63, 32;
.loc 4 2621 10
min.s32 %r7, %r27, %r40;
.loc 2 67 1
// Load phase: %r64 walks c from base + tid.y in steps of ntid.y.
mov.u32 %r41, %tid.y;
add.s32 %r64, %r41, %r63;
.loc 2 67 1
setp.ge.s32 %p4, %r64, %r7;
@%p4 bra BB63_8;
BB63_4:
.loc 2 67 1
mov.u32 %r42, %tid.x;
.loc 2 67 1
// %r65 walks r from base + tid.x in steps of ntid.x.
add.s32 %r65, %r42, %r62;
.loc 2 67 1
setp.ge.s32 %p5, %r65, %r4;
@%p5 bra BB63_7;
.loc 2 67 1
// %r10 = c * param_7 (divisor row offset), %r11 = c - c_base (tile row).
mul.lo.s32 %r10, %r64, %r31;
sub.s32 %r11, %r64, %r63;
BB63_6:
// %rd8 = &divisor[c*param_7 + r]
add.s32 %r45, %r65, %r10;
mul.wide.s32 %rd7, %r45, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 2 67 1
// %rd13 = &tile[c - c_base][r - r_base] (row pitch 264 bytes).
sub.s32 %r46, %r65, %r62;
mul.wide.s32 %rd9, %r11, 264;
mov.u64 %rd10, map2_transpose_div_double$__cuda_local_var_32583_1747_non_const_tile;
add.s64 %rd11, %rd10, %rd9;
mul.wide.s32 %rd12, %r46, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 2 67 1
ld.global.f64 %fd1, [%rd8];
st.shared.f64 [%rd13], %fd1;
.loc 2 67 58
add.s32 %r65, %r1, %r65;
.loc 2 67 1
setp.lt.s32 %p6, %r65, %r4;
@%p6 bra BB63_6;
BB63_7:
.loc 2 67 58
add.s32 %r64, %r33, %r64;
.loc 2 67 1
setp.lt.s32 %p7, %r64, %r7;
@%p7 bra BB63_4;
BB63_8:
.loc 2 67 1
// Compute phase: %r66 walks r from base + tid.y in steps of ntid.y.
add.s32 %r66, %r41, %r62;
setp.lt.s32 %p1, %r66, %r4;
.loc 2 67 1
// Barrier between filling the tile and reading it. It sits outside the
// per-thread loops; the enclosing chunk loops depend only on block-wide
// values (ctaid, ntid, nctaid, params), so every thread reaches it.
bar.sync 0;
.loc 2 67 1
@!%p1 bra BB63_13;
bra.uni BB63_9;
BB63_9:
.loc 2 67 1
// %r67 walks c from base + tid.x in steps of ntid.x.
mov.u32 %r51, %tid.x;
add.s32 %r67, %r51, %r63;
.loc 2 67 1
setp.ge.s32 %p8, %r67, %r7;
@%p8 bra BB63_12;
.loc 2 67 1
// %r18 = r * param_5 (dividend offset), %r19 = r - r_base (tile column),
// %r20 = r * param_3 (destination offset).
mul.lo.s32 %r18, %r66, %r30;
sub.s32 %r19, %r66, %r62;
.loc 2 67 47
mul.lo.s32 %r20, %r66, %r29;
BB63_11:
.loc 2 67 1
// %rd15 = &dividend[r*param_5 + c]
add.s32 %r54, %r67, %r18;
mul.wide.s32 %rd14, %r54, 8;
add.s64 %rd15, %rd2, %rd14;
.loc 2 67 1
// %rd20 = &tile[c - c_base][r - r_base], i.e. the value loaded from
// divisor[c*param_7 + r] during the load phase.
sub.s32 %r55, %r67, %r63;
mul.wide.s32 %rd16, %r55, 264;
mov.u64 %rd17, map2_transpose_div_double$__cuda_local_var_32583_1747_non_const_tile;
add.s64 %rd18, %rd17, %rd16;
mul.wide.s32 %rd19, %r19, 8;
add.s64 %rd20, %rd18, %rd19;
.loc 2 67 1
ld.shared.f64 %fd2, [%rd20];
ld.global.f64 %fd3, [%rd15];
.loc 4 3614 3
// result = dividend / tile value
div.rn.f64 %fd4, %fd3, %fd2;
.loc 2 67 47
// %rd22 = &dest[r*param_3 + c]
add.s32 %r56, %r67, %r20;
mul.wide.s32 %rd21, %r56, 8;
add.s64 %rd22, %rd1, %rd21;
.loc 2 67 47
st.global.f64 [%rd22], %fd4;
.loc 2 67 58
add.s32 %r67, %r1, %r67;
.loc 2 67 1
setp.lt.s32 %p9, %r67, %r7;
@%p9 bra BB63_11;
BB63_12:
.loc 2 67 58
add.s32 %r66, %r33, %r66;
.loc 2 67 1
setp.lt.s32 %p10, %r66, %r4;
@%p10 bra BB63_9;
BB63_13:
.loc 2 67 1
// Barrier before the tile is overwritten by the next chunk.
bar.sync 0;
.loc 2 67 1
mov.u32 %r59, %nctaid.x;
.loc 2 67 21
// c_base += nctaid.x * ntid.x; continue while c_base < param_0.
mad.lo.s32 %r63, %r59, %r1, %r63;
.loc 2 67 1
setp.lt.s32 %p11, %r63, %r27;
@%p11 bra BB63_3;
BB63_14:
.loc 2 67 1
mov.u32 %r60, %nctaid.y;
.loc 2 67 21
// r_base += nctaid.y * ntid.y; continue while r_base < param_1.
mad.lo.s32 %r62, %r60, %r33, %r62;
.loc 2 67 1
setp.lt.s32 %p12, %r62, %r28;
@%p12 bra BB63_1;
BB63_15:
.loc 2 67 2
ret;
}
// map2_mod_double: element-wise floating-point remainder of two f64 arrays,
// dst[i,j] = rem(a[i,j], b[i,j]), where the remainder takes the sign of a.
// The remainder code carries ".loc 3 458", i.e. it is inlined from the CUDA
// double-precision math header (presumably fmod -- confirm against the .cu).
//
// Parameters (as used by the code below):
//   param_0 (u32): bound for the y-derived (inner) index
//   param_1 (u32): bound for the x-derived (outer) index
//   param_2 (u64): destination base address
//   param_3 (u32): destination stride in elements, multiplied by the outer index
//   param_4 (u64): first operand (a) base address
//   param_5 (u32): stride for a, in elements
//   param_6 (u64): second operand (b) base address
//   param_7 (u32): stride for b, in elements
// Element offset is outer * stride + inner, scaled by 8 bytes.
// Both indices start at ntid * ctaid + tid and advance by nctaid * ntid, so
// any launch grid covers the whole index range.
.visible .entry map2_mod_double(
.param .u32 map2_mod_double_param_0,
.param .u32 map2_mod_double_param_1,
.param .u64 map2_mod_double_param_2,
.param .u32 map2_mod_double_param_3,
.param .u64 map2_mod_double_param_4,
.param .u32 map2_mod_double_param_5,
.param .u64 map2_mod_double_param_6,
.param .u32 map2_mod_double_param_7
)
{
.reg .pred %p<18>;
.reg .s32 %r<66>;
.reg .s64 %rd<33>;
.reg .f64 %fd<20>;
ld.param.u32 %r16, [map2_mod_double_param_0];
ld.param.u32 %r17, [map2_mod_double_param_1];
ld.param.u64 %rd9, [map2_mod_double_param_2];
ld.param.u32 %r18, [map2_mod_double_param_3];
ld.param.u64 %rd10, [map2_mod_double_param_4];
ld.param.u32 %r19, [map2_mod_double_param_5];
ld.param.u64 %rd11, [map2_mod_double_param_6];
ld.param.u32 %r20, [map2_mod_double_param_7];
.loc 2 68 1
// %r61 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r21, %ntid.x;
mov.u32 %r22, %ctaid.x;
mov.u32 %r23, %tid.x;
mad.lo.s32 %r61, %r21, %r22, %r23;
.loc 2 68 1
// Nothing to do for this thread if its first outer index is out of range.
setp.ge.s32 %p1, %r61, %r17;
@%p1 bra BB64_19;
cvta.to.global.u64 %rd12, %rd10;
cvta.to.global.u64 %rd13, %rd11;
cvta.to.global.u64 %rd28, %rd9;
// ---- outer loop head: reset the inner index for each outer index ----
BB64_2:
.loc 2 68 1
// %r62 = inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r24, %ctaid.y;
mov.u32 %r25, %ntid.y;
mov.u32 %r26, %tid.y;
mad.lo.s32 %r62, %r25, %r24, %r26;
.loc 2 68 1
setp.ge.s32 %p2, %r62, %r16;
@%p2 bra BB64_18;
// ---- inner loop body: one element per iteration ----
BB64_3:
.loc 2 68 1
// %rd15 = &a[outer * stride_a + inner]
mad.lo.s32 %r31, %r61, %r19, %r62;
mul.wide.s32 %rd14, %r31, 8;
add.s64 %rd15, %rd12, %rd14;
.loc 2 68 1
// %rd17 = &b[outer * stride_b + inner]
mad.lo.s32 %r32, %r61, %r20, %r62;
mul.wide.s32 %rd16, %r32, 8;
add.s64 %rd17, %rd13, %rd16;
.loc 2 68 1
// %fd19 = a (also the result register), %fd17 = |a|
ld.global.f64 %fd19, [%rd15];
.loc 3 458 10
abs.f64 %fd17, %fd19;
.loc 2 68 1
// %fd3 = b, %fd18 = |b|
ld.global.f64 %fd3, [%rd17];
.loc 3 458 10
abs.f64 %fd18, %fd3;
// "greater-or-unordered than +inf" is true only for NaN: either operand NaN
// takes the a + b path (BB64_16), which propagates the NaN.
setp.gtu.f64 %p3, %fd17, 0d7FF0000000000000;
setp.gtu.f64 %p4, %fd18, 0d7FF0000000000000;
or.pred %p5, %p3, %p4;
.loc 3 458 10
@%p5 bra BB64_16;
// |a| == inf or |b| == 0 -> result is NaN.
setp.eq.f64 %p6, %fd17, 0d7FF0000000000000;
setp.eq.f64 %p7, %fd18, 0d0000000000000000;
or.pred %p8, %p6, %p7;
@!%p8 bra BB64_6;
bra.uni BB64_5;
BB64_5:
mov.f64 %fd19, 0dFFF8000000000000;
bra.uni BB64_17;
BB64_6:
.loc 3 458 10
// |a| < |b|: the result is a itself (%fd19 is still a).
setp.ltu.f64 %p9, %fd17, %fd18;
@%p9 bra BB64_17;
// %r63 / %r64 = biased exponents of |a| / |b| (high word >> 20; sign is 0).
{
.reg .b32 %temp;
mov.b64 {%temp, %r33}, %fd17;
}
shr.s32 %r63, %r33, 20;
{
.reg .b32 %temp;
mov.b64 {%temp, %r34}, %fd18;
}
shr.s32 %r64, %r34, 20;
// Exponent field 0 means subnormal: scale by 2^54 (0d4350000000000000) and
// compensate the exponent by -54.
setp.gt.s32 %p10, %r63, 0;
@%p10 bra BB64_9;
mul.f64 %fd17, %fd17, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r35}, %fd17;
}
shr.s32 %r36, %r35, 20;
add.s32 %r37, %r63, %r36;
add.s32 %r63, %r37, -54;
BB64_9:
// Same subnormal handling for |b|.
setp.gt.s32 %p11, %r64, 0;
@%p11 bra BB64_11;
mul.f64 %fd18, %fd18, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r38}, %fd18;
}
shr.s32 %r39, %r38, 20;
add.s32 %r40, %r64, %r39;
add.s32 %r64, %r40, -54;
BB64_11:
// Build 53-bit integer significands: keep the low 52 fraction bits
// (mask 0x000FFFFFFFFFFFFF) and set the implicit leading bit (2^52).
mov.b64 %rd18, %fd17;
and.b64 %rd19, %rd18, 4503599627370495;
or.b64 %rd31, %rd19, 4503599627370496;
mov.b64 %rd20, %fd18;
and.b64 %rd21, %rd20, 4503599627370495;
or.b64 %rd2, %rd21, 4503599627370496;
// Iteration count = exp(a) - exp(b) + 1.
add.s32 %r41, %r63, 1;
sub.s32 %r65, %r41, %r64;
// ---- shift-and-subtract loop: one quotient bit per iteration ----
BB64_12:
// Tentatively subtract the divisor; keep the difference unless it went
// negative (sign checked through the high 32 bits), then shift left by one.
sub.s64 %rd22, %rd31, %rd2;
mov.b64 %fd12, %rd22;
{
.reg .b32 %temp;
mov.b64 {%temp, %r42}, %fd12;
}
setp.lt.s32 %p12, %r42, 0;
selp.b64 %rd4, %rd31, %rd22, %p12;
shl.b64 %rd31, %rd4, 1;
add.s32 %r65, %r65, -1;
setp.gt.s32 %p13, %r65, 0;
@%p13 bra BB64_12;
// %rd4 is the remainder before the last shift; a zero remainder skips the
// renormalisation below.
and.b64 %rd32, %rd4, 9223372036854775807;
setp.eq.s64 %p14, %rd32, 0;
@%p14 bra BB64_15;
// Renormalise: reinterpret the remainder bits as a double and scale by 2^54
// to read off how far the leading bit must be shifted (%r46 = 55 - exponent).
mov.b64 %fd13, %rd32;
mul.f64 %fd14, %fd13, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r43}, %fd14;
}
shr.s32 %r44, %r43, 20;
mov.u32 %r45, 55;
.loc 3 458 10
sub.s32 %r46, %r45, %r44;
// %r47 = result exponent = exp(b) - shift; %rd23 = normalised significand.
sub.s32 %r47, %r64, %r46;
shl.b64 %rd23, %rd32, %r46;
// Exponent < 1: emit a subnormal by shifting right (%rd24). Otherwise pack
// exponent and significand (%rd27): (e + 4095) << 52 wraps to (e - 1) << 52,
// and the significand's leading bit supplies the remaining +1.
setp.lt.s32 %p15, %r47, 1;
mov.u32 %r48, 1;
.loc 3 458 10
sub.s32 %r49, %r48, %r47;
shr.u64 %rd24, %rd23, %r49;
add.s32 %r50, %r47, 4095;
cvt.u64.u32 %rd25, %r50;
shl.b64 %rd26, %rd25, 52;
add.s64 %rd27, %rd26, %rd23;
selp.b64 %rd32, %rd24, %rd27, %p15;
BB64_15:
// Give the magnitude the sign of the original a: OR a's sign bit
// (high word & 0x80000000) into the result's high word.
mov.b64 %fd15, %rd32;
{
.reg .b32 %temp;
mov.b64 {%r51, %temp}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r52}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r53}, %fd19;
}
and.b32 %r54, %r53, -2147483648;
or.b32 %r55, %r52, %r54;
mov.b64 %fd19, {%r51, %r55};
bra.uni BB64_17;
BB64_16:
.loc 3 458 10
// NaN operand: a + b yields NaN.
add.f64 %fd19, %fd19, %fd3;
BB64_17:
.loc 2 68 145
// dst[outer * stride_dst + inner] = result
mad.lo.s32 %r56, %r61, %r18, %r62;
mul.wide.s32 %rd29, %r56, 8;
add.s64 %rd30, %rd28, %rd29;
.loc 2 68 145
st.global.f64 [%rd30], %fd19;
.loc 2 68 22
// inner += nctaid.y * ntid.y
mov.u32 %r58, %nctaid.y;
mad.lo.s32 %r62, %r58, %r25, %r62;
.loc 2 68 1
setp.lt.s32 %p16, %r62, %r16;
@%p16 bra BB64_3;
BB64_18:
.loc 2 68 22
// outer += nctaid.x * ntid.x
mov.u32 %r59, %nctaid.x;
mad.lo.s32 %r61, %r59, %r21, %r61;
.loc 2 68 1
setp.lt.s32 %p17, %r61, %r17;
@%p17 bra BB64_2;
BB64_19:
.loc 2 68 2
ret;
}
// map2_v_s_mod_double: array-by-scalar floating-point remainder,
// dst[i,j] = rem(src[i,j], s), where the result takes the sign of src[i,j].
// The remainder code carries ".loc 3 458" (inlined from the CUDA double math
// header; presumably fmod -- confirm against the .cu source).
//
// Parameters (as used by the code below):
//   param_0 (u32): bound for the y-derived (inner) index
//   param_1 (u32): bound for the x-derived (outer) index
//   param_2 (u64): destination base address
//   param_3 (u32): destination stride in elements
//   param_4 (u64): source array base address
//   param_5 (u32): source stride in elements
//   param_6 (f64): scalar divisor s
// Element offset is outer * stride + inner, scaled by 8 bytes; both indices
// start at ntid * ctaid + tid and advance by nctaid * ntid.
.visible .entry map2_v_s_mod_double(
.param .u32 map2_v_s_mod_double_param_0,
.param .u32 map2_v_s_mod_double_param_1,
.param .u64 map2_v_s_mod_double_param_2,
.param .u32 map2_v_s_mod_double_param_3,
.param .u64 map2_v_s_mod_double_param_4,
.param .u32 map2_v_s_mod_double_param_5,
.param .f64 map2_v_s_mod_double_param_6
)
{
.reg .pred %p<18>;
.reg .s32 %r<64>;
.reg .s64 %rd<29>;
.reg .f64 %fd<20>;
ld.param.u32 %r16, [map2_v_s_mod_double_param_0];
ld.param.u32 %r17, [map2_v_s_mod_double_param_1];
ld.param.u64 %rd9, [map2_v_s_mod_double_param_2];
ld.param.u32 %r18, [map2_v_s_mod_double_param_3];
ld.param.u64 %rd10, [map2_v_s_mod_double_param_4];
ld.param.u32 %r19, [map2_v_s_mod_double_param_5];
ld.param.f64 %fd11, [map2_v_s_mod_double_param_6];
.loc 2 68 1
// %r59 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r20, %ntid.x;
mov.u32 %r21, %ctaid.x;
mov.u32 %r22, %tid.x;
mad.lo.s32 %r59, %r20, %r21, %r22;
.loc 2 68 1
setp.ge.s32 %p1, %r59, %r17;
@%p1 bra BB65_20;
cvta.to.global.u64 %rd11, %rd10;
cvta.to.global.u64 %rd24, %rd9;
// ---- outer loop head ----
BB65_2:
.loc 2 68 1
// %r60 = inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r23, %ctaid.y;
mov.u32 %r24, %ntid.y;
mov.u32 %r25, %tid.y;
mad.lo.s32 %r60, %r24, %r23, %r25;
.loc 2 68 1
setp.ge.s32 %p2, %r60, %r16;
@%p2 bra BB65_19;
.loc 3 458 10
// Values that depend only on the scalar are computed outside the inner loop:
// %fd1 = |s|, %fd2 = |s| * 2^54 (used only if s is subnormal).
abs.f64 %fd1, %fd11;
mul.f64 %fd2, %fd1, 0d4350000000000000;
// ---- inner loop body: one element per iteration ----
BB65_4:
.loc 2 68 1
// %rd13 = &src[outer * stride_src + inner]
mad.lo.s32 %r30, %r59, %r19, %r60;
mul.wide.s32 %rd12, %r30, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 2 68 1
// %fd19 = dividend x (also the result register), %fd17 = |x|
ld.global.f64 %fd19, [%rd13];
.loc 3 458 10
abs.f64 %fd17, %fd19;
// Either operand NaN (|v| greater-or-unordered than +inf) -> x + s path.
setp.gtu.f64 %p3, %fd17, 0d7FF0000000000000;
setp.gtu.f64 %p4, %fd1, 0d7FF0000000000000;
or.pred %p5, %p3, %p4;
.loc 3 458 10
@%p5 bra BB65_17;
// |x| == inf or s == 0 -> result is NaN.
setp.eq.f64 %p6, %fd1, 0d0000000000000000;
.loc 3 458 10
setp.eq.f64 %p7, %fd17, 0d7FF0000000000000;
or.pred %p8, %p7, %p6;
@!%p8 bra BB65_7;
bra.uni BB65_6;
BB65_6:
mov.f64 %fd19, 0dFFF8000000000000;
bra.uni BB65_18;
BB65_7:
.loc 3 458 10
// |x| < |s|: the result is x itself.
setp.ltu.f64 %p9, %fd17, %fd1;
@%p9 bra BB65_18;
// %r61 / %r62 = biased exponents of |x| / |s|.
{
.reg .b32 %temp;
mov.b64 {%temp, %r31}, %fd17;
}
shr.s32 %r61, %r31, 20;
{
.reg .b32 %temp;
mov.b64 {%temp, %r32}, %fd1;
}
shr.s32 %r62, %r32, 20;
// Subnormal dividend: scale by 2^54 and compensate the exponent by -54.
setp.gt.s32 %p10, %r61, 0;
@%p10 bra BB65_10;
mul.f64 %fd17, %fd17, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r33}, %fd17;
}
shr.s32 %r34, %r33, 20;
add.s32 %r35, %r61, %r34;
add.s32 %r61, %r35, -54;
BB65_10:
// Subnormal divisor: use the pre-scaled |s| * 2^54 from %fd2.
setp.gt.s32 %p11, %r62, 0;
mov.f64 %fd18, %fd1;
@%p11 bra BB65_12;
{
.reg .b32 %temp;
mov.b64 {%temp, %r36}, %fd2;
}
shr.s32 %r37, %r36, 20;
add.s32 %r38, %r62, %r37;
add.s32 %r62, %r38, -54;
mov.f64 %fd18, %fd2;
BB65_12:
.loc 3 458 10
// Build 53-bit integer significands (52 fraction bits + implicit 2^52 bit).
mov.f64 %fd7, %fd18;
mov.b64 %rd14, %fd17;
and.b64 %rd15, %rd14, 4503599627370495;
or.b64 %rd27, %rd15, 4503599627370496;
mov.b64 %rd16, %fd7;
and.b64 %rd17, %rd16, 4503599627370495;
or.b64 %rd2, %rd17, 4503599627370496;
// Iteration count = exp(x) - exp(s) + 1.
add.s32 %r39, %r61, 1;
sub.s32 %r63, %r39, %r62;
// ---- shift-and-subtract loop ----
BB65_13:
// Keep (dividend - divisor) unless it is negative, then shift left by one.
sub.s64 %rd18, %rd27, %rd2;
mov.b64 %fd12, %rd18;
{
.reg .b32 %temp;
mov.b64 {%temp, %r40}, %fd12;
}
setp.lt.s32 %p12, %r40, 0;
selp.b64 %rd4, %rd27, %rd18, %p12;
shl.b64 %rd27, %rd4, 1;
add.s32 %r63, %r63, -1;
setp.gt.s32 %p13, %r63, 0;
@%p13 bra BB65_13;
// Zero remainder skips renormalisation.
and.b64 %rd28, %rd4, 9223372036854775807;
setp.eq.s64 %p14, %rd28, 0;
@%p14 bra BB65_16;
// Renormalise: scale the remainder bits by 2^54 to find the shift amount
// (%r44 = 55 - exponent), then derive the result exponent (%r45).
mov.b64 %fd13, %rd28;
mul.f64 %fd14, %fd13, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r41}, %fd14;
}
shr.s32 %r42, %r41, 20;
mov.u32 %r43, 55;
.loc 3 458 10
sub.s32 %r44, %r43, %r42;
sub.s32 %r45, %r62, %r44;
shl.b64 %rd19, %rd28, %r44;
// Exponent < 1 -> subnormal result via right shift (%rd20); otherwise pack
// exponent and significand (%rd23).
setp.lt.s32 %p15, %r45, 1;
mov.u32 %r46, 1;
.loc 3 458 10
sub.s32 %r47, %r46, %r45;
shr.u64 %rd20, %rd19, %r47;
add.s32 %r48, %r45, 4095;
cvt.u64.u32 %rd21, %r48;
shl.b64 %rd22, %rd21, 52;
add.s64 %rd23, %rd22, %rd19;
selp.b64 %rd28, %rd20, %rd23, %p15;
BB65_16:
// OR the sign bit of the original dividend into the result's high word.
mov.b64 %fd15, %rd28;
{
.reg .b32 %temp;
mov.b64 {%r49, %temp}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r50}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd19;
}
and.b32 %r52, %r51, -2147483648;
or.b32 %r53, %r50, %r52;
mov.b64 %fd19, {%r49, %r53};
bra.uni BB65_18;
BB65_17:
.loc 3 458 10
// NaN operand: x + s yields NaN.
add.f64 %fd19, %fd19, %fd11;
BB65_18:
.loc 2 68 89
// dst[outer * stride_dst + inner] = result
mad.lo.s32 %r54, %r59, %r18, %r60;
mul.wide.s32 %rd25, %r54, 8;
add.s64 %rd26, %rd24, %rd25;
.loc 2 68 89
st.global.f64 [%rd26], %fd19;
.loc 2 68 22
// inner += nctaid.y * ntid.y
mov.u32 %r56, %nctaid.y;
mad.lo.s32 %r60, %r56, %r24, %r60;
.loc 2 68 1
setp.lt.s32 %p16, %r60, %r16;
@%p16 bra BB65_4;
BB65_19:
.loc 2 68 22
// outer += nctaid.x * ntid.x
mov.u32 %r57, %nctaid.x;
mad.lo.s32 %r59, %r57, %r20, %r59;
.loc 2 68 1
setp.lt.s32 %p17, %r59, %r17;
@%p17 bra BB65_2;
BB65_20:
.loc 2 68 2
ret;
}
// map2_s_v_mod_double: scalar-by-array floating-point remainder,
// dst[i,j] = rem(s, src[i,j]), where the result takes the sign of s.
// The remainder code carries ".loc 3 458" (inlined from the CUDA double math
// header; presumably fmod -- confirm against the .cu source).
//
// Parameters (as used by the code below):
//   param_0 (u32): bound for the y-derived (inner) index
//   param_1 (u32): bound for the x-derived (outer) index
//   param_2 (u64): destination base address
//   param_3 (u32): destination stride in elements
//   param_4 (f64): scalar dividend s
//   param_5 (u64): source array (divisors) base address
//   param_6 (u32): source stride in elements
// Element offset is outer * stride + inner, scaled by 8 bytes; both indices
// start at ntid * ctaid + tid and advance by nctaid * ntid.
.visible .entry map2_s_v_mod_double(
.param .u32 map2_s_v_mod_double_param_0,
.param .u32 map2_s_v_mod_double_param_1,
.param .u64 map2_s_v_mod_double_param_2,
.param .u32 map2_s_v_mod_double_param_3,
.param .f64 map2_s_v_mod_double_param_4,
.param .u64 map2_s_v_mod_double_param_5,
.param .u32 map2_s_v_mod_double_param_6
)
{
.reg .pred %p<18>;
.reg .s32 %r<64>;
.reg .s64 %rd<29>;
.reg .f64 %fd<20>;
ld.param.u32 %r16, [map2_s_v_mod_double_param_0];
ld.param.u32 %r17, [map2_s_v_mod_double_param_1];
ld.param.u64 %rd9, [map2_s_v_mod_double_param_2];
ld.param.u32 %r18, [map2_s_v_mod_double_param_3];
ld.param.f64 %fd11, [map2_s_v_mod_double_param_4];
ld.param.u64 %rd10, [map2_s_v_mod_double_param_5];
ld.param.u32 %r19, [map2_s_v_mod_double_param_6];
.loc 2 68 1
// %r59 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r20, %ntid.x;
mov.u32 %r21, %ctaid.x;
mov.u32 %r22, %tid.x;
mad.lo.s32 %r59, %r20, %r21, %r22;
.loc 2 68 1
setp.ge.s32 %p1, %r59, %r17;
@%p1 bra BB66_20;
cvta.to.global.u64 %rd11, %rd10;
cvta.to.global.u64 %rd24, %rd9;
// ---- outer loop head ----
BB66_2:
.loc 2 68 1
// %r60 = inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r23, %ctaid.y;
mov.u32 %r24, %ntid.y;
mov.u32 %r25, %tid.y;
mad.lo.s32 %r60, %r24, %r23, %r25;
.loc 2 68 1
setp.ge.s32 %p2, %r60, %r16;
@%p2 bra BB66_19;
.loc 3 458 10
// Scalar-only values computed outside the inner loop:
// %fd1 = |s|, %fd2 = |s| * 2^54 (used only if s is subnormal).
abs.f64 %fd1, %fd11;
mul.f64 %fd2, %fd1, 0d4350000000000000;
// ---- inner loop body: one element per iteration ----
BB66_4:
.loc 2 68 1
// %rd13 = &src[outer * stride_src + inner]
mad.lo.s32 %r30, %r59, %r19, %r60;
mul.wide.s32 %rd12, %r30, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 2 68 1
// %fd3 = divisor y, %fd18 = |y|
ld.global.f64 %fd3, [%rd13];
.loc 3 458 10
abs.f64 %fd18, %fd3;
// Either operand NaN (|v| greater-or-unordered than +inf) -> y + s path.
setp.gtu.f64 %p3, %fd18, 0d7FF0000000000000;
setp.gtu.f64 %p4, %fd1, 0d7FF0000000000000;
or.pred %p5, %p4, %p3;
.loc 3 458 10
@%p5 bra BB66_17;
// |s| == inf or y == 0 -> result is NaN.
setp.eq.f64 %p6, %fd1, 0d7FF0000000000000;
.loc 3 458 10
setp.eq.f64 %p7, %fd18, 0d0000000000000000;
or.pred %p8, %p6, %p7;
@!%p8 bra BB66_7;
bra.uni BB66_6;
BB66_6:
mov.f64 %fd16, 0dFFF8000000000000;
.loc 3 458 10
mov.f64 %fd19, %fd16;
bra.uni BB66_18;
BB66_7:
.loc 3 458 10
// |s| < |y|: the result is s itself.
setp.ltu.f64 %p9, %fd1, %fd18;
mov.f64 %fd19, %fd11;
@%p9 bra BB66_18;
// %r61 / %r62 = biased exponents of |s| / |y|.
{
.reg .b32 %temp;
mov.b64 {%temp, %r31}, %fd1;
}
shr.s32 %r61, %r31, 20;
{
.reg .b32 %temp;
mov.b64 {%temp, %r32}, %fd18;
}
shr.s32 %r62, %r32, 20;
// Subnormal dividend: use the pre-scaled |s| * 2^54 from %fd2 and
// compensate the exponent by -54.
setp.gt.s32 %p10, %r61, 0;
mov.f64 %fd17, %fd1;
@%p10 bra BB66_10;
{
.reg .b32 %temp;
mov.b64 {%temp, %r33}, %fd2;
}
shr.s32 %r34, %r33, 20;
add.s32 %r35, %r61, %r34;
add.s32 %r61, %r35, -54;
mov.f64 %fd17, %fd2;
BB66_10:
.loc 3 458 10
mov.f64 %fd5, %fd17;
// Subnormal divisor: scale by 2^54 and compensate the exponent by -54.
setp.gt.s32 %p11, %r62, 0;
@%p11 bra BB66_12;
mul.f64 %fd18, %fd18, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r36}, %fd18;
}
shr.s32 %r37, %r36, 20;
add.s32 %r38, %r62, %r37;
add.s32 %r62, %r38, -54;
BB66_12:
// Build 53-bit integer significands (52 fraction bits + implicit 2^52 bit).
mov.b64 %rd14, %fd5;
and.b64 %rd15, %rd14, 4503599627370495;
or.b64 %rd27, %rd15, 4503599627370496;
mov.b64 %rd16, %fd18;
and.b64 %rd17, %rd16, 4503599627370495;
or.b64 %rd2, %rd17, 4503599627370496;
// Iteration count = exp(s) - exp(y) + 1.
add.s32 %r39, %r61, 1;
sub.s32 %r63, %r39, %r62;
// ---- shift-and-subtract loop ----
BB66_13:
// Keep (dividend - divisor) unless it is negative, then shift left by one.
sub.s64 %rd18, %rd27, %rd2;
mov.b64 %fd12, %rd18;
{
.reg .b32 %temp;
mov.b64 {%temp, %r40}, %fd12;
}
setp.lt.s32 %p12, %r40, 0;
selp.b64 %rd4, %rd27, %rd18, %p12;
shl.b64 %rd27, %rd4, 1;
add.s32 %r63, %r63, -1;
setp.gt.s32 %p13, %r63, 0;
@%p13 bra BB66_13;
// Zero remainder skips renormalisation.
and.b64 %rd28, %rd4, 9223372036854775807;
setp.eq.s64 %p14, %rd28, 0;
@%p14 bra BB66_16;
// Renormalise: scale the remainder bits by 2^54 to find the shift amount
// (%r44 = 55 - exponent), then derive the result exponent (%r45).
mov.b64 %fd13, %rd28;
mul.f64 %fd14, %fd13, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r41}, %fd14;
}
shr.s32 %r42, %r41, 20;
mov.u32 %r43, 55;
.loc 3 458 10
sub.s32 %r44, %r43, %r42;
sub.s32 %r45, %r62, %r44;
shl.b64 %rd19, %rd28, %r44;
// Exponent < 1 -> subnormal result via right shift (%rd20); otherwise pack
// exponent and significand (%rd23).
setp.lt.s32 %p15, %r45, 1;
mov.u32 %r46, 1;
.loc 3 458 10
sub.s32 %r47, %r46, %r45;
shr.u64 %rd20, %rd19, %r47;
add.s32 %r48, %r45, 4095;
cvt.u64.u32 %rd21, %r48;
shl.b64 %rd22, %rd21, 52;
add.s64 %rd23, %rd22, %rd19;
selp.b64 %rd28, %rd20, %rd23, %p15;
BB66_16:
// OR the sign bit of the scalar dividend into the result's high word.
mov.b64 %fd15, %rd28;
{
.reg .b32 %temp;
mov.b64 {%r49, %temp}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r50}, %fd15;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r51}, %fd11;
}
and.b32 %r52, %r51, -2147483648;
or.b32 %r53, %r50, %r52;
mov.b64 %fd8, {%r49, %r53};
mov.f64 %fd19, %fd8;
bra.uni BB66_18;
BB66_17:
.loc 3 458 10
// NaN operand: y + s yields NaN.
add.f64 %fd9, %fd3, %fd11;
mov.f64 %fd19, %fd9;
BB66_18:
.loc 3 458 10
mov.f64 %fd10, %fd19;
.loc 2 68 89
// dst[outer * stride_dst + inner] = result
mad.lo.s32 %r54, %r59, %r18, %r60;
mul.wide.s32 %rd25, %r54, 8;
add.s64 %rd26, %rd24, %rd25;
.loc 2 68 89
st.global.f64 [%rd26], %fd10;
.loc 2 68 22
// inner += nctaid.y * ntid.y
mov.u32 %r56, %nctaid.y;
mad.lo.s32 %r60, %r56, %r24, %r60;
.loc 2 68 1
setp.lt.s32 %p16, %r60, %r16;
@%p16 bra BB66_4;
BB66_19:
.loc 2 68 22
// outer += nctaid.x * ntid.x
mov.u32 %r57, %nctaid.x;
mad.lo.s32 %r59, %r57, %r20, %r59;
.loc 2 68 1
setp.lt.s32 %p17, %r59, %r17;
@%p17 bra BB66_2;
BB66_20:
.loc 2 68 2
ret;
}
// map2_transpose_mod_double: floating-point remainder where the second operand
// is read transposed through a shared-memory tile:
//   dst[r, c] = rem(a[r, c], b[c, r])
// with r bounded by param_1 and c bounded by param_0. The remainder code
// carries ".loc 3 458" (inlined from the CUDA double math header; presumably
// fmod -- confirm against the .cu source).
//
// Parameters (as used by the code below):
//   param_0 (u32): bound for the x-derived tile origin / column index
//   param_1 (u32): bound for the y-derived tile origin / row index
//   param_2 (u64): destination base address
//   param_3 (u32): destination stride in elements
//   param_4 (u64): operand a base address (read directly)
//   param_5 (u32): stride for a, in elements
//   param_6 (u64): operand b base address (staged through the tile)
//   param_7 (u32): stride for b, in elements
//
// The tile is 8448 bytes with a 264-byte row pitch, i.e. 32 rows of 33
// doubles (the extra column is presumably bank-conflict padding).
// NOTE(review): tile origins advance in units of ntid.x / ntid.y while each
// tile spans 32 elements, so full coverage looks to require ntid.x and ntid.y
// <= 32 -- confirm the launch configuration used by the host code.
.visible .entry map2_transpose_mod_double(
.param .u32 map2_transpose_mod_double_param_0,
.param .u32 map2_transpose_mod_double_param_1,
.param .u64 map2_transpose_mod_double_param_2,
.param .u32 map2_transpose_mod_double_param_3,
.param .u64 map2_transpose_mod_double_param_4,
.param .u32 map2_transpose_mod_double_param_5,
.param .u64 map2_transpose_mod_double_param_6,
.param .u32 map2_transpose_mod_double_param_7
)
{
.reg .pred %p<26>;
.reg .s32 %r<103>;
.reg .s64 %rd<43>;
.reg .f64 %fd<21>;
// demoted variable
.shared .align 8 .b8 map2_transpose_mod_double$__cuda_local_var_32584_1747_non_const_tile[8448];
ld.param.u32 %r36, [map2_transpose_mod_double_param_0];
ld.param.u32 %r37, [map2_transpose_mod_double_param_1];
ld.param.u64 %rd12, [map2_transpose_mod_double_param_2];
ld.param.u32 %r38, [map2_transpose_mod_double_param_3];
ld.param.u64 %rd13, [map2_transpose_mod_double_param_4];
ld.param.u32 %r39, [map2_transpose_mod_double_param_5];
ld.param.u64 %rd14, [map2_transpose_mod_double_param_6];
ld.param.u32 %r40, [map2_transpose_mod_double_param_7];
// %rd1 = dst, %rd2 = a, %rd3 = b (global addresses)
cvta.to.global.u64 %rd1, %rd12;
cvta.to.global.u64 %rd2, %rd13;
cvta.to.global.u64 %rd3, %rd14;
.loc 2 68 1
// %r94 = tile origin along y = ctaid.y * ntid.y (same for the whole block)
mov.u32 %r1, %ntid.x;
mov.u32 %r41, %ctaid.y;
mov.u32 %r42, %ntid.y;
mul.lo.s32 %r94, %r41, %r42;
.loc 2 68 1
setp.ge.s32 %p2, %r94, %r37;
@%p2 bra BB67_29;
// ---- outer tile loop (y origin) ----
BB67_1:
.loc 2 68 1
// %r95 = tile origin along x = ctaid.x * ntid.x
mov.u32 %r44, %ctaid.x;
mul.lo.s32 %r95, %r44, %r1;
.loc 2 68 1
setp.ge.s32 %p3, %r95, %r36;
@%p3 bra BB67_28;
.loc 2 68 1
// %r4 = end of the tile along y = min(param_1, y_origin + 32)
add.s32 %r46, %r94, 32;
.loc 4 2621 10
min.s32 %r4, %r37, %r46;
// ---- inner tile loop (x origin) ----
BB67_3:
.loc 2 68 1
// %r7 = end of the tile along x = min(param_0, x_origin + 32)
add.s32 %r49, %r95, 32;
.loc 4 2621 10
min.s32 %r7, %r36, %r49;
.loc 2 68 1
// Load phase: %r96 walks b's leading index from x_origin + tid.y.
mov.u32 %r50, %tid.y;
add.s32 %r96, %r50, %r95;
.loc 2 68 1
setp.ge.s32 %p4, %r96, %r7;
@%p4 bra BB67_8;
BB67_4:
.loc 2 68 1
mov.u32 %r51, %tid.x;
.loc 2 68 1
// %r97 walks b's trailing index from y_origin + tid.x.
add.s32 %r97, %r51, %r94;
.loc 2 68 1
setp.ge.s32 %p5, %r97, %r4;
@%p5 bra BB67_7;
.loc 2 68 1
// %r10 = row offset into b, %r11 = tile row (relative to x_origin)
mul.lo.s32 %r10, %r96, %r40;
sub.s32 %r11, %r96, %r95;
BB67_6:
// tile[%r96 - x_origin][%r97 - y_origin] = b[%r96 * stride_b + %r97]
add.s32 %r54, %r97, %r10;
mul.wide.s32 %rd15, %r54, 8;
add.s64 %rd16, %rd3, %rd15;
.loc 2 68 1
sub.s32 %r55, %r97, %r94;
mul.wide.s32 %rd17, %r11, 264;
mov.u64 %rd18, map2_transpose_mod_double$__cuda_local_var_32584_1747_non_const_tile;
add.s64 %rd19, %rd18, %rd17;
mul.wide.s32 %rd20, %r55, 8;
add.s64 %rd21, %rd19, %rd20;
.loc 2 68 1
ld.global.f64 %fd12, [%rd16];
st.shared.f64 [%rd21], %fd12;
.loc 2 68 58
// advance by ntid.x
add.s32 %r97, %r1, %r97;
.loc 2 68 1
setp.lt.s32 %p6, %r97, %r4;
@%p6 bra BB67_6;
BB67_7:
.loc 2 68 58
// advance by ntid.y
add.s32 %r96, %r42, %r96;
.loc 2 68 1
setp.lt.s32 %p7, %r96, %r7;
@%p7 bra BB67_4;
BB67_8:
.loc 2 68 1
// Compute phase setup: %r98 = row index = y_origin + tid.y.
add.s32 %r98, %r50, %r94;
setp.lt.s32 %p1, %r98, %r4;
.loc 2 68 1
// Barrier: all tile stores must complete before any thread reads the tile.
bar.sync 0;
.loc 2 68 1
@!%p1 bra BB67_27;
bra.uni BB67_9;
BB67_9:
.loc 2 68 1
// %r99 = column index = x_origin + tid.x
mov.u32 %r60, %tid.x;
add.s32 %r99, %r60, %r95;
.loc 2 68 1
setp.ge.s32 %p8, %r99, %r7;
@%p8 bra BB67_26;
.loc 2 68 1
// %r18 = row offset into a, %r19 = tile column, %r20 = row offset into dst
mul.lo.s32 %r18, %r98, %r39;
sub.s32 %r19, %r98, %r94;
.loc 2 68 174
mul.lo.s32 %r20, %r98, %r38;
// ---- per-element work ----
BB67_11:
.loc 2 68 1
// %rd23 = &a[row * stride_a + col]
add.s32 %r63, %r99, %r18;
mul.wide.s32 %rd22, %r63, 8;
add.s64 %rd23, %rd2, %rd22;
.loc 2 68 1
// %rd28 = &tile[col - x_origin][row - y_origin]  (transposed read of b)
sub.s32 %r64, %r99, %r95;
mul.wide.s32 %rd24, %r64, 264;
mov.u64 %rd25, map2_transpose_mod_double$__cuda_local_var_32584_1747_non_const_tile;
add.s64 %rd26, %rd25, %rd24;
mul.wide.s32 %rd27, %r19, 8;
add.s64 %rd28, %rd26, %rd27;
.loc 2 68 1
// %fd20 = a (also the result register), %fd18 = |a|
ld.global.f64 %fd20, [%rd23];
.loc 3 458 10
abs.f64 %fd18, %fd20;
.loc 2 68 1
// %fd3 = b (from the tile), %fd19 = |b|
ld.shared.f64 %fd3, [%rd28];
.loc 3 458 10
abs.f64 %fd19, %fd3;
// Either operand NaN (|v| greater-or-unordered than +inf) -> a + b path.
setp.gtu.f64 %p9, %fd18, 0d7FF0000000000000;
setp.gtu.f64 %p10, %fd19, 0d7FF0000000000000;
or.pred %p11, %p9, %p10;
.loc 3 458 10
@%p11 bra BB67_24;
// |a| == inf or |b| == 0 -> result is NaN.
setp.eq.f64 %p12, %fd18, 0d7FF0000000000000;
setp.eq.f64 %p13, %fd19, 0d0000000000000000;
or.pred %p14, %p12, %p13;
@!%p14 bra BB67_14;
bra.uni BB67_13;
BB67_13:
mov.f64 %fd20, 0dFFF8000000000000;
bra.uni BB67_25;
BB67_14:
.loc 3 458 10
// |a| < |b|: the result is a itself.
setp.ltu.f64 %p15, %fd18, %fd19;
@%p15 bra BB67_25;
// %r100 / %r101 = biased exponents of |a| / |b|.
{
.reg .b32 %temp;
mov.b64 {%temp, %r65}, %fd18;
}
shr.s32 %r100, %r65, 20;
{
.reg .b32 %temp;
mov.b64 {%temp, %r66}, %fd19;
}
shr.s32 %r101, %r66, 20;
// Subnormal |a|: scale by 2^54 and compensate the exponent by -54.
setp.gt.s32 %p16, %r100, 0;
@%p16 bra BB67_17;
mul.f64 %fd18, %fd18, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r67}, %fd18;
}
shr.s32 %r68, %r67, 20;
add.s32 %r69, %r100, %r68;
add.s32 %r100, %r69, -54;
BB67_17:
// Subnormal |b|: same treatment.
setp.gt.s32 %p17, %r101, 0;
@%p17 bra BB67_19;
mul.f64 %fd19, %fd19, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r70}, %fd19;
}
shr.s32 %r71, %r70, 20;
add.s32 %r72, %r101, %r71;
add.s32 %r101, %r72, -54;
BB67_19:
// Build 53-bit integer significands (52 fraction bits + implicit 2^52 bit).
mov.b64 %rd29, %fd18;
and.b64 %rd30, %rd29, 4503599627370495;
or.b64 %rd41, %rd30, 4503599627370496;
mov.b64 %rd31, %fd19;
and.b64 %rd32, %rd31, 4503599627370495;
or.b64 %rd5, %rd32, 4503599627370496;
// Iteration count = exp(a) - exp(b) + 1.
add.s32 %r73, %r100, 1;
sub.s32 %r102, %r73, %r101;
// ---- shift-and-subtract loop ----
BB67_20:
// Keep (dividend - divisor) unless it is negative, then shift left by one.
sub.s64 %rd33, %rd41, %rd5;
mov.b64 %fd13, %rd33;
{
.reg .b32 %temp;
mov.b64 {%temp, %r74}, %fd13;
}
setp.lt.s32 %p18, %r74, 0;
selp.b64 %rd7, %rd41, %rd33, %p18;
shl.b64 %rd41, %rd7, 1;
add.s32 %r102, %r102, -1;
setp.gt.s32 %p19, %r102, 0;
@%p19 bra BB67_20;
// Zero remainder skips renormalisation.
and.b64 %rd42, %rd7, 9223372036854775807;
setp.eq.s64 %p20, %rd42, 0;
@%p20 bra BB67_23;
// Renormalise: scale the remainder bits by 2^54 to find the shift amount
// (%r78 = 55 - exponent), then derive the result exponent (%r79).
mov.b64 %fd14, %rd42;
mul.f64 %fd15, %fd14, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r75}, %fd15;
}
shr.s32 %r76, %r75, 20;
mov.u32 %r77, 55;
.loc 3 458 10
sub.s32 %r78, %r77, %r76;
sub.s32 %r79, %r101, %r78;
shl.b64 %rd34, %rd42, %r78;
// Exponent < 1 -> subnormal result via right shift (%rd35); otherwise pack
// exponent and significand (%rd38).
setp.lt.s32 %p21, %r79, 1;
mov.u32 %r80, 1;
.loc 3 458 10
sub.s32 %r81, %r80, %r79;
shr.u64 %rd35, %rd34, %r81;
add.s32 %r82, %r79, 4095;
cvt.u64.u32 %rd36, %r82;
shl.b64 %rd37, %rd36, 52;
add.s64 %rd38, %rd37, %rd34;
selp.b64 %rd42, %rd35, %rd38, %p21;
BB67_23:
// OR the sign bit of the original a into the result's high word.
mov.b64 %fd16, %rd42;
{
.reg .b32 %temp;
mov.b64 {%r83, %temp}, %fd16;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r84}, %fd16;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r85}, %fd20;
}
and.b32 %r86, %r85, -2147483648;
or.b32 %r87, %r84, %r86;
mov.b64 %fd20, {%r83, %r87};
bra.uni BB67_25;
BB67_24:
.loc 3 458 10
// NaN operand: a + b yields NaN.
add.f64 %fd20, %fd20, %fd3;
BB67_25:
.loc 2 68 174
// dst[row * stride_dst + col] = result
add.s32 %r88, %r99, %r20;
mul.wide.s32 %rd39, %r88, 8;
add.s64 %rd40, %rd1, %rd39;
.loc 2 68 174
st.global.f64 [%rd40], %fd20;
.loc 2 68 58
// col += ntid.x
add.s32 %r99, %r1, %r99;
.loc 2 68 1
setp.lt.s32 %p22, %r99, %r7;
@%p22 bra BB67_11;
BB67_26:
.loc 2 68 58
// row += ntid.y
add.s32 %r98, %r42, %r98;
.loc 2 68 1
setp.lt.s32 %p23, %r98, %r4;
@%p23 bra BB67_9;
BB67_27:
.loc 2 68 1
// Barrier: all tile reads must finish before the next iteration overwrites
// the tile. The enclosing loop conditions depend only on ctaid/ntid/nctaid
// and parameters, so every thread of the block reaches this barrier.
bar.sync 0;
.loc 2 68 1
mov.u32 %r91, %nctaid.x;
.loc 2 68 21
// x_origin += nctaid.x * ntid.x
mad.lo.s32 %r95, %r91, %r1, %r95;
.loc 2 68 1
setp.lt.s32 %p24, %r95, %r36;
@%p24 bra BB67_3;
BB67_28:
.loc 2 68 1
mov.u32 %r92, %nctaid.y;
.loc 2 68 21
// y_origin += nctaid.y * ntid.y
mad.lo.s32 %r94, %r92, %r42, %r94;
.loc 2 68 1
setp.lt.s32 %p25, %r94, %r37;
@%p25 bra BB67_1;
BB67_29:
.loc 2 68 2
ret;
}
// map2_pow_double: element-wise power of two f64 arrays,
// dst[i,j] = pow(a[i,j], b[i,j]). Special cases are handled inline
// (".loc 3 328", CUDA double math header); the general case calls
// __internal_accurate_pow(|a|, b) and fixes up the sign.
//
// Parameters (as used by the code below):
//   param_0 (u32): bound for the y-derived (inner) index
//   param_1 (u32): bound for the x-derived (outer) index
//   param_2 (u64): destination base address
//   param_3 (u32): destination stride in elements
//   param_4 (u64): base operand (a) address
//   param_5 (u32): stride for a, in elements
//   param_6 (u64): exponent operand (b) address
//   param_7 (u32): stride for b, in elements
// Element offset is outer * stride + inner, scaled by 8 bytes; both indices
// start at ntid * ctaid + tid and advance by nctaid * ntid.
.visible .entry map2_pow_double(
.param .u32 map2_pow_double_param_0,
.param .u32 map2_pow_double_param_1,
.param .u64 map2_pow_double_param_2,
.param .u32 map2_pow_double_param_3,
.param .u64 map2_pow_double_param_4,
.param .u32 map2_pow_double_param_5,
.param .u64 map2_pow_double_param_6,
.param .u32 map2_pow_double_param_7
)
{
.reg .pred %p<26>;
.reg .s32 %r<52>;
.reg .s64 %rd<13>;
.reg .f64 %fd<20>;
ld.param.u32 %r10, [map2_pow_double_param_0];
ld.param.u32 %r11, [map2_pow_double_param_1];
ld.param.u64 %rd1, [map2_pow_double_param_2];
ld.param.u32 %r12, [map2_pow_double_param_3];
ld.param.u64 %rd2, [map2_pow_double_param_4];
ld.param.u32 %r13, [map2_pow_double_param_5];
ld.param.u64 %rd3, [map2_pow_double_param_6];
ld.param.u32 %r14, [map2_pow_double_param_7];
.loc 2 69 1
// %r50 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r50, %r15, %r16, %r17;
.loc 2 69 1
setp.ge.s32 %p1, %r50, %r11;
@%p1 bra BB68_21;
.loc 2 69 1
mov.u32 %r18, %ntid.y;
.loc 2 69 22
// %r2 = inner-loop step = nctaid.y * ntid.y
mov.u32 %r19, %nctaid.y;
mul.lo.s32 %r2, %r19, %r18;
cvta.to.global.u64 %rd4, %rd2;
cvta.to.global.u64 %rd5, %rd3;
cvta.to.global.u64 %rd10, %rd1;
// ---- outer loop head ----
BB68_2:
.loc 2 69 1
// %r51 = inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r20, %ctaid.y;
mov.u32 %r22, %tid.y;
mad.lo.s32 %r51, %r18, %r20, %r22;
.loc 2 69 1
setp.ge.s32 %p2, %r51, %r10;
@%p2 bra BB68_20;
// ---- inner loop body: one element per iteration ----
BB68_3:
.loc 2 69 1
// %fd1 = a[outer * stride_a + inner], %fd2 = b[outer * stride_b + inner]
mad.lo.s32 %r27, %r50, %r13, %r51;
mul.wide.s32 %rd6, %r27, 8;
add.s64 %rd7, %rd4, %rd6;
mad.lo.s32 %r28, %r50, %r14, %r51;
mul.wide.s32 %rd8, %r28, 8;
add.s64 %rd9, %rd5, %rd8;
ld.global.f64 %fd1, [%rd7];
.loc 3 328 10
// %r6 = high 32 bits of a (sign bit lives here)
{
.reg .b32 %temp;
mov.b64 {%temp, %r6}, %fd1;
}
.loc 2 69 1
ld.global.f64 %fd2, [%rd9];
.loc 3 328 10
// %r7 = high 32 bits of b
{
.reg .b32 %temp;
mov.b64 {%temp, %r7}, %fd2;
}
// %fd3 = |b - 2 * trunc(b / 2)|; it equals 1.0 exactly when b is an odd
// integer, which is what the "== 1.0" tests on %fd3 below check.
mul.f64 %fd13, %fd2, 0d3FE0000000000000;
cvt.rzi.f64.f64 %fd14, %fd13;
fma.rn.f64 %fd15, %fd14, 0dC000000000000000, %fd2;
abs.f64 %fd3, %fd15;
// a == 1 or b == 0 -> result is 1.0
setp.eq.f64 %p3, %fd1, 0d3FF0000000000000;
setp.eq.f64 %p4, %fd2, 0d0000000000000000;
or.pred %p5, %p3, %p4;
@!%p5 bra BB68_5;
bra.uni BB68_4;
BB68_4:
mov.f64 %fd19, 0d3FF0000000000000;
bra.uni BB68_19;
BB68_5:
.loc 3 328 10
// NaN in either operand -> a + b (BB68_18)
abs.f64 %fd4, %fd1;
setp.gtu.f64 %p6, %fd4, 0d7FF0000000000000;
@%p6 bra BB68_18;
abs.f64 %fd5, %fd2;
setp.gtu.f64 %p7, %fd5, 0d7FF0000000000000;
@%p7 bra BB68_18;
// |b| == inf -> BB68_17; |a| == inf -> BB68_16; a == 0 -> BB68_15
setp.eq.f64 %p8, %fd5, 0d7FF0000000000000;
@%p8 bra BB68_17;
setp.eq.f64 %p9, %fd4, 0d7FF0000000000000;
@%p9 bra BB68_16;
setp.eq.f64 %p10, %fd1, 0d0000000000000000;
@%p10 bra BB68_15;
// Negative a (sign bit set in %r6) with a non-integer b -> NaN.
setp.gt.s32 %p11, %r6, -1;
@%p11 bra BB68_13;
cvt.rzi.f64.f64 %fd16, %fd2;
setp.eq.f64 %p12, %fd2, %fd16;
@%p12 bra BB68_13;
mov.f64 %fd19, 0dFFF8000000000000;
bra.uni BB68_19;
BB68_13:
// General case: %fd19 = __internal_accurate_pow(|a|, b)
setp.lt.s32 %p13, %r6, 0;
// Callseq Start 16
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd4;
.param .b64 param1;
st.param.f64 [param1+0], %fd2;
.param .b64 retval0;
.loc 3 328 10
call.uni (retval0),
__internal_accurate_pow,
(
param0,
param1
);
ld.param.f64 %fd19, [retval0+0];
}
// Callseq End 16
// Negative a raised to an odd integer b: negate the result (BB68_14).
setp.eq.f64 %p14, %fd3, 0d3FF0000000000000;
.loc 3 328 10
and.pred %p15, %p13, %p14;
@!%p15 bra BB68_19;
bra.uni BB68_14;
BB68_14:
// Flip the sign bit in the high word.
{
.reg .b32 %temp;
mov.b64 {%temp, %r29}, %fd19;
}
xor.b32 %r30, %r29, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%r31, %temp}, %fd19;
}
mov.b64 %fd19, {%r31, %r30};
bra.uni BB68_19;
BB68_15:
// a == +-0: start from a's high word if b is an odd integer (keeps the sign
// of zero), else 0; if b < 0, OR in 0x7FF00000 to make the result infinite.
setp.eq.f64 %p16, %fd3, 0d3FF0000000000000;
.loc 3 328 10
selp.b32 %r32, %r6, 0, %p16;
mov.u32 %r33, 0;
.loc 3 328 10
or.b32 %r34, %r32, 2146435072;
setp.lt.s32 %p17, %r7, 0;
selp.b32 %r35, %r34, %r32, %p17;
mov.b64 %fd19, {%r33, %r35};
bra.uni BB68_19;
BB68_16:
// |a| == inf: %r38 is 0 when b < 0 (0x80100000 + 0x7FF00000 wraps to 0) and
// 0x7FF00000 (inf) otherwise; the sign bit is set when a < 0 and b is odd.
setp.eq.f64 %p18, %fd3, 0d3FF0000000000000;
.loc 3 328 10
shr.s32 %r36, %r7, 31;
and.b32 %r37, %r36, -2146435072;
add.s32 %r38, %r37, 2146435072;
setp.lt.s32 %p19, %r6, 0;
mov.u32 %r39, 0;
.loc 3 328 10
and.pred %p20, %p19, %p18;
or.b32 %r40, %r38, -2147483648;
selp.b32 %r41, %r40, %r38, %p20;
mov.b64 %fd19, {%r39, %r41};
bra.uni BB68_19;
BB68_17:
.loc 3 328 10
// |b| == inf: inf if |a| > 1 else 0, swapped when b < 0; a == -1 gives 1.0
// (high word 0x3FF00000).
setp.gt.f64 %p21, %fd4, 0d3FF0000000000000;
selp.b32 %r42, 2146435072, 0, %p21;
mov.u32 %r43, 0;
.loc 3 328 10
xor.b32 %r44, %r42, 2146435072;
setp.lt.s32 %p22, %r7, 0;
selp.b32 %r45, %r44, %r42, %p22;
setp.eq.f64 %p23, %fd1, 0dBFF0000000000000;
selp.b32 %r46, 1072693248, %r45, %p23;
mov.b64 %fd19, {%r43, %r46};
bra.uni BB68_19;
BB68_18:
.loc 3 328 10
// NaN operand: a + b yields NaN.
add.f64 %fd19, %fd1, %fd2;
BB68_19:
.loc 2 69 42
// dst[outer * stride_dst + inner] = result
mad.lo.s32 %r47, %r50, %r12, %r51;
mul.wide.s32 %rd11, %r47, 8;
add.s64 %rd12, %rd10, %rd11;
st.global.f64 [%rd12], %fd19;
.loc 2 69 22
// inner += nctaid.y * ntid.y
add.s32 %r51, %r2, %r51;
.loc 2 69 1
setp.lt.s32 %p24, %r51, %r10;
@%p24 bra BB68_3;
BB68_20:
.loc 2 69 22
// outer += nctaid.x * ntid.x
mov.u32 %r48, %nctaid.x;
mad.lo.s32 %r50, %r48, %r15, %r50;
.loc 2 69 1
setp.lt.s32 %p25, %r50, %r11;
@%p25 bra BB68_2;
BB68_21:
.loc 2 69 2
ret;
}
// map2_v_s_pow_double: array raised to a scalar exponent,
// dst[i,j] = pow(src[i,j], s). Special cases are handled inline
// (".loc 3 328", CUDA double math header); the general case calls
// __internal_accurate_pow(|x|, s) and fixes up the sign.
//
// Parameters (as used by the code below):
//   param_0 (u32): bound for the y-derived (inner) index
//   param_1 (u32): bound for the x-derived (outer) index
//   param_2 (u64): destination base address
//   param_3 (u32): destination stride in elements
//   param_4 (u64): source (base values) address
//   param_5 (u32): source stride in elements
//   param_6 (f64): scalar exponent s
// Element offset is outer * stride + inner, scaled by 8 bytes; both indices
// start at ntid * ctaid + tid and advance by nctaid * ntid.
.visible .entry map2_v_s_pow_double(
.param .u32 map2_v_s_pow_double_param_0,
.param .u32 map2_v_s_pow_double_param_1,
.param .u64 map2_v_s_pow_double_param_2,
.param .u32 map2_v_s_pow_double_param_3,
.param .u64 map2_v_s_pow_double_param_4,
.param .u32 map2_v_s_pow_double_param_5,
.param .f64 map2_v_s_pow_double_param_6
)
{
.reg .pred %p<26>;
.reg .s32 %r<49>;
.reg .s64 %rd<9>;
.reg .f64 %fd<20>;
ld.param.u32 %r10, [map2_v_s_pow_double_param_0];
ld.param.u32 %r11, [map2_v_s_pow_double_param_1];
ld.param.u64 %rd1, [map2_v_s_pow_double_param_2];
ld.param.u32 %r12, [map2_v_s_pow_double_param_3];
ld.param.u64 %rd2, [map2_v_s_pow_double_param_4];
ld.param.u32 %r13, [map2_v_s_pow_double_param_5];
ld.param.f64 %fd13, [map2_v_s_pow_double_param_6];
.loc 2 69 1
// %r47 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r14, %ntid.x;
mov.u32 %r15, %ctaid.x;
mov.u32 %r16, %tid.x;
mad.lo.s32 %r47, %r14, %r15, %r16;
.loc 2 69 1
setp.ge.s32 %p1, %r47, %r11;
@%p1 bra BB69_22;
.loc 3 328 10
// Exponent-only value computed once: %fd2 = |s - 2 * trunc(s / 2)|, which
// equals 1.0 exactly when s is an odd integer.
mul.f64 %fd1, %fd13, 0d3FE0000000000000;
cvt.rzi.f64.f64 %fd14, %fd1;
fma.rn.f64 %fd15, %fd14, 0dC000000000000000, %fd13;
abs.f64 %fd2, %fd15;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd6, %rd1;
// ---- outer loop head ----
BB69_2:
.loc 2 69 1
// %r48 = inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r17, %ctaid.y;
mov.u32 %r18, %ntid.y;
mov.u32 %r19, %tid.y;
mad.lo.s32 %r48, %r18, %r17, %r19;
.loc 2 69 1
setp.ge.s32 %p2, %r48, %r10;
@%p2 bra BB69_21;
.loc 3 328 10
// %r3 = high 32 bits of s. %r4 = high word used for the |x| == inf case:
// 0 when s < 0 (0x80100000 + 0x7FF00000 wraps to 0), else 0x7FF00000 (inf).
{
.reg .b32 %temp;
mov.b64 {%temp, %r3}, %fd13;
}
shr.s32 %r21, %r3, 31;
and.b32 %r22, %r21, -2146435072;
add.s32 %r4, %r22, 2146435072;
// ---- inner loop body: one element per iteration ----
BB69_4:
.loc 2 69 1
// %fd3 = x = src[outer * stride_src + inner]
mad.lo.s32 %r26, %r47, %r13, %r48;
mul.wide.s32 %rd4, %r26, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd3, [%rd5];
.loc 3 328 10
// %r7 = high 32 bits of x (sign bit lives here)
{
.reg .b32 %temp;
mov.b64 {%temp, %r7}, %fd3;
}
// x == 1 or s == 0 -> result is 1.0
setp.eq.f64 %p3, %fd3, 0d3FF0000000000000;
setp.eq.f64 %p4, %fd13, 0d0000000000000000;
.loc 3 328 10
or.pred %p5, %p3, %p4;
@!%p5 bra BB69_6;
bra.uni BB69_5;
BB69_5:
mov.f64 %fd19, 0d3FF0000000000000;
bra.uni BB69_20;
BB69_6:
.loc 3 328 10
// NaN in either operand -> x + s (BB69_19)
abs.f64 %fd4, %fd3;
setp.gtu.f64 %p6, %fd4, 0d7FF0000000000000;
@%p6 bra BB69_19;
abs.f64 %fd5, %fd13;
setp.gtu.f64 %p7, %fd5, 0d7FF0000000000000;
@%p7 bra BB69_19;
// |s| == inf -> BB69_18; |x| == inf -> BB69_17; x == 0 -> BB69_16
setp.eq.f64 %p8, %fd5, 0d7FF0000000000000;
@%p8 bra BB69_18;
setp.eq.f64 %p9, %fd4, 0d7FF0000000000000;
@%p9 bra BB69_17;
setp.eq.f64 %p10, %fd3, 0d0000000000000000;
@%p10 bra BB69_16;
// Negative x (sign bit set in %r7) with a non-integer s -> NaN.
setp.gt.s32 %p11, %r7, -1;
@%p11 bra BB69_14;
cvt.rzi.f64.f64 %fd16, %fd13;
setp.eq.f64 %p12, %fd16, %fd13;
@%p12 bra BB69_14;
mov.f64 %fd19, 0dFFF8000000000000;
bra.uni BB69_20;
BB69_14:
// General case: %fd19 = __internal_accurate_pow(|x|, s)
setp.lt.s32 %p13, %r7, 0;
// Callseq Start 17
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd4;
.param .b64 param1;
st.param.f64 [param1+0], %fd13;
.param .b64 retval0;
.loc 3 328 10
call.uni (retval0),
__internal_accurate_pow,
(
param0,
param1
);
ld.param.f64 %fd19, [retval0+0];
}
// Callseq End 17
// Negative x raised to an odd integer s: negate the result (BB69_15).
setp.eq.f64 %p14, %fd2, 0d3FF0000000000000;
.loc 3 328 10
and.pred %p15, %p13, %p14;
@!%p15 bra BB69_20;
bra.uni BB69_15;
BB69_15:
// Flip the sign bit in the high word.
{
.reg .b32 %temp;
mov.b64 {%temp, %r27}, %fd19;
}
xor.b32 %r28, %r27, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%r29, %temp}, %fd19;
}
mov.b64 %fd19, {%r29, %r28};
bra.uni BB69_20;
BB69_16:
// x == +-0: start from x's high word if s is an odd integer (keeps the sign
// of zero), else 0; if s < 0, OR in 0x7FF00000 to make the result infinite.
setp.lt.s32 %p16, %r3, 0;
mov.u32 %r30, 0;
setp.eq.f64 %p17, %fd2, 0d3FF0000000000000;
.loc 3 328 10
selp.b32 %r31, %r7, 0, %p17;
or.b32 %r32, %r31, 2146435072;
selp.b32 %r33, %r32, %r31, %p16;
mov.b64 %fd19, {%r30, %r33};
bra.uni BB69_20;
BB69_17:
// |x| == inf: use the precomputed %r4 (0 or inf); set the sign bit when
// x < 0 and s is an odd integer.
setp.eq.f64 %p18, %fd2, 0d3FF0000000000000;
.loc 3 328 10
setp.lt.s32 %p19, %r7, 0;
mov.u32 %r34, 0;
.loc 3 328 10
and.pred %p20, %p19, %p18;
or.b32 %r35, %r4, -2147483648;
selp.b32 %r36, %r35, %r4, %p20;
mov.b64 %fd19, {%r34, %r36};
bra.uni BB69_20;
BB69_18:
// |s| == inf: inf if |x| > 1 else 0, swapped when s < 0; x == -1 gives 1.0
// (high word 0x3FF00000).
setp.lt.s32 %p21, %r3, 0;
mov.u32 %r37, 0;
.loc 3 328 10
setp.gt.f64 %p22, %fd4, 0d3FF0000000000000;
selp.b32 %r38, 2146435072, 0, %p22;
xor.b32 %r39, %r38, 2146435072;
selp.b32 %r40, %r39, %r38, %p21;
setp.eq.f64 %p23, %fd3, 0dBFF0000000000000;
selp.b32 %r41, 1072693248, %r40, %p23;
mov.b64 %fd19, {%r37, %r41};
bra.uni BB69_20;
BB69_19:
.loc 3 328 10
// NaN operand: x + s yields NaN.
add.f64 %fd19, %fd3, %fd13;
BB69_20:
.loc 2 69 42
// dst[outer * stride_dst + inner] = result
mad.lo.s32 %r42, %r47, %r12, %r48;
mul.wide.s32 %rd7, %r42, 8;
add.s64 %rd8, %rd6, %rd7;
st.global.f64 [%rd8], %fd19;
.loc 2 69 22
// inner += nctaid.y * ntid.y
mov.u32 %r44, %nctaid.y;
mad.lo.s32 %r48, %r44, %r18, %r48;
.loc 2 69 1
setp.lt.s32 %p24, %r48, %r10;
@%p24 bra BB69_4;
BB69_21:
.loc 2 69 22
// outer += nctaid.x * ntid.x
mov.u32 %r45, %nctaid.x;
mad.lo.s32 %r47, %r45, %r14, %r47;
.loc 2 69 1
setp.lt.s32 %p25, %r47, %r11;
@%p25 bra BB69_2;
BB69_22:
.loc 2 69 2
ret;
}
// map2_s_v_pow_double: scalar base raised to an array of exponents,
// dst[i,j] = pow(s, src[i,j]). Special cases are handled inline
// (".loc 3 328", CUDA double math header); the general case calls
// __internal_accurate_pow(|s|, y) and fixes up the sign.
//
// Parameters (as used by the code below):
//   param_0 (u32): bound for the y-derived (inner) index
//   param_1 (u32): bound for the x-derived (outer) index
//   param_2 (u64): destination base address
//   param_3 (u32): destination stride in elements
//   param_4 (f64): scalar base s
//   param_5 (u64): source (exponents) address
//   param_6 (u32): source stride in elements
// Element offset is outer * stride + inner, scaled by 8 bytes; both indices
// start at ntid * ctaid + tid and advance by nctaid * ntid.
.visible .entry map2_s_v_pow_double(
.param .u32 map2_s_v_pow_double_param_0,
.param .u32 map2_s_v_pow_double_param_1,
.param .u64 map2_s_v_pow_double_param_2,
.param .u32 map2_s_v_pow_double_param_3,
.param .f64 map2_s_v_pow_double_param_4,
.param .u64 map2_s_v_pow_double_param_5,
.param .u32 map2_s_v_pow_double_param_6
)
{
.reg .pred %p<26>;
.reg .s32 %r<51>;
.reg .s64 %rd<9>;
.reg .f64 %fd<20>;
ld.param.u32 %r11, [map2_s_v_pow_double_param_0];
ld.param.u32 %r12, [map2_s_v_pow_double_param_1];
ld.param.u64 %rd1, [map2_s_v_pow_double_param_2];
ld.param.u32 %r13, [map2_s_v_pow_double_param_3];
ld.param.f64 %fd12, [map2_s_v_pow_double_param_4];
ld.param.u64 %rd2, [map2_s_v_pow_double_param_5];
ld.param.u32 %r14, [map2_s_v_pow_double_param_6];
.loc 2 69 1
// %r49 = outer index = ntid.x * ctaid.x + tid.x
mov.u32 %r15, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
mad.lo.s32 %r49, %r15, %r16, %r17;
.loc 2 69 1
setp.ge.s32 %p1, %r49, %r12;
@%p1 bra BB70_22;
.loc 2 69 1
mov.u32 %r18, %ntid.y;
.loc 2 69 22
// %r2 = inner-loop step = nctaid.y * ntid.y
mov.u32 %r19, %nctaid.y;
mul.lo.s32 %r2, %r19, %r18;
cvta.to.global.u64 %rd3, %rd2;
cvta.to.global.u64 %rd6, %rd1;
// ---- outer loop head ----
BB70_2:
.loc 2 69 1
// %r50 = inner index = ntid.y * ctaid.y + tid.y
mov.u32 %r20, %ctaid.y;
mov.u32 %r22, %tid.y;
mad.lo.s32 %r50, %r18, %r20, %r22;
.loc 2 69 1
setp.ge.s32 %p2, %r50, %r11;
@%p2 bra BB70_21;
.loc 2 69 1
// %r4 = outer * stride_src (fixed for the inner loop)
mul.lo.s32 %r4, %r49, %r14;
.loc 3 328 10
// %r5 = high 32 bits of the scalar base s (sign bit lives here)
{
.reg .b32 %temp;
mov.b64 {%temp, %r5}, %fd12;
}
// ---- inner loop body: one element per iteration ----
BB70_4:
.loc 2 69 1
// %fd1 = exponent y = src[outer * stride_src + inner]
add.s32 %r27, %r50, %r4;
mul.wide.s32 %rd4, %r27, 8;
add.s64 %rd5, %rd3, %rd4;
ld.global.f64 %fd1, [%rd5];
.loc 3 328 10
// %r8 = high 32 bits of y
{
.reg .b32 %temp;
mov.b64 {%temp, %r8}, %fd1;
}
// %fd2 = |y - 2 * trunc(y / 2)|; it equals 1.0 exactly when y is an odd
// integer.
mul.f64 %fd13, %fd1, 0d3FE0000000000000;
cvt.rzi.f64.f64 %fd14, %fd13;
fma.rn.f64 %fd15, %fd14, 0dC000000000000000, %fd1;
abs.f64 %fd2, %fd15;
// s == 1 or y == 0 -> result is 1.0
setp.eq.f64 %p3, %fd1, 0d0000000000000000;
setp.eq.f64 %p4, %fd12, 0d3FF0000000000000;
.loc 3 328 10
or.pred %p5, %p4, %p3;
@!%p5 bra BB70_6;
bra.uni BB70_5;
BB70_5:
mov.f64 %fd19, 0d3FF0000000000000;
bra.uni BB70_20;
BB70_6:
.loc 3 328 10
// NaN in either operand -> y + s (BB70_19)
abs.f64 %fd3, %fd12;
setp.gtu.f64 %p6, %fd3, 0d7FF0000000000000;
@%p6 bra BB70_19;
abs.f64 %fd4, %fd1;
setp.gtu.f64 %p7, %fd4, 0d7FF0000000000000;
@%p7 bra BB70_19;
// |y| == inf -> BB70_18; |s| == inf -> BB70_17; s == 0 -> BB70_16
setp.eq.f64 %p8, %fd4, 0d7FF0000000000000;
@%p8 bra BB70_18;
setp.eq.f64 %p9, %fd3, 0d7FF0000000000000;
@%p9 bra BB70_17;
setp.eq.f64 %p10, %fd12, 0d0000000000000000;
.loc 3 328 10
@%p10 bra BB70_16;
// Negative s (sign bit set in %r5) with a non-integer y -> NaN.
setp.gt.s32 %p11, %r5, -1;
@%p11 bra BB70_14;
cvt.rzi.f64.f64 %fd16, %fd1;
setp.eq.f64 %p12, %fd1, %fd16;
@%p12 bra BB70_14;
mov.f64 %fd19, 0dFFF8000000000000;
bra.uni BB70_20;
BB70_14:
// General case: %fd19 = __internal_accurate_pow(|s|, y)
setp.eq.f64 %p13, %fd2, 0d3FF0000000000000;
// Callseq Start 18
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd3;
.param .b64 param1;
st.param.f64 [param1+0], %fd1;
.param .b64 retval0;
.loc 3 328 10
call.uni (retval0),
__internal_accurate_pow,
(
param0,
param1
);
ld.param.f64 %fd19, [retval0+0];
}
// Callseq End 18
// Negative s raised to an odd integer y: negate the result (BB70_15).
setp.lt.s32 %p14, %r5, 0;
.loc 3 328 10
and.pred %p15, %p14, %p13;
@!%p15 bra BB70_20;
bra.uni BB70_15;
BB70_15:
// Flip the sign bit in the high word.
{
.reg .b32 %temp;
mov.b64 {%temp, %r28}, %fd19;
}
xor.b32 %r29, %r28, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%r30, %temp}, %fd19;
}
mov.b64 %fd19, {%r30, %r29};
bra.uni BB70_20;
BB70_16:
// s == +-0: start from s's high word if y is an odd integer (keeps the sign
// of zero), else 0; if y < 0, OR in 0x7FF00000 to make the result infinite.
setp.eq.f64 %p16, %fd2, 0d3FF0000000000000;
.loc 3 328 10
selp.b32 %r31, %r5, 0, %p16;
mov.u32 %r32, 0;
.loc 3 328 10
or.b32 %r33, %r31, 2146435072;
setp.lt.s32 %p17, %r8, 0;
selp.b32 %r34, %r33, %r31, %p17;
mov.b64 %fd19, {%r32, %r34};
bra.uni BB70_20;
BB70_17:
// |s| == inf: %r37 is 0 when y < 0 (0x80100000 + 0x7FF00000 wraps to 0) and
// 0x7FF00000 (inf) otherwise; the sign bit is set when s < 0 and y is odd.
setp.eq.f64 %p18, %fd2, 0d3FF0000000000000;
.loc 3 328 10
shr.s32 %r35, %r8, 31;
and.b32 %r36, %r35, -2146435072;
add.s32 %r37, %r36, 2146435072;
setp.lt.s32 %p19, %r5, 0;
mov.u32 %r38, 0;
.loc 3 328 10
and.pred %p20, %p19, %p18;
or.b32 %r39, %r37, -2147483648;
selp.b32 %r40, %r39, %r37, %p20;
mov.b64 %fd19, {%r38, %r40};
bra.uni BB70_20;
BB70_18:
// |y| == inf: inf if |s| > 1 else 0, swapped when y < 0; s == -1 gives 1.0
// (high word 0x3FF00000).
setp.eq.f64 %p21, %fd12, 0dBFF0000000000000;
.loc 3 328 10
setp.gt.f64 %p22, %fd3, 0d3FF0000000000000;
selp.b32 %r41, 2146435072, 0, %p22;
mov.u32 %r42, 0;
.loc 3 328 10
xor.b32 %r43, %r41, 2146435072;
setp.lt.s32 %p23, %r8, 0;
selp.b32 %r44, %r43, %r41, %p23;
selp.b32 %r45, 1072693248, %r44, %p21;
mov.b64 %fd19, {%r42, %r45};
bra.uni BB70_20;
BB70_19:
.loc 3 328 10
// NaN operand: y + s yields NaN.
add.f64 %fd19, %fd1, %fd12;
BB70_20:
.loc 2 69 42
// dst[outer * stride_dst + inner] = result
mad.lo.s32 %r46, %r49, %r13, %r50;
mul.wide.s32 %rd7, %r46, 8;
add.s64 %rd8, %rd6, %rd7;
st.global.f64 [%rd8], %fd19;
.loc 2 69 22
// inner += nctaid.y * ntid.y
add.s32 %r50, %r2, %r50;
.loc 2 69 1
setp.lt.s32 %p24, %r50, %r11;
@%p24 bra BB70_4;
BB70_21:
.loc 2 69 22
// outer += nctaid.x * ntid.x
mov.u32 %r47, %nctaid.x;
mad.lo.s32 %r49, %r47, %r15, %r49;
.loc 2 69 1
setp.lt.s32 %p25, %r49, %r12;
@%p25 bra BB70_2;
BB70_22:
.loc 2 69 2
ret;
}
// map2_transpose_pow_double: tiled element-wise double-precision pow where the
// exponent operand is read with swapped indices via a shared-memory tile.
// With x = tile-x origin + tid.x and y = tile-y origin + tid.y, each thread writes
//   out[y*param_3 + x] = pow(a[y*param_5 + x], b[x*param_7 + y])
//   param_0 (u32): exclusive upper bound of the x index
//   param_1 (u32): exclusive upper bound of the y index
//   param_2 (u64) / param_3 (u32): output pointer / stride in elements
//   param_4 (u64) / param_5 (u32): base operand pointer / stride in elements
//   param_6 (u64) / param_7 (u32): exponent operand pointer / stride in elements
// Tiles span at most 32 indices per dimension; tile origins advance by
// nctaid*ntid in each dimension. All element offsets are 32-bit signed.
.visible .entry map2_transpose_pow_double(
.param .u32 map2_transpose_pow_double_param_0,
.param .u32 map2_transpose_pow_double_param_1,
.param .u64 map2_transpose_pow_double_param_2,
.param .u32 map2_transpose_pow_double_param_3,
.param .u64 map2_transpose_pow_double_param_4,
.param .u32 map2_transpose_pow_double_param_5,
.param .u64 map2_transpose_pow_double_param_6,
.param .u32 map2_transpose_pow_double_param_7
)
{
.reg .pred %p<34>;
.reg .s32 %r<88>;
.reg .s64 %rd<25>;
.reg .f64 %fd<21>;
// demoted variable
// Shared tile: 8448 bytes = 32 rows x 264-byte row pitch (33 doubles per row).
// NOTE(review): the extra column is presumably bank-conflict padding - confirm in the .cu source.
.shared .align 8 .b8 map2_transpose_pow_double$__cuda_local_var_32585_1747_non_const_tile[8448];
ld.param.u32 %r27, [map2_transpose_pow_double_param_0];
ld.param.u32 %r28, [map2_transpose_pow_double_param_1];
ld.param.u64 %rd6, [map2_transpose_pow_double_param_2];
ld.param.u32 %r29, [map2_transpose_pow_double_param_3];
ld.param.u64 %rd7, [map2_transpose_pow_double_param_4];
ld.param.u32 %r30, [map2_transpose_pow_double_param_5];
ld.param.u64 %rd8, [map2_transpose_pow_double_param_6];
ld.param.u32 %r31, [map2_transpose_pow_double_param_7];
// %rd1 = output, %rd2 = base operand, %rd3 = exponent operand (global addresses).
cvta.to.global.u64 %rd1, %rd6;
cvta.to.global.u64 %rd2, %rd7;
cvta.to.global.u64 %rd3, %rd8;
.loc 2 69 1
mov.u32 %r1, %ntid.x;
mov.u32 %r32, %ctaid.y;
mov.u32 %r33, %ntid.y;
// %r82 = tile-y origin = ctaid.y * ntid.y.
mul.lo.s32 %r82, %r32, %r33;
.loc 2 69 1
setp.ge.s32 %p2, %r82, %r28;
@%p2 bra BB71_31;
// Outer loop over tile-y origins.
BB71_1:
.loc 2 69 1
mov.u32 %r35, %ctaid.x;
// %r83 = tile-x origin = ctaid.x * ntid.x (reset for every tile-y origin).
mul.lo.s32 %r83, %r35, %r1;
.loc 2 69 1
setp.ge.s32 %p3, %r83, %r27;
@%p3 bra BB71_30;
.loc 2 69 1
add.s32 %r37, %r82, 32;
.loc 4 2621 10
// %r4 = end of this tile in y = min(param_1, tile-y origin + 32).
min.s32 %r4, %r28, %r37;
// Inner loop over tile-x origins.
BB71_3:
.loc 2 69 1
add.s32 %r40, %r83, 32;
.loc 4 2621 10
// %r7 = end of this tile in x = min(param_0, tile-x origin + 32).
min.s32 %r7, %r27, %r40;
.loc 2 69 1
mov.u32 %r41, %tid.y;
// Load phase: %r84 walks the x range using tid.y, stepping by ntid.y.
add.s32 %r84, %r41, %r83;
.loc 2 69 1
setp.ge.s32 %p4, %r84, %r7;
@%p4 bra BB71_8;
BB71_4:
.loc 2 69 1
mov.u32 %r42, %tid.x;
.loc 2 69 1
// %r85 walks the y range using tid.x, stepping by ntid.x.
add.s32 %r85, %r42, %r82;
.loc 2 69 1
setp.ge.s32 %p5, %r85, %r4;
@%p5 bra BB71_7;
.loc 2 69 1
mul.lo.s32 %r10, %r84, %r31;
sub.s32 %r44, %r84, %r83;
cvt.s64.s32 %rd4, %r44;
BB71_6:
// tile[%r84 - tile-x origin][%r85 - tile-y origin] = b[%r84*param_7 + %r85]
add.s32 %r46, %r85, %r10;
mul.wide.s32 %rd9, %r46, 8;
add.s64 %rd10, %rd3, %rd9;
sub.s32 %r47, %r85, %r82;
mul.lo.s64 %rd11, %rd4, 264;
mov.u64 %rd12, map2_transpose_pow_double$__cuda_local_var_32585_1747_non_const_tile;
add.s64 %rd13, %rd12, %rd11;
mul.wide.s32 %rd14, %r47, 8;
add.s64 %rd15, %rd13, %rd14;
ld.global.f64 %fd13, [%rd10];
st.shared.f64 [%rd15], %fd13;
.loc 2 69 58
add.s32 %r85, %r1, %r85;
.loc 2 69 1
setp.lt.s32 %p6, %r85, %r4;
@%p6 bra BB71_6;
BB71_7:
.loc 2 69 58
add.s32 %r84, %r33, %r84;
.loc 2 69 1
setp.lt.s32 %p7, %r84, %r7;
@%p7 bra BB71_4;
BB71_8:
.loc 2 69 1
// Compute phase: %r86 = y index (tid.y based), %r87 = x index (tid.x based).
add.s32 %r86, %r41, %r82;
setp.lt.s32 %p1, %r86, %r4;
.loc 2 69 1
// Barrier: the tile must be fully written before any thread reads it.
bar.sync 0;
.loc 2 69 1
@!%p1 bra BB71_29;
bra.uni BB71_9;
BB71_9:
.loc 2 69 1
mov.u32 %r52, %tid.x;
add.s32 %r87, %r52, %r83;
.loc 2 69 1
setp.ge.s32 %p8, %r87, %r7;
@%p8 bra BB71_28;
.loc 2 69 1
mul.lo.s32 %r17, %r86, %r30;
sub.s32 %r54, %r86, %r82;
cvt.s64.s32 %rd5, %r54;
.loc 2 69 38
mul.lo.s32 %r18, %r86, %r29;
BB71_11:
.loc 2 69 1
// %fd1 = base    = a[y*param_5 + x]
// %fd2 = exponent = tile[x - tile-x origin][y - tile-y origin] (swapped indices)
add.s32 %r56, %r87, %r17;
mul.wide.s32 %rd16, %r56, 8;
add.s64 %rd17, %rd2, %rd16;
sub.s32 %r57, %r87, %r83;
mul.wide.s32 %rd18, %r57, 264;
mov.u64 %rd19, map2_transpose_pow_double$__cuda_local_var_32585_1747_non_const_tile;
add.s64 %rd20, %rd19, %rd18;
shl.b64 %rd21, %rd5, 3;
add.s64 %rd22, %rd20, %rd21;
ld.global.f64 %fd1, [%rd17];
.loc 3 328 10
// %r21 = high 32 bits of the base (sign bit is bit 31).
{
.reg .b32 %temp;
mov.b64 {%temp, %r21}, %fd1;
}
.loc 2 69 1
ld.shared.f64 %fd2, [%rd22];
.loc 3 328 10
// %r22 = high 32 bits of the exponent.
{
.reg .b32 %temp;
mov.b64 {%temp, %r22}, %fd2;
}
// %fd3 = |exp - 2*trunc(exp * 0.5)|; it equals 1.0 when the exponent is an odd integer.
mul.f64 %fd14, %fd2, 0d3FE0000000000000;
cvt.rzi.f64.f64 %fd15, %fd14;
fma.rn.f64 %fd16, %fd15, 0dC000000000000000, %fd2;
abs.f64 %fd3, %fd16;
// base == 1.0 or exponent == 0.0  ->  result is 1.0.
setp.eq.f64 %p9, %fd1, 0d3FF0000000000000;
setp.eq.f64 %p10, %fd2, 0d0000000000000000;
or.pred %p11, %p9, %p10;
@!%p11 bra BB71_13;
bra.uni BB71_12;
BB71_12:
mov.f64 %fd20, 0d3FF0000000000000;
bra.uni BB71_27;
BB71_13:
.loc 3 328 10
// |v| > +inf unordered-compare is true only for NaN: NaN operands go to BB71_26.
abs.f64 %fd4, %fd1;
setp.gtu.f64 %p12, %fd4, 0d7FF0000000000000;
@%p12 bra BB71_26;
abs.f64 %fd5, %fd2;
setp.gtu.f64 %p13, %fd5, 0d7FF0000000000000;
@%p13 bra BB71_26;
// Infinite exponent -> BB71_25; infinite base -> BB71_24; zero base -> BB71_23.
setp.eq.f64 %p14, %fd5, 0d7FF0000000000000;
@%p14 bra BB71_25;
setp.eq.f64 %p15, %fd4, 0d7FF0000000000000;
@%p15 bra BB71_24;
setp.eq.f64 %p16, %fd1, 0d0000000000000000;
@%p16 bra BB71_23;
// Base sign bit clear -> general path.
setp.gt.s32 %p17, %r21, -1;
@%p17 bra BB71_21;
// Negative base: an integral exponent takes the general path, otherwise the result is NaN.
cvt.rzi.f64.f64 %fd17, %fd2;
setp.eq.f64 %p18, %fd2, %fd17;
@%p18 bra BB71_21;
mov.f64 %fd20, 0dFFF8000000000000;
bra.uni BB71_27;
BB71_21:
// General path: %fd20 = __internal_accurate_pow(|base|, exponent).
setp.lt.s32 %p19, %r21, 0;
// Callseq Start 19
{
.reg .b32 temp_param_reg;
.param .b64 param0;
st.param.f64 [param0+0], %fd4;
.param .b64 param1;
st.param.f64 [param1+0], %fd2;
.param .b64 retval0;
.loc 3 328 10
call.uni (retval0),
__internal_accurate_pow,
(
param0,
param1
);
ld.param.f64 %fd20, [retval0+0];
}
// Callseq End 19
// Negative base with odd exponent: flip the sign of the result (BB71_22).
setp.eq.f64 %p20, %fd3, 0d3FF0000000000000;
.loc 3 328 10
and.pred %p21, %p19, %p20;
@!%p21 bra BB71_27;
bra.uni BB71_22;
BB71_22:
// XOR the high word with 0x80000000 to negate %fd20.
{
.reg .b32 %temp;
mov.b64 {%temp, %r58}, %fd20;
}
xor.b32 %r59, %r58, -2147483648;
{
.reg .b32 %temp;
mov.b64 {%r60, %temp}, %fd20;
}
mov.b64 %fd20, {%r60, %r59};
bra.uni BB71_27;
BB71_23:
// Zero base: keep the base's high word (its sign) only for an odd exponent;
// OR in 0x7FF00000 (infinity) when the exponent is negative, else the result is zero.
setp.eq.f64 %p22, %fd3, 0d3FF0000000000000;
.loc 3 328 10
selp.b32 %r61, %r21, 0, %p22;
mov.u32 %r62, 0;
.loc 3 328 10
or.b32 %r63, %r61, 2146435072;
setp.lt.s32 %p23, %r22, 0;
selp.b32 %r64, %r63, %r61, %p23;
mov.b64 %fd20, {%r62, %r64};
bra.uni BB71_27;
BB71_24:
// Infinite base: high word is 0x7FF00000 (inf) for a non-negative exponent and
// wraps to 0 (zero) for a negative one (0x80100000 + 0x7FF00000 overflows to 0);
// the sign bit is set when the base is negative and the exponent is odd.
setp.eq.f64 %p24, %fd3, 0d3FF0000000000000;
.loc 3 328 10
shr.s32 %r65, %r22, 31;
and.b32 %r66, %r65, -2146435072;
add.s32 %r67, %r66, 2146435072;
setp.lt.s32 %p25, %r21, 0;
mov.u32 %r68, 0;
.loc 3 328 10
and.pred %p26, %p25, %p24;
or.b32 %r69, %r67, -2147483648;
selp.b32 %r70, %r69, %r67, %p26;
mov.b64 %fd20, {%r68, %r70};
bra.uni BB71_27;
BB71_25:
.loc 3 328 10
// Infinite exponent: inf when |base| > 1, else 0; swapped for a negative exponent;
// base == -1.0 yields 1.0 (high word 0x3FF00000).
setp.gt.f64 %p27, %fd4, 0d3FF0000000000000;
selp.b32 %r71, 2146435072, 0, %p27;
mov.u32 %r72, 0;
.loc 3 328 10
xor.b32 %r73, %r71, 2146435072;
setp.lt.s32 %p28, %r22, 0;
selp.b32 %r74, %r73, %r71, %p28;
setp.eq.f64 %p29, %fd1, 0dBFF0000000000000;
selp.b32 %r75, 1072693248, %r74, %p29;
mov.b64 %fd20, {%r72, %r75};
bra.uni BB71_27;
BB71_26:
.loc 3 328 10
// NaN operand: base + exponent propagates the NaN.
add.f64 %fd20, %fd1, %fd2;
BB71_27:
.loc 2 69 38
// out[y*param_3 + x] = %fd20
add.s32 %r76, %r87, %r18;
mul.wide.s32 %rd23, %r76, 8;
add.s64 %rd24, %rd1, %rd23;
st.global.f64 [%rd24], %fd20;
.loc 2 69 58
add.s32 %r87, %r1, %r87;
.loc 2 69 1
setp.lt.s32 %p30, %r87, %r7;
@%p30 bra BB71_11;
BB71_28:
.loc 2 69 58
add.s32 %r86, %r33, %r86;
.loc 2 69 1
setp.lt.s32 %p31, %r86, %r4;
@%p31 bra BB71_9;
BB71_29:
.loc 2 69 1
// Barrier: all reads of the tile finish before the next iteration overwrites it.
bar.sync 0;
.loc 2 69 1
mov.u32 %r79, %nctaid.x;
.loc 2 69 21
// Advance the tile-x origin by nctaid.x * ntid.x.
mad.lo.s32 %r83, %r79, %r1, %r83;
.loc 2 69 1
setp.lt.s32 %p32, %r83, %r27;
@%p32 bra BB71_3;
BB71_30:
.loc 2 69 1
mov.u32 %r80, %nctaid.y;
.loc 2 69 21
// Advance the tile-y origin by nctaid.y * ntid.y.
mad.lo.s32 %r82, %r80, %r33, %r82;
.loc 2 69 1
setp.lt.s32 %p33, %r82, %r28;
@%p33 bra BB71_1;
BB71_31:
.loc 2 69 2
ret;
}
// map2_max_double: element-wise double-precision maximum of two strided arrays.
// With x = ntid.x*ctaid.x + tid.x and y = ntid.y*ctaid.y + tid.y:
//   out[x*param_3 + y] = max(a[x*param_5 + y], b[x*param_7 + y])
//   param_0 (u32): exclusive upper bound of y
//   param_1 (u32): exclusive upper bound of x
//   param_2 (u64) / param_3 (u32): output pointer / stride in elements
//   param_4 (u64) / param_5 (u32): first operand pointer / stride in elements
//   param_6 (u64) / param_7 (u32): second operand pointer / stride in elements
// Both indices loop with a step of nctaid*ntid (2D grid-stride loop).
.visible .entry map2_max_double(
.param .u32 map2_max_double_param_0,
.param .u32 map2_max_double_param_1,
.param .u64 map2_max_double_param_2,
.param .u32 map2_max_double_param_3,
.param .u64 map2_max_double_param_4,
.param .u32 map2_max_double_param_5,
.param .u64 map2_max_double_param_6,
.param .u32 map2_max_double_param_7
)
{
.reg .pred %p<5>;
.reg .s32 %r<30>;
.reg .s64 %rd<13>;
.reg .f64 %fd<4>;
ld.param.u32 %r13, [map2_max_double_param_0];
ld.param.u32 %r14, [map2_max_double_param_1];
ld.param.u64 %rd4, [map2_max_double_param_2];
ld.param.u32 %r15, [map2_max_double_param_3];
ld.param.u64 %rd5, [map2_max_double_param_4];
ld.param.u32 %r16, [map2_max_double_param_5];
ld.param.u64 %rd6, [map2_max_double_param_6];
ld.param.u32 %r17, [map2_max_double_param_7];
// %rd1 = output, %rd3 = first operand, %rd2 = second operand.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd6;
cvta.to.global.u64 %rd3, %rd5;
.loc 2 70 1
mov.u32 %r1, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
// %r28 = x = ntid.x*ctaid.x + tid.x
mad.lo.s32 %r28, %r1, %r18, %r19;
.loc 2 70 1
setp.ge.s32 %p1, %r28, %r14;
@%p1 bra BB72_6;
.loc 2 70 1
mov.u32 %r20, %tid.y;
mov.u32 %r21, %ntid.y;
mov.u32 %r22, %ctaid.y;
// %r3 = initial y = ntid.y*ctaid.y + tid.y
mad.lo.s32 %r3, %r21, %r22, %r20;
.loc 2 70 22
mov.u32 %r23, %nctaid.x;
// %r4 = x step, %r5 = y step.
mul.lo.s32 %r4, %r23, %r1;
.loc 2 70 22
mov.u32 %r24, %nctaid.y;
mul.lo.s32 %r5, %r24, %r21;
// Outer loop over x.
BB72_2:
.loc 2 70 1
setp.ge.s32 %p2, %r3, %r13;
@%p2 bra BB72_5;
.loc 2 70 1
// Per-x row offsets into the first operand, second operand and output.
mul.lo.s32 %r7, %r28, %r16;
mul.lo.s32 %r8, %r28, %r17;
.loc 2 70 145
mul.lo.s32 %r9, %r28, %r15;
mov.u32 %r29, %r3;
// Inner loop over y.
BB72_4:
.loc 2 70 1
mov.u32 %r10, %r29;
add.s32 %r25, %r10, %r7;
mul.wide.s32 %rd7, %r25, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 2 70 1
add.s32 %r26, %r10, %r8;
mul.wide.s32 %rd9, %r26, 8;
add.s64 %rd10, %rd2, %rd9;
.loc 2 70 1
ld.global.f64 %fd1, [%rd10];
ld.global.f64 %fd2, [%rd8];
.loc 4 2785 10
max.f64 %fd3, %fd2, %fd1;
.loc 2 70 145
add.s32 %r27, %r10, %r9;
mul.wide.s32 %rd11, %r27, 8;
add.s64 %rd12, %rd1, %rd11;
.loc 2 70 145
st.global.f64 [%rd12], %fd3;
.loc 2 70 22
add.s32 %r11, %r5, %r10;
.loc 2 70 1
setp.lt.s32 %p3, %r11, %r13;
mov.u32 %r29, %r11;
@%p3 bra BB72_4;
BB72_5:
.loc 2 70 22
add.s32 %r28, %r4, %r28;
.loc 2 70 1
setp.lt.s32 %p4, %r28, %r14;
@%p4 bra BB72_2;
BB72_6:
.loc 2 70 2
ret;
}
// map2_v_s_max_double: element-wise double-precision maximum of a strided array
// and a scalar. With x = ntid.x*ctaid.x + tid.x and y = ntid.y*ctaid.y + tid.y:
//   out[x*param_3 + y] = max(a[x*param_5 + y], param_6)
//   param_0 (u32): exclusive upper bound of y
//   param_1 (u32): exclusive upper bound of x
//   param_2 (u64) / param_3 (u32): output pointer / stride in elements
//   param_4 (u64) / param_5 (u32): array operand pointer / stride in elements
//   param_6 (f64): scalar operand
// Both indices loop with a step of nctaid*ntid (2D grid-stride loop).
.visible .entry map2_v_s_max_double(
.param .u32 map2_v_s_max_double_param_0,
.param .u32 map2_v_s_max_double_param_1,
.param .u64 map2_v_s_max_double_param_2,
.param .u32 map2_v_s_max_double_param_3,
.param .u64 map2_v_s_max_double_param_4,
.param .u32 map2_v_s_max_double_param_5,
.param .f64 map2_v_s_max_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_v_s_max_double_param_0];
ld.param.u32 %r13, [map2_v_s_max_double_param_1];
ld.param.u64 %rd3, [map2_v_s_max_double_param_2];
ld.param.u32 %r14, [map2_v_s_max_double_param_3];
ld.param.u64 %rd4, [map2_v_s_max_double_param_4];
ld.param.u32 %r15, [map2_v_s_max_double_param_5];
ld.param.f64 %fd1, [map2_v_s_max_double_param_6];
// %rd1 = output, %rd2 = array operand; %fd1 = scalar operand.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 70 1
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
// %r25 = x = ntid.x*ctaid.x + tid.x
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 70 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB73_6;
.loc 2 70 1
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
// %r3 = initial y = ntid.y*ctaid.y + tid.y
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 70 22
mov.u32 %r21, %nctaid.x;
// %r4 = x step, %r5 = y step.
mul.lo.s32 %r4, %r21, %r1;
.loc 2 70 22
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over x.
BB73_2:
.loc 2 70 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB73_5;
.loc 2 70 1
// Per-x row offsets into the array operand and the output.
mul.lo.s32 %r7, %r25, %r15;
.loc 2 70 89
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over y.
BB73_4:
.loc 2 70 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 70 1
ld.global.f64 %fd2, [%rd6];
.loc 4 2785 10
max.f64 %fd3, %fd2, %fd1;
.loc 2 70 89
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 70 89
st.global.f64 [%rd8], %fd3;
.loc 2 70 22
add.s32 %r10, %r5, %r9;
.loc 2 70 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB73_4;
BB73_5:
.loc 2 70 22
add.s32 %r25, %r4, %r25;
.loc 2 70 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB73_2;
BB73_6:
.loc 2 70 2
ret;
}
// map2_s_v_max_double: element-wise double-precision maximum of a scalar and a
// strided array. With x = ntid.x*ctaid.x + tid.x and y = ntid.y*ctaid.y + tid.y:
//   out[x*param_3 + y] = max(param_4, b[x*param_6 + y])
//   param_0 (u32): exclusive upper bound of y
//   param_1 (u32): exclusive upper bound of x
//   param_2 (u64) / param_3 (u32): output pointer / stride in elements
//   param_4 (f64): scalar operand
//   param_5 (u64) / param_6 (u32): array operand pointer / stride in elements
// Both indices loop with a step of nctaid*ntid (2D grid-stride loop).
.visible .entry map2_s_v_max_double(
.param .u32 map2_s_v_max_double_param_0,
.param .u32 map2_s_v_max_double_param_1,
.param .u64 map2_s_v_max_double_param_2,
.param .u32 map2_s_v_max_double_param_3,
.param .f64 map2_s_v_max_double_param_4,
.param .u64 map2_s_v_max_double_param_5,
.param .u32 map2_s_v_max_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_s_v_max_double_param_0];
ld.param.u32 %r13, [map2_s_v_max_double_param_1];
ld.param.u64 %rd3, [map2_s_v_max_double_param_2];
ld.param.u32 %r14, [map2_s_v_max_double_param_3];
ld.param.f64 %fd1, [map2_s_v_max_double_param_4];
ld.param.u64 %rd4, [map2_s_v_max_double_param_5];
ld.param.u32 %r15, [map2_s_v_max_double_param_6];
// %rd1 = output, %rd2 = array operand; %fd1 = scalar operand.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 70 1
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
// %r25 = x = ntid.x*ctaid.x + tid.x
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 70 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB74_6;
.loc 2 70 1
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
// %r3 = initial y = ntid.y*ctaid.y + tid.y
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 70 22
mov.u32 %r21, %nctaid.x;
// %r4 = x step, %r5 = y step.
mul.lo.s32 %r4, %r21, %r1;
.loc 2 70 22
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over x.
BB74_2:
.loc 2 70 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB74_5;
.loc 2 70 1
// Per-x row offsets into the array operand and the output.
mul.lo.s32 %r7, %r25, %r15;
.loc 2 70 89
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over y.
BB74_4:
.loc 2 70 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 70 1
ld.global.f64 %fd2, [%rd6];
.loc 4 2785 10
max.f64 %fd3, %fd1, %fd2;
.loc 2 70 89
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 70 89
st.global.f64 [%rd8], %fd3;
.loc 2 70 22
add.s32 %r10, %r5, %r9;
.loc 2 70 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB74_4;
BB74_5:
.loc 2 70 22
add.s32 %r25, %r4, %r25;
.loc 2 70 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB74_2;
BB74_6:
.loc 2 70 2
ret;
}
// map2_transpose_max_double: tiled element-wise double-precision maximum where the
// second operand is read with swapped indices via a shared-memory tile.
// With x = tile-x origin + tid.x and y = tile-y origin + tid.y, each thread writes
//   out[y*param_3 + x] = max(a[y*param_5 + x], b[x*param_7 + y])
//   param_0 (u32): exclusive upper bound of the x index
//   param_1 (u32): exclusive upper bound of the y index
//   param_2 (u64) / param_3 (u32): output pointer / stride in elements
//   param_4 (u64) / param_5 (u32): first operand pointer / stride in elements
//   param_6 (u64) / param_7 (u32): second operand pointer / stride in elements
// Tiles span at most 32 indices per dimension; tile origins advance by
// nctaid*ntid in each dimension.
.visible .entry map2_transpose_max_double(
.param .u32 map2_transpose_max_double_param_0,
.param .u32 map2_transpose_max_double_param_1,
.param .u64 map2_transpose_max_double_param_2,
.param .u32 map2_transpose_max_double_param_3,
.param .u64 map2_transpose_max_double_param_4,
.param .u32 map2_transpose_max_double_param_5,
.param .u64 map2_transpose_max_double_param_6,
.param .u32 map2_transpose_max_double_param_7
)
{
.reg .pred %p<13>;
.reg .s32 %r<68>;
.reg .s64 %rd<23>;
.reg .f64 %fd<5>;
// demoted variable
// Shared tile: 8448 bytes = 32 rows x 264-byte row pitch (33 doubles per row).
// NOTE(review): the extra column is presumably bank-conflict padding - confirm in the .cu source.
.shared .align 8 .b8 map2_transpose_max_double$__cuda_local_var_32586_1747_non_const_tile[8448];
ld.param.u32 %r27, [map2_transpose_max_double_param_0];
ld.param.u32 %r28, [map2_transpose_max_double_param_1];
ld.param.u64 %rd4, [map2_transpose_max_double_param_2];
ld.param.u32 %r29, [map2_transpose_max_double_param_3];
ld.param.u64 %rd5, [map2_transpose_max_double_param_4];
ld.param.u32 %r30, [map2_transpose_max_double_param_5];
ld.param.u64 %rd6, [map2_transpose_max_double_param_6];
ld.param.u32 %r31, [map2_transpose_max_double_param_7];
// %rd1 = output, %rd2 = first operand, %rd3 = second operand.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
cvta.to.global.u64 %rd3, %rd6;
.loc 2 70 1
mov.u32 %r1, %ntid.x;
mov.u32 %r32, %ctaid.y;
mov.u32 %r33, %ntid.y;
// %r62 = tile-y origin = ctaid.y * ntid.y.
mul.lo.s32 %r62, %r32, %r33;
.loc 2 70 1
setp.ge.s32 %p2, %r62, %r28;
@%p2 bra BB75_15;
// Outer loop over tile-y origins.
BB75_1:
.loc 2 70 1
mov.u32 %r35, %ctaid.x;
// %r63 = tile-x origin = ctaid.x * ntid.x (reset for every tile-y origin).
mul.lo.s32 %r63, %r35, %r1;
.loc 2 70 1
setp.ge.s32 %p3, %r63, %r27;
@%p3 bra BB75_14;
.loc 2 70 1
add.s32 %r37, %r62, 32;
.loc 4 2621 10
// %r4 = end of this tile in y = min(param_1, tile-y origin + 32).
min.s32 %r4, %r28, %r37;
// Inner loop over tile-x origins.
BB75_3:
.loc 2 70 1
add.s32 %r40, %r63, 32;
.loc 4 2621 10
// %r7 = end of this tile in x = min(param_0, tile-x origin + 32).
min.s32 %r7, %r27, %r40;
.loc 2 70 1
mov.u32 %r41, %tid.y;
// Load phase: %r64 walks the x range using tid.y, stepping by ntid.y.
add.s32 %r64, %r41, %r63;
.loc 2 70 1
setp.ge.s32 %p4, %r64, %r7;
@%p4 bra BB75_8;
BB75_4:
.loc 2 70 1
mov.u32 %r42, %tid.x;
.loc 2 70 1
// %r65 walks the y range using tid.x, stepping by ntid.x.
add.s32 %r65, %r42, %r62;
.loc 2 70 1
setp.ge.s32 %p5, %r65, %r4;
@%p5 bra BB75_7;
.loc 2 70 1
mul.lo.s32 %r10, %r64, %r31;
sub.s32 %r11, %r64, %r63;
BB75_6:
// tile[%r64 - tile-x origin][%r65 - tile-y origin] = b[%r64*param_7 + %r65]
add.s32 %r45, %r65, %r10;
mul.wide.s32 %rd7, %r45, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 2 70 1
sub.s32 %r46, %r65, %r62;
mul.wide.s32 %rd9, %r11, 264;
mov.u64 %rd10, map2_transpose_max_double$__cuda_local_var_32586_1747_non_const_tile;
add.s64 %rd11, %rd10, %rd9;
mul.wide.s32 %rd12, %r46, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 2 70 1
ld.global.f64 %fd1, [%rd8];
st.shared.f64 [%rd13], %fd1;
.loc 2 70 58
add.s32 %r65, %r1, %r65;
.loc 2 70 1
setp.lt.s32 %p6, %r65, %r4;
@%p6 bra BB75_6;
BB75_7:
.loc 2 70 58
add.s32 %r64, %r33, %r64;
.loc 2 70 1
setp.lt.s32 %p7, %r64, %r7;
@%p7 bra BB75_4;
BB75_8:
.loc 2 70 1
// Compute phase: %r66 = y index (tid.y based), %r67 = x index (tid.x based).
add.s32 %r66, %r41, %r62;
setp.lt.s32 %p1, %r66, %r4;
.loc 2 70 1
// Barrier: the tile must be fully written before any thread reads it.
bar.sync 0;
.loc 2 70 1
@!%p1 bra BB75_13;
bra.uni BB75_9;
BB75_9:
.loc 2 70 1
mov.u32 %r51, %tid.x;
add.s32 %r67, %r51, %r63;
.loc 2 70 1
setp.ge.s32 %p8, %r67, %r7;
@%p8 bra BB75_12;
.loc 2 70 1
mul.lo.s32 %r18, %r66, %r30;
sub.s32 %r19, %r66, %r62;
.loc 2 70 176
mul.lo.s32 %r20, %r66, %r29;
BB75_11:
.loc 2 70 1
// %fd3 = a[y*param_5 + x]
add.s32 %r54, %r67, %r18;
mul.wide.s32 %rd14, %r54, 8;
add.s64 %rd15, %rd2, %rd14;
.loc 2 70 1
// %fd2 = tile[x - tile-x origin][y - tile-y origin] (swapped indices)
sub.s32 %r55, %r67, %r63;
mul.wide.s32 %rd16, %r55, 264;
mov.u64 %rd17, map2_transpose_max_double$__cuda_local_var_32586_1747_non_const_tile;
add.s64 %rd18, %rd17, %rd16;
mul.wide.s32 %rd19, %r19, 8;
add.s64 %rd20, %rd18, %rd19;
.loc 2 70 1
ld.shared.f64 %fd2, [%rd20];
ld.global.f64 %fd3, [%rd15];
.loc 4 2785 10
max.f64 %fd4, %fd3, %fd2;
.loc 2 70 176
// out[y*param_3 + x] = %fd4
add.s32 %r56, %r67, %r20;
mul.wide.s32 %rd21, %r56, 8;
add.s64 %rd22, %rd1, %rd21;
.loc 2 70 176
st.global.f64 [%rd22], %fd4;
.loc 2 70 58
add.s32 %r67, %r1, %r67;
.loc 2 70 1
setp.lt.s32 %p9, %r67, %r7;
@%p9 bra BB75_11;
BB75_12:
.loc 2 70 58
add.s32 %r66, %r33, %r66;
.loc 2 70 1
setp.lt.s32 %p10, %r66, %r4;
@%p10 bra BB75_9;
BB75_13:
.loc 2 70 1
// Barrier: all reads of the tile finish before the next iteration overwrites it.
bar.sync 0;
.loc 2 70 1
mov.u32 %r59, %nctaid.x;
.loc 2 70 21
// Advance the tile-x origin by nctaid.x * ntid.x.
mad.lo.s32 %r63, %r59, %r1, %r63;
.loc 2 70 1
setp.lt.s32 %p11, %r63, %r27;
@%p11 bra BB75_3;
BB75_14:
.loc 2 70 1
mov.u32 %r60, %nctaid.y;
.loc 2 70 21
// Advance the tile-y origin by nctaid.y * ntid.y.
mad.lo.s32 %r62, %r60, %r33, %r62;
.loc 2 70 1
setp.lt.s32 %p12, %r62, %r28;
@%p12 bra BB75_1;
BB75_15:
.loc 2 70 2
ret;
}
// map2_min_double: element-wise double-precision minimum of two strided arrays.
// With x = ntid.x*ctaid.x + tid.x and y = ntid.y*ctaid.y + tid.y:
//   out[x*param_3 + y] = min(a[x*param_5 + y], b[x*param_7 + y])
//   param_0 (u32): exclusive upper bound of y
//   param_1 (u32): exclusive upper bound of x
//   param_2 (u64) / param_3 (u32): output pointer / stride in elements
//   param_4 (u64) / param_5 (u32): first operand pointer / stride in elements
//   param_6 (u64) / param_7 (u32): second operand pointer / stride in elements
// Both indices loop with a step of nctaid*ntid (2D grid-stride loop).
.visible .entry map2_min_double(
.param .u32 map2_min_double_param_0,
.param .u32 map2_min_double_param_1,
.param .u64 map2_min_double_param_2,
.param .u32 map2_min_double_param_3,
.param .u64 map2_min_double_param_4,
.param .u32 map2_min_double_param_5,
.param .u64 map2_min_double_param_6,
.param .u32 map2_min_double_param_7
)
{
.reg .pred %p<5>;
.reg .s32 %r<30>;
.reg .s64 %rd<13>;
.reg .f64 %fd<4>;
ld.param.u32 %r13, [map2_min_double_param_0];
ld.param.u32 %r14, [map2_min_double_param_1];
ld.param.u64 %rd4, [map2_min_double_param_2];
ld.param.u32 %r15, [map2_min_double_param_3];
ld.param.u64 %rd5, [map2_min_double_param_4];
ld.param.u32 %r16, [map2_min_double_param_5];
ld.param.u64 %rd6, [map2_min_double_param_6];
ld.param.u32 %r17, [map2_min_double_param_7];
// %rd1 = output, %rd3 = first operand, %rd2 = second operand.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd6;
cvta.to.global.u64 %rd3, %rd5;
.loc 2 71 1
mov.u32 %r1, %ntid.x;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.x;
// %r28 = x = ntid.x*ctaid.x + tid.x
mad.lo.s32 %r28, %r1, %r18, %r19;
.loc 2 71 1
setp.ge.s32 %p1, %r28, %r14;
@%p1 bra BB76_6;
.loc 2 71 1
mov.u32 %r20, %tid.y;
mov.u32 %r21, %ntid.y;
mov.u32 %r22, %ctaid.y;
// %r3 = initial y = ntid.y*ctaid.y + tid.y
mad.lo.s32 %r3, %r21, %r22, %r20;
.loc 2 71 22
mov.u32 %r23, %nctaid.x;
// %r4 = x step, %r5 = y step.
mul.lo.s32 %r4, %r23, %r1;
.loc 2 71 22
mov.u32 %r24, %nctaid.y;
mul.lo.s32 %r5, %r24, %r21;
// Outer loop over x.
BB76_2:
.loc 2 71 1
setp.ge.s32 %p2, %r3, %r13;
@%p2 bra BB76_5;
.loc 2 71 1
// Per-x row offsets into the first operand, second operand and output.
mul.lo.s32 %r7, %r28, %r16;
mul.lo.s32 %r8, %r28, %r17;
.loc 2 71 147
mul.lo.s32 %r9, %r28, %r15;
mov.u32 %r29, %r3;
// Inner loop over y.
BB76_4:
.loc 2 71 1
mov.u32 %r10, %r29;
add.s32 %r25, %r10, %r7;
mul.wide.s32 %rd7, %r25, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 2 71 1
add.s32 %r26, %r10, %r8;
mul.wide.s32 %rd9, %r26, 8;
add.s64 %rd10, %rd2, %rd9;
.loc 2 71 1
ld.global.f64 %fd1, [%rd10];
ld.global.f64 %fd2, [%rd8];
.loc 4 2780 10
min.f64 %fd3, %fd2, %fd1;
.loc 2 71 147
add.s32 %r27, %r10, %r9;
mul.wide.s32 %rd11, %r27, 8;
add.s64 %rd12, %rd1, %rd11;
.loc 2 71 147
st.global.f64 [%rd12], %fd3;
.loc 2 71 22
add.s32 %r11, %r5, %r10;
.loc 2 71 1
setp.lt.s32 %p3, %r11, %r13;
mov.u32 %r29, %r11;
@%p3 bra BB76_4;
BB76_5:
.loc 2 71 22
add.s32 %r28, %r4, %r28;
.loc 2 71 1
setp.lt.s32 %p4, %r28, %r14;
@%p4 bra BB76_2;
BB76_6:
.loc 2 71 2
ret;
}
// map2_v_s_min_double: element-wise double-precision minimum of a strided array
// and a scalar. With x = ntid.x*ctaid.x + tid.x and y = ntid.y*ctaid.y + tid.y:
//   out[x*param_3 + y] = min(a[x*param_5 + y], param_6)
//   param_0 (u32): exclusive upper bound of y
//   param_1 (u32): exclusive upper bound of x
//   param_2 (u64) / param_3 (u32): output pointer / stride in elements
//   param_4 (u64) / param_5 (u32): array operand pointer / stride in elements
//   param_6 (f64): scalar operand
// Both indices loop with a step of nctaid*ntid (2D grid-stride loop).
.visible .entry map2_v_s_min_double(
.param .u32 map2_v_s_min_double_param_0,
.param .u32 map2_v_s_min_double_param_1,
.param .u64 map2_v_s_min_double_param_2,
.param .u32 map2_v_s_min_double_param_3,
.param .u64 map2_v_s_min_double_param_4,
.param .u32 map2_v_s_min_double_param_5,
.param .f64 map2_v_s_min_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_v_s_min_double_param_0];
ld.param.u32 %r13, [map2_v_s_min_double_param_1];
ld.param.u64 %rd3, [map2_v_s_min_double_param_2];
ld.param.u32 %r14, [map2_v_s_min_double_param_3];
ld.param.u64 %rd4, [map2_v_s_min_double_param_4];
ld.param.u32 %r15, [map2_v_s_min_double_param_5];
ld.param.f64 %fd1, [map2_v_s_min_double_param_6];
// %rd1 = output, %rd2 = array operand; %fd1 = scalar operand.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 71 1
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
// %r25 = x = ntid.x*ctaid.x + tid.x
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 71 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB77_6;
.loc 2 71 1
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
// %r3 = initial y = ntid.y*ctaid.y + tid.y
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 71 22
mov.u32 %r21, %nctaid.x;
// %r4 = x step, %r5 = y step.
mul.lo.s32 %r4, %r21, %r1;
.loc 2 71 22
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over x.
BB77_2:
.loc 2 71 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB77_5;
.loc 2 71 1
// Per-x row offsets into the array operand and the output.
mul.lo.s32 %r7, %r25, %r15;
.loc 2 71 90
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over y.
BB77_4:
.loc 2 71 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 71 1
ld.global.f64 %fd2, [%rd6];
.loc 4 2780 10
min.f64 %fd3, %fd2, %fd1;
.loc 2 71 90
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 71 90
st.global.f64 [%rd8], %fd3;
.loc 2 71 22
add.s32 %r10, %r5, %r9;
.loc 2 71 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB77_4;
BB77_5:
.loc 2 71 22
add.s32 %r25, %r4, %r25;
.loc 2 71 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB77_2;
BB77_6:
.loc 2 71 2
ret;
}
// map2_s_v_min_double: element-wise double-precision minimum of a scalar and a
// strided array. With x = ntid.x*ctaid.x + tid.x and y = ntid.y*ctaid.y + tid.y:
//   out[x*param_3 + y] = min(param_4, b[x*param_6 + y])
//   param_0 (u32): exclusive upper bound of y
//   param_1 (u32): exclusive upper bound of x
//   param_2 (u64) / param_3 (u32): output pointer / stride in elements
//   param_4 (f64): scalar operand
//   param_5 (u64) / param_6 (u32): array operand pointer / stride in elements
// Both indices loop with a step of nctaid*ntid (2D grid-stride loop).
.visible .entry map2_s_v_min_double(
.param .u32 map2_s_v_min_double_param_0,
.param .u32 map2_s_v_min_double_param_1,
.param .u64 map2_s_v_min_double_param_2,
.param .u32 map2_s_v_min_double_param_3,
.param .f64 map2_s_v_min_double_param_4,
.param .u64 map2_s_v_min_double_param_5,
.param .u32 map2_s_v_min_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<4>;
ld.param.u32 %r12, [map2_s_v_min_double_param_0];
ld.param.u32 %r13, [map2_s_v_min_double_param_1];
ld.param.u64 %rd3, [map2_s_v_min_double_param_2];
ld.param.u32 %r14, [map2_s_v_min_double_param_3];
ld.param.f64 %fd1, [map2_s_v_min_double_param_4];
ld.param.u64 %rd4, [map2_s_v_min_double_param_5];
ld.param.u32 %r15, [map2_s_v_min_double_param_6];
// %rd1 = output, %rd2 = array operand; %fd1 = scalar operand.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 71 1
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
// %r25 = x = ntid.x*ctaid.x + tid.x
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 71 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB78_6;
.loc 2 71 1
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
// %r3 = initial y = ntid.y*ctaid.y + tid.y
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 71 22
mov.u32 %r21, %nctaid.x;
// %r4 = x step, %r5 = y step.
mul.lo.s32 %r4, %r21, %r1;
.loc 2 71 22
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over x.
BB78_2:
.loc 2 71 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB78_5;
.loc 2 71 1
// Per-x row offsets into the array operand and the output.
mul.lo.s32 %r7, %r25, %r15;
.loc 2 71 90
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over y.
BB78_4:
.loc 2 71 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 71 1
ld.global.f64 %fd2, [%rd6];
.loc 4 2780 10
min.f64 %fd3, %fd1, %fd2;
.loc 2 71 90
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 71 90
st.global.f64 [%rd8], %fd3;
.loc 2 71 22
add.s32 %r10, %r5, %r9;
.loc 2 71 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB78_4;
BB78_5:
.loc 2 71 22
add.s32 %r25, %r4, %r25;
.loc 2 71 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB78_2;
BB78_6:
.loc 2 71 2
ret;
}
// map2_transpose_min_double: tiled element-wise double-precision minimum where the
// second operand is read with swapped indices via a shared-memory tile.
// With x = tile-x origin + tid.x and y = tile-y origin + tid.y, each thread writes
//   out[y*param_3 + x] = min(a[y*param_5 + x], b[x*param_7 + y])
//   param_0 (u32): exclusive upper bound of the x index
//   param_1 (u32): exclusive upper bound of the y index
//   param_2 (u64) / param_3 (u32): output pointer / stride in elements
//   param_4 (u64) / param_5 (u32): first operand pointer / stride in elements
//   param_6 (u64) / param_7 (u32): second operand pointer / stride in elements
// Tiles span at most 32 indices per dimension; tile origins advance by
// nctaid*ntid in each dimension.
.visible .entry map2_transpose_min_double(
.param .u32 map2_transpose_min_double_param_0,
.param .u32 map2_transpose_min_double_param_1,
.param .u64 map2_transpose_min_double_param_2,
.param .u32 map2_transpose_min_double_param_3,
.param .u64 map2_transpose_min_double_param_4,
.param .u32 map2_transpose_min_double_param_5,
.param .u64 map2_transpose_min_double_param_6,
.param .u32 map2_transpose_min_double_param_7
)
{
.reg .pred %p<13>;
.reg .s32 %r<68>;
.reg .s64 %rd<23>;
.reg .f64 %fd<5>;
// demoted variable
// Shared tile: 8448 bytes = 32 rows x 264-byte row pitch (33 doubles per row).
// NOTE(review): the extra column is presumably bank-conflict padding - confirm in the .cu source.
.shared .align 8 .b8 map2_transpose_min_double$__cuda_local_var_32587_1747_non_const_tile[8448];
ld.param.u32 %r27, [map2_transpose_min_double_param_0];
ld.param.u32 %r28, [map2_transpose_min_double_param_1];
ld.param.u64 %rd4, [map2_transpose_min_double_param_2];
ld.param.u32 %r29, [map2_transpose_min_double_param_3];
ld.param.u64 %rd5, [map2_transpose_min_double_param_4];
ld.param.u32 %r30, [map2_transpose_min_double_param_5];
ld.param.u64 %rd6, [map2_transpose_min_double_param_6];
ld.param.u32 %r31, [map2_transpose_min_double_param_7];
// %rd1 = output, %rd2 = first operand, %rd3 = second operand.
cvta.to.global.u64 %rd1, %rd4;
cvta.to.global.u64 %rd2, %rd5;
cvta.to.global.u64 %rd3, %rd6;
.loc 2 71 1
mov.u32 %r1, %ntid.x;
mov.u32 %r32, %ctaid.y;
mov.u32 %r33, %ntid.y;
// %r62 = tile-y origin = ctaid.y * ntid.y.
mul.lo.s32 %r62, %r32, %r33;
.loc 2 71 1
setp.ge.s32 %p2, %r62, %r28;
@%p2 bra BB79_15;
// Outer loop over tile-y origins.
BB79_1:
.loc 2 71 1
mov.u32 %r35, %ctaid.x;
// %r63 = tile-x origin = ctaid.x * ntid.x (reset for every tile-y origin).
mul.lo.s32 %r63, %r35, %r1;
.loc 2 71 1
setp.ge.s32 %p3, %r63, %r27;
@%p3 bra BB79_14;
.loc 2 71 1
add.s32 %r37, %r62, 32;
.loc 4 2621 10
// %r4 = end of this tile in y = min(param_1, tile-y origin + 32).
min.s32 %r4, %r28, %r37;
// Inner loop over tile-x origins.
BB79_3:
.loc 2 71 1
add.s32 %r40, %r63, 32;
.loc 4 2621 10
// %r7 = end of this tile in x = min(param_0, tile-x origin + 32).
min.s32 %r7, %r27, %r40;
.loc 2 71 1
mov.u32 %r41, %tid.y;
// Load phase: %r64 walks the x range using tid.y, stepping by ntid.y.
add.s32 %r64, %r41, %r63;
.loc 2 71 1
setp.ge.s32 %p4, %r64, %r7;
@%p4 bra BB79_8;
BB79_4:
.loc 2 71 1
mov.u32 %r42, %tid.x;
.loc 2 71 1
// %r65 walks the y range using tid.x, stepping by ntid.x.
add.s32 %r65, %r42, %r62;
.loc 2 71 1
setp.ge.s32 %p5, %r65, %r4;
@%p5 bra BB79_7;
.loc 2 71 1
mul.lo.s32 %r10, %r64, %r31;
sub.s32 %r11, %r64, %r63;
BB79_6:
// tile[%r64 - tile-x origin][%r65 - tile-y origin] = b[%r64*param_7 + %r65]
add.s32 %r45, %r65, %r10;
mul.wide.s32 %rd7, %r45, 8;
add.s64 %rd8, %rd3, %rd7;
.loc 2 71 1
sub.s32 %r46, %r65, %r62;
mul.wide.s32 %rd9, %r11, 264;
mov.u64 %rd10, map2_transpose_min_double$__cuda_local_var_32587_1747_non_const_tile;
add.s64 %rd11, %rd10, %rd9;
mul.wide.s32 %rd12, %r46, 8;
add.s64 %rd13, %rd11, %rd12;
.loc 2 71 1
ld.global.f64 %fd1, [%rd8];
st.shared.f64 [%rd13], %fd1;
.loc 2 71 58
add.s32 %r65, %r1, %r65;
.loc 2 71 1
setp.lt.s32 %p6, %r65, %r4;
@%p6 bra BB79_6;
BB79_7:
.loc 2 71 58
add.s32 %r64, %r33, %r64;
.loc 2 71 1
setp.lt.s32 %p7, %r64, %r7;
@%p7 bra BB79_4;
BB79_8:
.loc 2 71 1
// Compute phase: %r66 = y index (tid.y based), %r67 = x index (tid.x based).
add.s32 %r66, %r41, %r62;
setp.lt.s32 %p1, %r66, %r4;
.loc 2 71 1
// Barrier: the tile must be fully written before any thread reads it.
bar.sync 0;
.loc 2 71 1
@!%p1 bra BB79_13;
bra.uni BB79_9;
BB79_9:
.loc 2 71 1
mov.u32 %r51, %tid.x;
add.s32 %r67, %r51, %r63;
.loc 2 71 1
setp.ge.s32 %p8, %r67, %r7;
@%p8 bra BB79_12;
.loc 2 71 1
mul.lo.s32 %r18, %r66, %r30;
sub.s32 %r19, %r66, %r62;
.loc 2 71 176
mul.lo.s32 %r20, %r66, %r29;
BB79_11:
.loc 2 71 1
// %fd3 = a[y*param_5 + x]
add.s32 %r54, %r67, %r18;
mul.wide.s32 %rd14, %r54, 8;
add.s64 %rd15, %rd2, %rd14;
.loc 2 71 1
// %fd2 = tile[x - tile-x origin][y - tile-y origin] (swapped indices)
sub.s32 %r55, %r67, %r63;
mul.wide.s32 %rd16, %r55, 264;
mov.u64 %rd17, map2_transpose_min_double$__cuda_local_var_32587_1747_non_const_tile;
add.s64 %rd18, %rd17, %rd16;
mul.wide.s32 %rd19, %r19, 8;
add.s64 %rd20, %rd18, %rd19;
.loc 2 71 1
ld.shared.f64 %fd2, [%rd20];
ld.global.f64 %fd3, [%rd15];
.loc 4 2780 10
min.f64 %fd4, %fd3, %fd2;
.loc 2 71 176
// out[y*param_3 + x] = %fd4
add.s32 %r56, %r67, %r20;
mul.wide.s32 %rd21, %r56, 8;
add.s64 %rd22, %rd1, %rd21;
.loc 2 71 176
st.global.f64 [%rd22], %fd4;
.loc 2 71 58
add.s32 %r67, %r1, %r67;
.loc 2 71 1
setp.lt.s32 %p9, %r67, %r7;
@%p9 bra BB79_11;
BB79_12:
.loc 2 71 58
add.s32 %r66, %r33, %r66;
.loc 2 71 1
setp.lt.s32 %p10, %r66, %r4;
@%p10 bra BB79_9;
BB79_13:
.loc 2 71 1
// Barrier: all reads of the tile finish before the next iteration overwrites it.
bar.sync 0;
.loc 2 71 1
mov.u32 %r59, %nctaid.x;
.loc 2 71 21
// Advance the tile-x origin by nctaid.x * ntid.x.
mad.lo.s32 %r63, %r59, %r1, %r63;
.loc 2 71 1
setp.lt.s32 %p11, %r63, %r27;
@%p11 bra BB79_3;
BB79_14:
.loc 2 71 1
mov.u32 %r60, %nctaid.y;
.loc 2 71 21
// Advance the tile-y origin by nctaid.y * ntid.y.
mad.lo.s32 %r62, %r60, %r33, %r62;
.loc 2 71 1
setp.lt.s32 %p12, %r62, %r28;
@%p12 bra BB79_1;
BB79_15:
.loc 2 71 2
ret;
}
// map2_set_double: strided copy of the second operand into the output.
// With x = ntid.x*ctaid.x + tid.x and y = ntid.y*ctaid.y + tid.y:
//   out[x*param_3 + y] = b[x*param_7 + y]
//   param_0 (u32): exclusive upper bound of y
//   param_1 (u32): exclusive upper bound of x
//   param_2 (u64) / param_3 (u32): output pointer / stride in elements
//   param_4 (u64) / param_5 (u32): declared but never loaded by this kernel
//   param_6 (u64) / param_7 (u32): source pointer / stride in elements
// Both indices loop with a step of nctaid*ntid (2D grid-stride loop).
.visible .entry map2_set_double(
.param .u32 map2_set_double_param_0,
.param .u32 map2_set_double_param_1,
.param .u64 map2_set_double_param_2,
.param .u32 map2_set_double_param_3,
.param .u64 map2_set_double_param_4,
.param .u32 map2_set_double_param_5,
.param .u64 map2_set_double_param_6,
.param .u32 map2_set_double_param_7
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<2>;
ld.param.u32 %r12, [map2_set_double_param_0];
ld.param.u32 %r13, [map2_set_double_param_1];
ld.param.u64 %rd3, [map2_set_double_param_2];
ld.param.u32 %r14, [map2_set_double_param_3];
ld.param.u64 %rd4, [map2_set_double_param_6];
ld.param.u32 %r15, [map2_set_double_param_7];
// %rd1 = output, %rd2 = source.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 72 1
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
// %r25 = x = ntid.x*ctaid.x + tid.x
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 72 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB80_6;
.loc 2 72 1
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
// %r3 = initial y = ntid.y*ctaid.y + tid.y
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 72 22
mov.u32 %r21, %nctaid.x;
// %r4 = x step, %r5 = y step.
mul.lo.s32 %r4, %r21, %r1;
.loc 2 72 22
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over x.
BB80_2:
.loc 2 72 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB80_5;
.loc 2 72 1
// Per-x row offsets into the source and the output.
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over y.
BB80_4:
.loc 2 72 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 72 1
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 72 1
ld.global.f64 %fd1, [%rd6];
st.global.f64 [%rd8], %fd1;
.loc 2 72 22
add.s32 %r10, %r5, %r9;
.loc 2 72 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB80_4;
BB80_5:
.loc 2 72 22
add.s32 %r25, %r4, %r25;
.loc 2 72 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB80_2;
BB80_6:
.loc 2 72 2
ret;
}
// map2_v_s_set_double: fills a strided output with a scalar.
// With x = ntid.x*ctaid.x + tid.x and y = ntid.y*ctaid.y + tid.y:
//   out[x*param_3 + y] = param_6
//   param_0 (u32): exclusive upper bound of y
//   param_1 (u32): exclusive upper bound of x
//   param_2 (u64) / param_3 (u32): output pointer / stride in elements
//   param_4 (u64) / param_5 (u32): declared but never loaded by this kernel
//   param_6 (f64): value stored to every element
// Both indices loop with a step of nctaid*ntid (2D grid-stride loop).
.visible .entry map2_v_s_set_double(
.param .u32 map2_v_s_set_double_param_0,
.param .u32 map2_v_s_set_double_param_1,
.param .u64 map2_v_s_set_double_param_2,
.param .u32 map2_v_s_set_double_param_3,
.param .u64 map2_v_s_set_double_param_4,
.param .u32 map2_v_s_set_double_param_5,
.param .f64 map2_v_s_set_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<24>;
.reg .s64 %rd<5>;
.reg .f64 %fd<2>;
ld.param.u32 %r11, [map2_v_s_set_double_param_0];
ld.param.u32 %r12, [map2_v_s_set_double_param_1];
ld.param.u64 %rd2, [map2_v_s_set_double_param_2];
ld.param.u32 %r13, [map2_v_s_set_double_param_3];
ld.param.f64 %fd1, [map2_v_s_set_double_param_6];
// %rd1 = output; %fd1 = fill value.
cvta.to.global.u64 %rd1, %rd2;
.loc 2 72 1
mov.u32 %r1, %ntid.x;
mov.u32 %r14, %ctaid.x;
mov.u32 %r15, %tid.x;
// %r22 = x = ntid.x*ctaid.x + tid.x
mad.lo.s32 %r22, %r1, %r14, %r15;
.loc 2 72 1
setp.ge.s32 %p1, %r22, %r12;
@%p1 bra BB81_6;
.loc 2 72 1
mov.u32 %r16, %tid.y;
mov.u32 %r17, %ntid.y;
mov.u32 %r18, %ctaid.y;
// %r3 = initial y = ntid.y*ctaid.y + tid.y
mad.lo.s32 %r3, %r17, %r18, %r16;
.loc 2 72 22
mov.u32 %r19, %nctaid.x;
// %r4 = x step, %r5 = y step.
mul.lo.s32 %r4, %r19, %r1;
.loc 2 72 22
mov.u32 %r20, %nctaid.y;
mul.lo.s32 %r5, %r20, %r17;
// Outer loop over x.
BB81_2:
.loc 2 72 1
setp.ge.s32 %p2, %r3, %r11;
@%p2 bra BB81_5;
.loc 2 72 1
// Per-x row offset into the output.
mul.lo.s32 %r7, %r22, %r13;
mov.u32 %r23, %r3;
// Inner loop over y.
BB81_4:
.loc 2 72 1
mov.u32 %r8, %r23;
add.s32 %r21, %r8, %r7;
mul.wide.s32 %rd3, %r21, 8;
add.s64 %rd4, %rd1, %rd3;
.loc 2 72 1
st.global.f64 [%rd4], %fd1;
.loc 2 72 22
add.s32 %r9, %r5, %r8;
.loc 2 72 1
setp.lt.s32 %p3, %r9, %r11;
mov.u32 %r23, %r9;
@%p3 bra BB81_4;
BB81_5:
.loc 2 72 22
add.s32 %r22, %r4, %r22;
.loc 2 72 1
setp.lt.s32 %p4, %r22, %r12;
@%p4 bra BB81_2;
BB81_6:
.loc 2 72 2
ret;
}
// map2_s_v_set_double: strided copy of the array operand into the output; the
// scalar operand is ignored.
// With x = ntid.x*ctaid.x + tid.x and y = ntid.y*ctaid.y + tid.y:
//   out[x*param_3 + y] = b[x*param_6 + y]
//   param_0 (u32): exclusive upper bound of y
//   param_1 (u32): exclusive upper bound of x
//   param_2 (u64) / param_3 (u32): output pointer / stride in elements
//   param_4 (f64): declared but never loaded by this kernel
//   param_5 (u64) / param_6 (u32): source pointer / stride in elements
// Both indices loop with a step of nctaid*ntid (2D grid-stride loop).
.visible .entry map2_s_v_set_double(
.param .u32 map2_s_v_set_double_param_0,
.param .u32 map2_s_v_set_double_param_1,
.param .u64 map2_s_v_set_double_param_2,
.param .u32 map2_s_v_set_double_param_3,
.param .f64 map2_s_v_set_double_param_4,
.param .u64 map2_s_v_set_double_param_5,
.param .u32 map2_s_v_set_double_param_6
)
{
.reg .pred %p<5>;
.reg .s32 %r<27>;
.reg .s64 %rd<9>;
.reg .f64 %fd<2>;
ld.param.u32 %r12, [map2_s_v_set_double_param_0];
ld.param.u32 %r13, [map2_s_v_set_double_param_1];
ld.param.u64 %rd3, [map2_s_v_set_double_param_2];
ld.param.u32 %r14, [map2_s_v_set_double_param_3];
ld.param.u64 %rd4, [map2_s_v_set_double_param_5];
ld.param.u32 %r15, [map2_s_v_set_double_param_6];
// %rd1 = output, %rd2 = source.
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 72 1
mov.u32 %r1, %ntid.x;
mov.u32 %r16, %ctaid.x;
mov.u32 %r17, %tid.x;
// %r25 = x = ntid.x*ctaid.x + tid.x
mad.lo.s32 %r25, %r1, %r16, %r17;
.loc 2 72 1
setp.ge.s32 %p1, %r25, %r13;
@%p1 bra BB82_6;
.loc 2 72 1
mov.u32 %r18, %tid.y;
mov.u32 %r19, %ntid.y;
mov.u32 %r20, %ctaid.y;
// %r3 = initial y = ntid.y*ctaid.y + tid.y
mad.lo.s32 %r3, %r19, %r20, %r18;
.loc 2 72 22
mov.u32 %r21, %nctaid.x;
// %r4 = x step, %r5 = y step.
mul.lo.s32 %r4, %r21, %r1;
.loc 2 72 22
mov.u32 %r22, %nctaid.y;
mul.lo.s32 %r5, %r22, %r19;
// Outer loop over x.
BB82_2:
.loc 2 72 1
setp.ge.s32 %p2, %r3, %r12;
@%p2 bra BB82_5;
.loc 2 72 1
// Per-x row offsets into the source and the output.
mul.lo.s32 %r7, %r25, %r15;
mul.lo.s32 %r8, %r25, %r14;
mov.u32 %r26, %r3;
// Inner loop over y.
BB82_4:
.loc 2 72 1
mov.u32 %r9, %r26;
add.s32 %r23, %r9, %r7;
mul.wide.s32 %rd5, %r23, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 72 1
add.s32 %r24, %r9, %r8;
mul.wide.s32 %rd7, %r24, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 72 1
ld.global.f64 %fd1, [%rd6];
st.global.f64 [%rd8], %fd1;
.loc 2 72 22
add.s32 %r10, %r5, %r9;
.loc 2 72 1
setp.lt.s32 %p3, %r10, %r12;
mov.u32 %r26, %r10;
@%p3 bra BB82_4;
BB82_5:
.loc 2 72 22
add.s32 %r25, %r4, %r25;
.loc 2 72 1
setp.lt.s32 %p4, %r25, %r13;
@%p4 bra BB82_2;
BB82_6:
.loc 2 72 2
ret;
}
// map2_transpose_set_double: kernel entry that copies f64 data from the buffer
// in param_6 to the buffer in param_2, staging each tile through shared memory
// and swapping the tile's row/column indices between the write and the read.
//   param_0  bound for the index whose tile origin is ctaid.x * ntid.x
//   param_1  bound for the index whose tile origin is ctaid.y * ntid.y
//   param_2  destination pointer (converted to a global address)
//   param_3  destination stride in elements
//   param_4  declared but never loaded in this body
//   param_5  declared but never loaded in this body
//   param_6  source pointer (converted to a global address)
//   param_7  source stride in elements
// The shared tile is 8448 bytes and is addressed with a 264-byte row pitch
// (33 doubles per row); each tile extent is clamped to origin + 32.
.visible .entry map2_transpose_set_double(
.param .u32 map2_transpose_set_double_param_0,
.param .u32 map2_transpose_set_double_param_1,
.param .u64 map2_transpose_set_double_param_2,
.param .u32 map2_transpose_set_double_param_3,
.param .u64 map2_transpose_set_double_param_4,
.param .u32 map2_transpose_set_double_param_5,
.param .u64 map2_transpose_set_double_param_6,
.param .u32 map2_transpose_set_double_param_7
)
{
.reg .pred %p<13>;
.reg .s32 %r<65>;
.reg .s64 %rd<19>;
.reg .f64 %fd<3>;
// demoted variable
.shared .align 8 .b8 map2_transpose_set_double$__cuda_local_var_32588_1747_non_const_tile[8448];
ld.param.u32 %r26, [map2_transpose_set_double_param_0];
ld.param.u32 %r27, [map2_transpose_set_double_param_1];
ld.param.u64 %rd3, [map2_transpose_set_double_param_2];
ld.param.u32 %r28, [map2_transpose_set_double_param_3];
ld.param.u64 %rd4, [map2_transpose_set_double_param_6];
ld.param.u32 %r29, [map2_transpose_set_double_param_7];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 72 1
// %r59 = tile origin derived from ctaid.y * ntid.y (no thread index added).
mov.u32 %r1, %ntid.x;
mov.u32 %r30, %ctaid.y;
mov.u32 %r31, %ntid.y;
mul.lo.s32 %r59, %r30, %r31;
.loc 2 72 1
setp.ge.s32 %p2, %r59, %r27;
@%p2 bra BB83_15;
// Outer loop over %r59 tile origins.
BB83_1:
.loc 2 72 1
// %r60 = tile origin derived from ctaid.x * ntid.x.
mov.u32 %r33, %ctaid.x;
mul.lo.s32 %r60, %r33, %r1;
.loc 2 72 1
setp.ge.s32 %p3, %r60, %r26;
@%p3 bra BB83_14;
.loc 2 72 1
// %r4 = min(param_1, %r59 + 32): end of this tile along the %r59 axis.
add.s32 %r35, %r59, 32;
.loc 4 2621 10
min.s32 %r4, %r27, %r35;
// Inner loop over %r60 tile origins.
BB83_3:
.loc 2 72 1
// %r7 = min(param_0, %r60 + 32): end of this tile along the %r60 axis.
add.s32 %r38, %r60, 32;
.loc 4 2621 10
min.s32 %r7, %r26, %r38;
.loc 2 72 1
// Fill phase: %r61 walks the %r60 axis starting at tid.y, stepping by ntid.y.
mov.u32 %r39, %tid.y;
add.s32 %r61, %r39, %r60;
.loc 2 72 1
setp.ge.s32 %p4, %r61, %r7;
@%p4 bra BB83_8;
BB83_4:
.loc 2 72 1
mov.u32 %r40, %tid.x;
.loc 2 72 1
// %r62 walks the %r59 axis starting at tid.x, stepping by ntid.x.
add.s32 %r62, %r40, %r59;
.loc 2 72 1
setp.ge.s32 %p5, %r62, %r4;
@%p5 bra BB83_7;
.loc 2 72 1
// %r10 = source row offset (%r61 * param_7); %r11 = tile row (%r61 - %r60).
mul.lo.s32 %r10, %r61, %r29;
sub.s32 %r11, %r61, %r60;
// Load source[%r62 + %r61 * param_7] and store it at tile row %r11, column (%r62 - %r59).
BB83_6:
add.s32 %r43, %r62, %r10;
mul.wide.s32 %rd5, %r43, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 72 1
sub.s32 %r44, %r62, %r59;
mul.wide.s32 %rd7, %r11, 264;
mov.u64 %rd8, map2_transpose_set_double$__cuda_local_var_32588_1747_non_const_tile;
add.s64 %rd9, %rd8, %rd7;
mul.wide.s32 %rd10, %r44, 8;
add.s64 %rd11, %rd9, %rd10;
.loc 2 72 1
ld.global.f64 %fd1, [%rd6];
st.shared.f64 [%rd11], %fd1;
.loc 2 72 58
add.s32 %r62, %r1, %r62;
.loc 2 72 1
setp.lt.s32 %p6, %r62, %r4;
@%p6 bra BB83_6;
BB83_7:
.loc 2 72 58
add.s32 %r61, %r31, %r61;
.loc 2 72 1
setp.lt.s32 %p7, %r61, %r7;
@%p7 bra BB83_4;
BB83_8:
.loc 2 72 1
// Drain phase setup: %r63 walks the %r59 axis starting at tid.y.
add.s32 %r63, %r39, %r59;
setp.lt.s32 %p1, %r63, %r4;
.loc 2 72 1
// Barrier between the shared-memory fill above and the reads below.
bar.sync 0;
.loc 2 72 1
@!%p1 bra BB83_13;
bra.uni BB83_9;
BB83_9:
.loc 2 72 1
// %r64 walks the %r60 axis starting at tid.x, stepping by ntid.x.
mov.u32 %r49, %tid.x;
add.s32 %r64, %r49, %r60;
.loc 2 72 1
setp.ge.s32 %p8, %r64, %r7;
@%p8 bra BB83_12;
.loc 2 72 1
// %r18 = tile column (%r63 - %r59); %r19 = destination row offset (%r63 * param_3).
sub.s32 %r18, %r63, %r59;
mul.lo.s32 %r19, %r63, %r28;
// Read tile row (%r64 - %r60), column %r18 and store to destination[%r64 + %r63 * param_3].
BB83_11:
sub.s32 %r52, %r64, %r60;
mul.wide.s32 %rd12, %r52, 264;
mov.u64 %rd13, map2_transpose_set_double$__cuda_local_var_32588_1747_non_const_tile;
add.s64 %rd14, %rd13, %rd12;
mul.wide.s32 %rd15, %r18, 8;
add.s64 %rd16, %rd14, %rd15;
.loc 2 72 1
add.s32 %r53, %r64, %r19;
mul.wide.s32 %rd17, %r53, 8;
add.s64 %rd18, %rd1, %rd17;
.loc 2 72 1
ld.shared.f64 %fd2, [%rd16];
st.global.f64 [%rd18], %fd2;
.loc 2 72 58
add.s32 %r64, %r1, %r64;
.loc 2 72 1
setp.lt.s32 %p9, %r64, %r7;
@%p9 bra BB83_11;
BB83_12:
.loc 2 72 58
add.s32 %r63, %r31, %r63;
.loc 2 72 1
setp.lt.s32 %p10, %r63, %r4;
@%p10 bra BB83_9;
BB83_13:
.loc 2 72 1
// Barrier before the tile is overwritten by the next iteration's fill.
bar.sync 0;
.loc 2 72 1
mov.u32 %r56, %nctaid.x;
.loc 2 72 21
// Advance %r60 by nctaid.x * ntid.x.
mad.lo.s32 %r60, %r56, %r1, %r60;
.loc 2 72 1
setp.lt.s32 %p11, %r60, %r26;
@%p11 bra BB83_3;
BB83_14:
.loc 2 72 1
mov.u32 %r57, %nctaid.y;
.loc 2 72 21
// Advance %r59 by nctaid.y * ntid.y.
mad.lo.s32 %r59, %r57, %r31, %r59;
.loc 2 72 1
setp.lt.s32 %p12, %r59, %r27;
@%p12 bra BB83_1;
BB83_15:
.loc 2 72 2
ret;
}
// reduce_add_double: kernel entry that sums f64 values read from the buffer in
// param_3 and stores a partial sum into the buffer in param_2.
//   param_0  upper bound for the x index (ntid.x * ctaid.x + tid.x)
//   param_1  upper bound for the y index (ntid.y * ctaid.y + tid.y)
//   param_2  output pointer (converted to a global address)
//   param_3  input pointer (converted to a global address)
//   param_4  input stride in elements, multiplied by the y index
// Each thread accumulates in[x + y * param_4] over its x/y index ranges, the
// per-thread sums are combined with shfl.down, and threads with
// (tid.x & 31) == 0 store to out[nctaid.y * ctaid.x + ctaid.y].
.visible .entry reduce_add_double(
.param .u32 reduce_add_double_param_0,
.param .u32 reduce_add_double_param_1,
.param .u64 reduce_add_double_param_2,
.param .u64 reduce_add_double_param_3,
.param .u32 reduce_add_double_param_4
)
{
.reg .pred %p<8>;
.reg .s32 %r<50>;
.reg .s64 %rd<9>;
.reg .f64 %fd<15>;
ld.param.u32 %r16, [reduce_add_double_param_0];
ld.param.u32 %r17, [reduce_add_double_param_1];
ld.param.u64 %rd3, [reduce_add_double_param_2];
ld.param.u64 %rd4, [reduce_add_double_param_3];
ld.param.u32 %r18, [reduce_add_double_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 74 1
// %r47 = starting y index for this thread.
mov.u32 %r1, %ntid.y;
mov.u32 %r19, %ctaid.y;
mov.u32 %r20, %tid.y;
mad.lo.s32 %r47, %r1, %r19, %r20;
.loc 2 74 1
setp.lt.s32 %p1, %r47, %r17;
.loc 2 74 1
mov.u32 %r3, %ntid.x;
.loc 2 74 1
@%p1 bra BB84_2;
// Out-of-range threads skip the accumulation with a sum of 0.0.
mov.f64 %fd14, 0d0000000000000000;
bra.uni BB84_7;
BB84_2:
.loc 2 74 1
// %r4 = starting x index for this thread.
mov.u32 %r21, %tid.x;
mov.u32 %r22, %ctaid.x;
mad.lo.s32 %r4, %r3, %r22, %r21;
.loc 2 74 22
// %r5 = y step (nctaid.y * ntid.y).
mov.u32 %r23, %nctaid.y;
mul.lo.s32 %r5, %r23, %r1;
.loc 2 74 22
// %r6 = x step (nctaid.x * ntid.x).
mov.u32 %r24, %nctaid.x;
mul.lo.s32 %r6, %r24, %r3;
// Accumulator starts at 0.0.
mov.f64 %fd14, 0d0000000000000000;
// Outer loop over y indices.
BB84_3:
.loc 2 74 1
setp.ge.s32 %p2, %r4, %r16;
@%p2 bra BB84_6;
.loc 2 74 1
mul.lo.s32 %r8, %r47, %r18;
mov.u32 %r48, %r4;
// Inner loop over x indices: add in[x + y * stride] to the accumulator.
BB84_5:
.loc 2 74 1
mov.u32 %r9, %r48;
add.s32 %r25, %r9, %r8;
mul.wide.s32 %rd5, %r25, 8;
add.s64 %rd6, %rd2, %rd5;
ld.global.f64 %fd11, [%rd6];
add.f64 %fd14, %fd14, %fd11;
.loc 2 74 22
add.s32 %r10, %r6, %r9;
.loc 2 74 1
setp.lt.s32 %p3, %r10, %r16;
mov.u32 %r48, %r10;
@%p3 bra BB84_5;
BB84_6:
.loc 2 74 22
add.s32 %r47, %r5, %r47;
.loc 2 74 1
setp.lt.s32 %p4, %r47, %r17;
@%p4 bra BB84_3;
// Both the in-range and out-of-range paths join here before the barrier.
BB84_7:
.loc 2 74 1
bar.sync 0;
.loc 2 74 1
// Skip the shuffle loop when ntid.x < 2.
setp.lt.u32 %p5, %r3, 2;
@%p5 bra BB84_10;
.loc 2 74 1
// %r13 = shuffle control operand: ((32 - WARP_SZ) << 8) | 31.
mov.u32 %r27, WARP_SZ;
mov.u32 %r28, 32;
.loc 5 109 1
sub.s32 %r29, %r28, %r27;
shl.b32 %r30, %r29, 8;
or.b32 %r13, %r30, 31;
mov.u32 %r49, 1;
// Each pass splits the f64 into two 32-bit halves, shuffles both down by %r49
// lanes, reassembles them and adds the result; %r49 doubles while below ntid.x.
BB84_9:
.loc 1 115 1
// inline asm
mov.b64 { %r31, %r32 }, %fd14;
// inline asm
.loc 5 110 1
// inline asm
shfl.down.b32 %r33, %r32, %r49, %r13;
// inline asm
.loc 5 110 1
// inline asm
shfl.down.b32 %r37, %r31, %r49, %r13;
// inline asm
.loc 4 3330 10
mov.b64 %fd13, {%r37, %r33};
.loc 2 74 1
add.f64 %fd14, %fd14, %fd13;
.loc 2 74 40
shl.b32 %r49, %r49, 1;
.loc 2 74 1
setp.lt.u32 %p6, %r49, %r3;
@%p6 bra BB84_9;
BB84_10:
.loc 2 74 1
// Only threads with (tid.x & 31) == 0 store a result.
mov.u32 %r41, %tid.x;
and.b32 %r42, %r41, 31;
setp.ne.s32 %p7, %r42, 0;
@%p7 bra BB84_12;
.loc 2 74 1
// Output slot = nctaid.y * ctaid.x + ctaid.y.
// NOTE(review): the slot depends only on block coordinates, so every storing
// thread in a CTA writes the same address - confirm the intended launch shape.
mov.u32 %r43, %nctaid.y;
mov.u32 %r44, %ctaid.x;
mad.lo.s32 %r46, %r43, %r44, %r19;
mul.wide.u32 %rd7, %r46, 8;
add.s64 %rd8, %rd1, %rd7;
st.global.f64 [%rd8], %fd14;
BB84_12:
.loc 2 74 2
ret;
}
// reduce_col_add_double: kernel entry that, for each outer index
// j = ntid.y * ctaid.x + tid.y (stepping by nctaid.x * ntid.y while j < param_1),
// sums in[i + j * param_4] for i = tid.x, tid.x + ntid.x, ... < param_0 and
// stores the shuffled-down sum to out[j] from threads with (tid.x & 31) == 0.
//   param_0  upper bound for the inner index i
//   param_1  upper bound for the outer index j
//   param_2  output pointer (converted to a global address)
//   param_3  input pointer (converted to a global address)
//   param_4  input stride in elements, multiplied by j
.visible .entry reduce_col_add_double(
.param .u32 reduce_col_add_double_param_0,
.param .u32 reduce_col_add_double_param_1,
.param .u64 reduce_col_add_double_param_2,
.param .u64 reduce_col_add_double_param_3,
.param .u32 reduce_col_add_double_param_4
)
{
.reg .pred %p<8>;
.reg .s32 %r<40>;
.reg .s64 %rd<9>;
.reg .f64 %fd<13>;
ld.param.u32 %r15, [reduce_col_add_double_param_0];
ld.param.u32 %r16, [reduce_col_add_double_param_1];
ld.param.u64 %rd3, [reduce_col_add_double_param_2];
ld.param.u64 %rd4, [reduce_col_add_double_param_3];
ld.param.u32 %r17, [reduce_col_add_double_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 74 1
// %r37 = outer index j for this thread.
mov.u32 %r1, %ntid.y;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.y;
mad.lo.s32 %r37, %r1, %r18, %r19;
.loc 2 74 1
setp.ge.s32 %p1, %r37, %r16;
@%p1 bra BB85_12;
.loc 2 74 1
mov.u32 %r3, %tid.x;
.loc 2 74 1
mov.u32 %r4, %ntid.x;
.loc 2 74 1
// %r5 = tid.x & 31, used later to select the storing threads.
and.b32 %r5, %r3, 31;
.loc 2 74 22
// %r6 = outer step (nctaid.x * ntid.y).
mov.u32 %r20, %nctaid.x;
mul.lo.s32 %r6, %r20, %r1;
.loc 2 74 1
// %r7 = shuffle control operand: ((32 - WARP_SZ) << 8) | 31.
mov.u32 %r21, WARP_SZ;
mov.u32 %r22, 32;
.loc 5 109 1
sub.s32 %r23, %r22, %r21;
shl.b32 %r24, %r23, 8;
or.b32 %r7, %r24, 31;
// Outer loop over j.
BB85_2:
setp.lt.s32 %p2, %r3, %r15;
.loc 2 74 1
@%p2 bra BB85_4;
// Threads with tid.x >= param_0 contribute 0.0.
mov.f64 %fd12, 0d0000000000000000;
bra.uni BB85_6;
BB85_4:
.loc 2 74 1
mul.lo.s32 %r9, %r37, %r17;
// Accumulator starts at 0.0.
mov.f64 %fd12, 0d0000000000000000;
mov.u32 %r38, %r3;
// Inner loop over i: add in[i + j * stride] to the accumulator.
BB85_5:
.loc 2 74 1
mov.u32 %r10, %r38;
add.s32 %r25, %r10, %r9;
mul.wide.s32 %rd5, %r25, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 74 1
ld.global.f64 %fd9, [%rd6];
add.f64 %fd12, %fd12, %fd9;
.loc 2 74 22
add.s32 %r11, %r4, %r10;
.loc 2 74 1
setp.lt.s32 %p3, %r11, %r15;
mov.u32 %r38, %r11;
@%p3 bra BB85_5;
BB85_6:
.loc 2 74 1
// NOTE(review): this barrier sits inside the j loop, whose entry and trip
// count depend on tid.y - confirm every thread of a CTA reaches it equally.
bar.sync 0;
.loc 2 74 1
// Skip the shuffle loop when ntid.x < 2.
setp.lt.u32 %p4, %r4, 2;
@%p4 bra BB85_9;
mov.u32 %r39, 1;
// Each pass shuffles both 32-bit halves of the f64 down by %r39 lanes and
// adds the reassembled value; %r39 doubles while below ntid.x.
BB85_8:
.loc 1 115 1
// inline asm
mov.b64 { %r27, %r28 }, %fd12;
// inline asm
.loc 5 110 1
// inline asm
shfl.down.b32 %r29, %r28, %r39, %r7;
// inline asm
.loc 5 110 1
// inline asm
shfl.down.b32 %r33, %r27, %r39, %r7;
// inline asm
.loc 4 3330 10
mov.b64 %fd11, {%r33, %r29};
.loc 2 74 1
add.f64 %fd12, %fd12, %fd11;
.loc 2 74 40
shl.b32 %r39, %r39, 1;
.loc 2 74 1
setp.lt.u32 %p5, %r39, %r4;
@%p5 bra BB85_8;
BB85_9:
.loc 2 74 1
// Only threads with (tid.x & 31) == 0 store out[j].
setp.ne.s32 %p6, %r5, 0;
@%p6 bra BB85_11;
mul.wide.s32 %rd7, %r37, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 74 1
st.global.f64 [%rd8], %fd12;
BB85_11:
.loc 2 74 22
add.s32 %r37, %r6, %r37;
.loc 2 74 1
setp.lt.s32 %p7, %r37, %r16;
@%p7 bra BB85_2;
BB85_12:
.loc 2 74 2
ret;
}
// reduce_row_add_double: kernel entry that, for each index
// i = ntid.x * ctaid.x + tid.x (stepping by nctaid.x * ntid.x while i < param_0),
// sums in[k * param_4 + i] for k = 0 .. param_1 - 1 and stores the sum to out[i].
//   param_0  upper bound for the per-thread index i
//   param_1  number of terms summed per output (loop skipped when <= 0)
//   param_2  output pointer (converted to a global address)
//   param_3  input pointer (converted to a global address)
//   param_4  input stride in elements, multiplied by k
.visible .entry reduce_row_add_double(
.param .u32 reduce_row_add_double_param_0,
.param .u32 reduce_row_add_double_param_1,
.param .u64 reduce_row_add_double_param_2,
.param .u64 reduce_row_add_double_param_3,
.param .u32 reduce_row_add_double_param_4
)
{
.reg .pred %p<5>;
.reg .s32 %r<18>;
.reg .s64 %rd<9>;
.reg .f64 %fd<8>;
ld.param.u32 %r7, [reduce_row_add_double_param_0];
ld.param.u32 %r8, [reduce_row_add_double_param_1];
ld.param.u64 %rd3, [reduce_row_add_double_param_2];
ld.param.u64 %rd4, [reduce_row_add_double_param_3];
ld.param.u32 %r9, [reduce_row_add_double_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 74 1
// %r1 = step for i (nctaid.x * ntid.x).
mov.u32 %r10, %nctaid.x;
mov.u32 %r11, %ntid.x;
mul.lo.s32 %r1, %r10, %r11;
.loc 2 74 1
// %r16 = starting i for this thread.
mov.u32 %r12, %ctaid.x;
mov.u32 %r13, %tid.x;
mad.lo.s32 %r16, %r11, %r12, %r13;
.loc 2 74 1
setp.ge.s32 %p1, %r16, %r7;
@%p1 bra BB86_5;
// Outer loop over i; the accumulator is reset to 0.0 for each output.
BB86_1:
mov.f64 %fd7, 0d0000000000000000;
setp.gt.s32 %p2, %r8, 0;
.loc 2 74 1
@%p2 bra BB86_2;
bra.uni BB86_4;
BB86_2:
mov.u32 %r17, 0;
// Inner loop over k: add in[k * stride + i] to the accumulator.
BB86_3:
.loc 2 74 1
mad.lo.s32 %r15, %r17, %r9, %r16;
mul.wide.s32 %rd5, %r15, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 74 1
ld.global.f64 %fd6, [%rd6];
add.f64 %fd7, %fd7, %fd6;
.loc 2 74 22
add.s32 %r17, %r17, 1;
.loc 2 74 1
setp.lt.s32 %p3, %r17, %r8;
@%p3 bra BB86_3;
// Store the sum to out[i], then advance i.
BB86_4:
mul.wide.s32 %rd7, %r16, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 74 1
st.global.f64 [%rd8], %fd7;
.loc 2 74 22
add.s32 %r16, %r16, %r1;
.loc 2 74 1
setp.lt.s32 %p4, %r16, %r7;
@%p4 bra BB86_1;
BB86_5:
.loc 2 74 2
ret;
}
// reduce_max_double: kernel entry that takes the max.f64 of values read from
// the buffer in param_3 and stores a partial maximum into the buffer in param_2.
//   param_0  upper bound for the x index (ntid.x * ctaid.x + tid.x)
//   param_1  upper bound for the y index (ntid.y * ctaid.y + tid.y)
//   param_2  output pointer (converted to a global address)
//   param_3  input pointer (converted to a global address)
//   param_4  input stride in elements, multiplied by the y index
// Each thread folds in[x + y * param_4] over its x/y index ranges starting
// from -infinity, the per-thread values are combined with shfl.down, and
// threads with (tid.x & 31) == 0 store to out[nctaid.y * ctaid.x + ctaid.y].
.visible .entry reduce_max_double(
.param .u32 reduce_max_double_param_0,
.param .u32 reduce_max_double_param_1,
.param .u64 reduce_max_double_param_2,
.param .u64 reduce_max_double_param_3,
.param .u32 reduce_max_double_param_4
)
{
.reg .pred %p<8>;
.reg .s32 %r<50>;
.reg .s64 %rd<9>;
.reg .f64 %fd<15>;
ld.param.u32 %r16, [reduce_max_double_param_0];
ld.param.u32 %r17, [reduce_max_double_param_1];
ld.param.u64 %rd3, [reduce_max_double_param_2];
ld.param.u64 %rd4, [reduce_max_double_param_3];
ld.param.u32 %r18, [reduce_max_double_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 75 1
// %r47 = starting y index for this thread.
mov.u32 %r1, %ntid.y;
mov.u32 %r19, %ctaid.y;
mov.u32 %r20, %tid.y;
mad.lo.s32 %r47, %r1, %r19, %r20;
.loc 2 75 1
setp.lt.s32 %p1, %r47, %r17;
.loc 2 75 1
mov.u32 %r3, %ntid.x;
.loc 2 75 1
@%p1 bra BB87_2;
// Out-of-range threads skip the accumulation with -infinity (0dFFF0...).
mov.f64 %fd14, 0dFFF0000000000000;
bra.uni BB87_7;
BB87_2:
.loc 2 75 1
// %r4 = starting x index for this thread.
mov.u32 %r21, %tid.x;
mov.u32 %r22, %ctaid.x;
mad.lo.s32 %r4, %r3, %r22, %r21;
.loc 2 75 22
// %r5 = y step (nctaid.y * ntid.y).
mov.u32 %r23, %nctaid.y;
mul.lo.s32 %r5, %r23, %r1;
.loc 2 75 22
// %r6 = x step (nctaid.x * ntid.x).
mov.u32 %r24, %nctaid.x;
mul.lo.s32 %r6, %r24, %r3;
// Accumulator starts at -infinity.
mov.f64 %fd14, 0dFFF0000000000000;
// Outer loop over y indices.
BB87_3:
.loc 2 75 1
setp.ge.s32 %p2, %r4, %r16;
@%p2 bra BB87_6;
.loc 2 75 1
mul.lo.s32 %r8, %r47, %r18;
mov.u32 %r48, %r4;
// Inner loop over x indices: fold in[x + y * stride] into the running max.
BB87_5:
.loc 2 75 1
mov.u32 %r9, %r48;
add.s32 %r25, %r9, %r8;
mul.wide.s32 %rd5, %r25, 8;
add.s64 %rd6, %rd2, %rd5;
ld.global.f64 %fd11, [%rd6];
.loc 4 2785 10
max.f64 %fd14, %fd14, %fd11;
.loc 2 75 22
add.s32 %r10, %r6, %r9;
.loc 2 75 1
setp.lt.s32 %p3, %r10, %r16;
mov.u32 %r48, %r10;
@%p3 bra BB87_5;
BB87_6:
.loc 2 75 22
add.s32 %r47, %r5, %r47;
.loc 2 75 1
setp.lt.s32 %p4, %r47, %r17;
@%p4 bra BB87_3;
// Both the in-range and out-of-range paths join here before the barrier.
BB87_7:
.loc 2 75 1
bar.sync 0;
.loc 2 75 1
// Skip the shuffle loop when ntid.x < 2.
setp.lt.u32 %p5, %r3, 2;
@%p5 bra BB87_10;
.loc 2 75 1
// %r13 = shuffle control operand: ((32 - WARP_SZ) << 8) | 31.
mov.u32 %r27, WARP_SZ;
mov.u32 %r28, 32;
.loc 5 109 1
sub.s32 %r29, %r28, %r27;
shl.b32 %r30, %r29, 8;
or.b32 %r13, %r30, 31;
mov.u32 %r49, 1;
// Each pass shuffles both 32-bit halves of the f64 down by %r49 lanes and
// takes the max with the reassembled value; %r49 doubles while below ntid.x.
BB87_9:
.loc 1 115 1
// inline asm
mov.b64 { %r31, %r32 }, %fd14;
// inline asm
.loc 5 110 1
// inline asm
shfl.down.b32 %r33, %r32, %r49, %r13;
// inline asm
.loc 5 110 1
// inline asm
shfl.down.b32 %r37, %r31, %r49, %r13;
// inline asm
.loc 4 3330 10
mov.b64 %fd13, {%r37, %r33};
.loc 4 2785 10
max.f64 %fd14, %fd14, %fd13;
.loc 2 75 40
shl.b32 %r49, %r49, 1;
.loc 2 75 1
setp.lt.u32 %p6, %r49, %r3;
@%p6 bra BB87_9;
BB87_10:
.loc 2 75 1
// Only threads with (tid.x & 31) == 0 store a result.
mov.u32 %r41, %tid.x;
and.b32 %r42, %r41, 31;
setp.ne.s32 %p7, %r42, 0;
@%p7 bra BB87_12;
.loc 2 75 1
// Output slot = nctaid.y * ctaid.x + ctaid.y.
// NOTE(review): the slot depends only on block coordinates, so every storing
// thread in a CTA writes the same address - confirm the intended launch shape.
mov.u32 %r43, %nctaid.y;
mov.u32 %r44, %ctaid.x;
mad.lo.s32 %r46, %r43, %r44, %r19;
mul.wide.u32 %rd7, %r46, 8;
add.s64 %rd8, %rd1, %rd7;
st.global.f64 [%rd8], %fd14;
BB87_12:
.loc 2 75 2
ret;
}
// reduce_col_max_double: kernel entry that, for each outer index
// j = ntid.y * ctaid.x + tid.y (stepping by nctaid.x * ntid.y while j < param_1),
// takes the max.f64 of in[i + j * param_4] for i = tid.x, tid.x + ntid.x, ...
// < param_0 (starting from -infinity) and stores the shuffled-down result to
// out[j] from threads with (tid.x & 31) == 0.
//   param_0  upper bound for the inner index i
//   param_1  upper bound for the outer index j
//   param_2  output pointer (converted to a global address)
//   param_3  input pointer (converted to a global address)
//   param_4  input stride in elements, multiplied by j
.visible .entry reduce_col_max_double(
.param .u32 reduce_col_max_double_param_0,
.param .u32 reduce_col_max_double_param_1,
.param .u64 reduce_col_max_double_param_2,
.param .u64 reduce_col_max_double_param_3,
.param .u32 reduce_col_max_double_param_4
)
{
.reg .pred %p<8>;
.reg .s32 %r<40>;
.reg .s64 %rd<9>;
.reg .f64 %fd<13>;
ld.param.u32 %r15, [reduce_col_max_double_param_0];
ld.param.u32 %r16, [reduce_col_max_double_param_1];
ld.param.u64 %rd3, [reduce_col_max_double_param_2];
ld.param.u64 %rd4, [reduce_col_max_double_param_3];
ld.param.u32 %r17, [reduce_col_max_double_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 75 1
// %r37 = outer index j for this thread.
mov.u32 %r1, %ntid.y;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.y;
mad.lo.s32 %r37, %r1, %r18, %r19;
.loc 2 75 1
setp.ge.s32 %p1, %r37, %r16;
@%p1 bra BB88_12;
.loc 2 75 1
mov.u32 %r3, %tid.x;
.loc 2 75 1
mov.u32 %r4, %ntid.x;
.loc 2 75 1
// %r5 = tid.x & 31, used later to select the storing threads.
and.b32 %r5, %r3, 31;
.loc 2 75 22
// %r6 = outer step (nctaid.x * ntid.y).
mov.u32 %r20, %nctaid.x;
mul.lo.s32 %r6, %r20, %r1;
.loc 2 75 1
// %r7 = shuffle control operand: ((32 - WARP_SZ) << 8) | 31.
mov.u32 %r21, WARP_SZ;
mov.u32 %r22, 32;
.loc 5 109 1
sub.s32 %r23, %r22, %r21;
shl.b32 %r24, %r23, 8;
or.b32 %r7, %r24, 31;
// Outer loop over j.
BB88_2:
setp.lt.s32 %p2, %r3, %r15;
.loc 2 75 1
@%p2 bra BB88_4;
// Threads with tid.x >= param_0 contribute -infinity.
mov.f64 %fd12, 0dFFF0000000000000;
bra.uni BB88_6;
BB88_4:
.loc 2 75 1
mul.lo.s32 %r9, %r37, %r17;
// Accumulator starts at -infinity.
mov.f64 %fd12, 0dFFF0000000000000;
mov.u32 %r38, %r3;
// Inner loop over i: fold in[i + j * stride] into the running max.
BB88_5:
.loc 2 75 1
mov.u32 %r10, %r38;
add.s32 %r25, %r10, %r9;
mul.wide.s32 %rd5, %r25, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 75 1
ld.global.f64 %fd9, [%rd6];
.loc 4 2785 10
max.f64 %fd12, %fd12, %fd9;
.loc 2 75 22
add.s32 %r11, %r4, %r10;
.loc 2 75 1
setp.lt.s32 %p3, %r11, %r15;
mov.u32 %r38, %r11;
@%p3 bra BB88_5;
BB88_6:
.loc 2 75 1
// NOTE(review): this barrier sits inside the j loop, whose entry and trip
// count depend on tid.y - confirm every thread of a CTA reaches it equally.
bar.sync 0;
.loc 2 75 1
// Skip the shuffle loop when ntid.x < 2.
setp.lt.u32 %p4, %r4, 2;
@%p4 bra BB88_9;
mov.u32 %r39, 1;
// Each pass shuffles both 32-bit halves of the f64 down by %r39 lanes and
// takes the max with the reassembled value; %r39 doubles while below ntid.x.
BB88_8:
.loc 1 115 1
// inline asm
mov.b64 { %r27, %r28 }, %fd12;
// inline asm
.loc 5 110 1
// inline asm
shfl.down.b32 %r29, %r28, %r39, %r7;
// inline asm
.loc 5 110 1
// inline asm
shfl.down.b32 %r33, %r27, %r39, %r7;
// inline asm
.loc 4 3330 10
mov.b64 %fd11, {%r33, %r29};
.loc 4 2785 10
max.f64 %fd12, %fd12, %fd11;
.loc 2 75 40
shl.b32 %r39, %r39, 1;
.loc 2 75 1
setp.lt.u32 %p5, %r39, %r4;
@%p5 bra BB88_8;
BB88_9:
.loc 2 75 1
// Only threads with (tid.x & 31) == 0 store out[j].
setp.ne.s32 %p6, %r5, 0;
@%p6 bra BB88_11;
mul.wide.s32 %rd7, %r37, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 75 1
st.global.f64 [%rd8], %fd12;
BB88_11:
.loc 2 75 22
add.s32 %r37, %r6, %r37;
.loc 2 75 1
setp.lt.s32 %p7, %r37, %r16;
@%p7 bra BB88_2;
BB88_12:
.loc 2 75 2
ret;
}
// reduce_row_max_double: kernel entry that, for each index
// i = ntid.x * ctaid.x + tid.x (stepping by nctaid.x * ntid.x while i < param_0),
// takes the max.f64 of in[k * param_4 + i] for k = 0 .. param_1 - 1, starting
// from -infinity, and stores the result to out[i].
//   param_0  upper bound for the per-thread index i
//   param_1  number of terms folded per output (loop skipped when <= 0)
//   param_2  output pointer (converted to a global address)
//   param_3  input pointer (converted to a global address)
//   param_4  input stride in elements, multiplied by k
.visible .entry reduce_row_max_double(
.param .u32 reduce_row_max_double_param_0,
.param .u32 reduce_row_max_double_param_1,
.param .u64 reduce_row_max_double_param_2,
.param .u64 reduce_row_max_double_param_3,
.param .u32 reduce_row_max_double_param_4
)
{
.reg .pred %p<5>;
.reg .s32 %r<18>;
.reg .s64 %rd<9>;
.reg .f64 %fd<8>;
ld.param.u32 %r7, [reduce_row_max_double_param_0];
ld.param.u32 %r8, [reduce_row_max_double_param_1];
ld.param.u64 %rd3, [reduce_row_max_double_param_2];
ld.param.u64 %rd4, [reduce_row_max_double_param_3];
ld.param.u32 %r9, [reduce_row_max_double_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 75 1
// %r1 = step for i (nctaid.x * ntid.x).
mov.u32 %r10, %nctaid.x;
mov.u32 %r11, %ntid.x;
mul.lo.s32 %r1, %r10, %r11;
.loc 2 75 1
// %r16 = starting i for this thread.
mov.u32 %r12, %ctaid.x;
mov.u32 %r13, %tid.x;
mad.lo.s32 %r16, %r11, %r12, %r13;
.loc 2 75 1
setp.ge.s32 %p1, %r16, %r7;
@%p1 bra BB89_5;
// Outer loop over i; the accumulator is reset to -infinity for each output.
BB89_1:
mov.f64 %fd7, 0dFFF0000000000000;
setp.gt.s32 %p2, %r8, 0;
.loc 2 75 1
@%p2 bra BB89_2;
bra.uni BB89_4;
BB89_2:
mov.u32 %r17, 0;
// Inner loop over k: fold in[k * stride + i] into the running max.
BB89_3:
.loc 2 75 1
mad.lo.s32 %r15, %r17, %r9, %r16;
mul.wide.s32 %rd5, %r15, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 75 1
ld.global.f64 %fd6, [%rd6];
.loc 4 2785 10
max.f64 %fd7, %fd7, %fd6;
.loc 2 75 22
add.s32 %r17, %r17, 1;
.loc 2 75 1
setp.lt.s32 %p3, %r17, %r8;
@%p3 bra BB89_3;
// Store the result to out[i], then advance i.
BB89_4:
mul.wide.s32 %rd7, %r16, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 75 1
st.global.f64 [%rd8], %fd7;
.loc 2 75 22
add.s32 %r16, %r16, %r1;
.loc 2 75 1
setp.lt.s32 %p4, %r16, %r7;
@%p4 bra BB89_1;
BB89_5:
.loc 2 75 2
ret;
}
// reduce_min_double: kernel entry that takes the min.f64 of values read from
// the buffer in param_3 and stores a partial minimum into the buffer in param_2.
//   param_0  upper bound for the x index (ntid.x * ctaid.x + tid.x)
//   param_1  upper bound for the y index (ntid.y * ctaid.y + tid.y)
//   param_2  output pointer (converted to a global address)
//   param_3  input pointer (converted to a global address)
//   param_4  input stride in elements, multiplied by the y index
// Each thread folds in[x + y * param_4] over its x/y index ranges starting
// from +infinity, the per-thread values are combined with shfl.down, and
// threads with (tid.x & 31) == 0 store to out[nctaid.y * ctaid.x + ctaid.y].
.visible .entry reduce_min_double(
.param .u32 reduce_min_double_param_0,
.param .u32 reduce_min_double_param_1,
.param .u64 reduce_min_double_param_2,
.param .u64 reduce_min_double_param_3,
.param .u32 reduce_min_double_param_4
)
{
.reg .pred %p<8>;
.reg .s32 %r<50>;
.reg .s64 %rd<9>;
.reg .f64 %fd<15>;
ld.param.u32 %r16, [reduce_min_double_param_0];
ld.param.u32 %r17, [reduce_min_double_param_1];
ld.param.u64 %rd3, [reduce_min_double_param_2];
ld.param.u64 %rd4, [reduce_min_double_param_3];
ld.param.u32 %r18, [reduce_min_double_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 76 1
// %r47 = starting y index for this thread.
mov.u32 %r1, %ntid.y;
mov.u32 %r19, %ctaid.y;
mov.u32 %r20, %tid.y;
mad.lo.s32 %r47, %r1, %r19, %r20;
.loc 2 76 1
setp.lt.s32 %p1, %r47, %r17;
.loc 2 76 1
mov.u32 %r3, %ntid.x;
.loc 2 76 1
@%p1 bra BB90_2;
// Out-of-range threads skip the accumulation with +infinity (0d7FF0...).
mov.f64 %fd14, 0d7FF0000000000000;
bra.uni BB90_7;
BB90_2:
.loc 2 76 1
// %r4 = starting x index for this thread.
mov.u32 %r21, %tid.x;
mov.u32 %r22, %ctaid.x;
mad.lo.s32 %r4, %r3, %r22, %r21;
.loc 2 76 22
// %r5 = y step (nctaid.y * ntid.y).
mov.u32 %r23, %nctaid.y;
mul.lo.s32 %r5, %r23, %r1;
.loc 2 76 22
// %r6 = x step (nctaid.x * ntid.x).
mov.u32 %r24, %nctaid.x;
mul.lo.s32 %r6, %r24, %r3;
// Accumulator starts at +infinity.
mov.f64 %fd14, 0d7FF0000000000000;
// Outer loop over y indices.
BB90_3:
.loc 2 76 1
setp.ge.s32 %p2, %r4, %r16;
@%p2 bra BB90_6;
.loc 2 76 1
mul.lo.s32 %r8, %r47, %r18;
mov.u32 %r48, %r4;
// Inner loop over x indices: fold in[x + y * stride] into the running min.
BB90_5:
.loc 2 76 1
mov.u32 %r9, %r48;
add.s32 %r25, %r9, %r8;
mul.wide.s32 %rd5, %r25, 8;
add.s64 %rd6, %rd2, %rd5;
ld.global.f64 %fd11, [%rd6];
.loc 4 2780 10
min.f64 %fd14, %fd14, %fd11;
.loc 2 76 22
add.s32 %r10, %r6, %r9;
.loc 2 76 1
setp.lt.s32 %p3, %r10, %r16;
mov.u32 %r48, %r10;
@%p3 bra BB90_5;
BB90_6:
.loc 2 76 22
add.s32 %r47, %r5, %r47;
.loc 2 76 1
setp.lt.s32 %p4, %r47, %r17;
@%p4 bra BB90_3;
// Both the in-range and out-of-range paths join here before the barrier.
BB90_7:
.loc 2 76 1
bar.sync 0;
.loc 2 76 1
// Skip the shuffle loop when ntid.x < 2.
setp.lt.u32 %p5, %r3, 2;
@%p5 bra BB90_10;
.loc 2 76 1
// %r13 = shuffle control operand: ((32 - WARP_SZ) << 8) | 31.
mov.u32 %r27, WARP_SZ;
mov.u32 %r28, 32;
.loc 5 109 1
sub.s32 %r29, %r28, %r27;
shl.b32 %r30, %r29, 8;
or.b32 %r13, %r30, 31;
mov.u32 %r49, 1;
// Each pass shuffles both 32-bit halves of the f64 down by %r49 lanes and
// takes the min with the reassembled value; %r49 doubles while below ntid.x.
BB90_9:
.loc 1 115 1
// inline asm
mov.b64 { %r31, %r32 }, %fd14;
// inline asm
.loc 5 110 1
// inline asm
shfl.down.b32 %r33, %r32, %r49, %r13;
// inline asm
.loc 5 110 1
// inline asm
shfl.down.b32 %r37, %r31, %r49, %r13;
// inline asm
.loc 4 3330 10
mov.b64 %fd13, {%r37, %r33};
.loc 4 2780 10
min.f64 %fd14, %fd14, %fd13;
.loc 2 76 40
shl.b32 %r49, %r49, 1;
.loc 2 76 1
setp.lt.u32 %p6, %r49, %r3;
@%p6 bra BB90_9;
BB90_10:
.loc 2 76 1
// Only threads with (tid.x & 31) == 0 store a result.
mov.u32 %r41, %tid.x;
and.b32 %r42, %r41, 31;
setp.ne.s32 %p7, %r42, 0;
@%p7 bra BB90_12;
.loc 2 76 1
// Output slot = nctaid.y * ctaid.x + ctaid.y.
// NOTE(review): the slot depends only on block coordinates, so every storing
// thread in a CTA writes the same address - confirm the intended launch shape.
mov.u32 %r43, %nctaid.y;
mov.u32 %r44, %ctaid.x;
mad.lo.s32 %r46, %r43, %r44, %r19;
mul.wide.u32 %rd7, %r46, 8;
add.s64 %rd8, %rd1, %rd7;
st.global.f64 [%rd8], %fd14;
BB90_12:
.loc 2 76 2
ret;
}
// reduce_col_min_double: kernel entry that, for each outer index
// j = ntid.y * ctaid.x + tid.y (stepping by nctaid.x * ntid.y while j < param_1),
// takes the min.f64 of in[i + j * param_4] for i = tid.x, tid.x + ntid.x, ...
// < param_0 (starting from +infinity) and stores the shuffled-down result to
// out[j] from threads with (tid.x & 31) == 0.
//   param_0  upper bound for the inner index i
//   param_1  upper bound for the outer index j
//   param_2  output pointer (converted to a global address)
//   param_3  input pointer (converted to a global address)
//   param_4  input stride in elements, multiplied by j
.visible .entry reduce_col_min_double(
.param .u32 reduce_col_min_double_param_0,
.param .u32 reduce_col_min_double_param_1,
.param .u64 reduce_col_min_double_param_2,
.param .u64 reduce_col_min_double_param_3,
.param .u32 reduce_col_min_double_param_4
)
{
.reg .pred %p<8>;
.reg .s32 %r<40>;
.reg .s64 %rd<9>;
.reg .f64 %fd<13>;
ld.param.u32 %r15, [reduce_col_min_double_param_0];
ld.param.u32 %r16, [reduce_col_min_double_param_1];
ld.param.u64 %rd3, [reduce_col_min_double_param_2];
ld.param.u64 %rd4, [reduce_col_min_double_param_3];
ld.param.u32 %r17, [reduce_col_min_double_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 76 1
// %r37 = outer index j for this thread.
mov.u32 %r1, %ntid.y;
mov.u32 %r18, %ctaid.x;
mov.u32 %r19, %tid.y;
mad.lo.s32 %r37, %r1, %r18, %r19;
.loc 2 76 1
setp.ge.s32 %p1, %r37, %r16;
@%p1 bra BB91_12;
.loc 2 76 1
mov.u32 %r3, %tid.x;
.loc 2 76 1
mov.u32 %r4, %ntid.x;
.loc 2 76 1
// %r5 = tid.x & 31, used later to select the storing threads.
and.b32 %r5, %r3, 31;
.loc 2 76 22
// %r6 = outer step (nctaid.x * ntid.y).
mov.u32 %r20, %nctaid.x;
mul.lo.s32 %r6, %r20, %r1;
.loc 2 76 1
// %r7 = shuffle control operand: ((32 - WARP_SZ) << 8) | 31.
mov.u32 %r21, WARP_SZ;
mov.u32 %r22, 32;
.loc 5 109 1
sub.s32 %r23, %r22, %r21;
shl.b32 %r24, %r23, 8;
or.b32 %r7, %r24, 31;
// Outer loop over j.
BB91_2:
setp.lt.s32 %p2, %r3, %r15;
.loc 2 76 1
@%p2 bra BB91_4;
// Threads with tid.x >= param_0 contribute +infinity.
mov.f64 %fd12, 0d7FF0000000000000;
bra.uni BB91_6;
BB91_4:
.loc 2 76 1
mul.lo.s32 %r9, %r37, %r17;
// Accumulator starts at +infinity.
mov.f64 %fd12, 0d7FF0000000000000;
mov.u32 %r38, %r3;
// Inner loop over i: fold in[i + j * stride] into the running min.
BB91_5:
.loc 2 76 1
mov.u32 %r10, %r38;
add.s32 %r25, %r10, %r9;
mul.wide.s32 %rd5, %r25, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 76 1
ld.global.f64 %fd9, [%rd6];
.loc 4 2780 10
min.f64 %fd12, %fd12, %fd9;
.loc 2 76 22
add.s32 %r11, %r4, %r10;
.loc 2 76 1
setp.lt.s32 %p3, %r11, %r15;
mov.u32 %r38, %r11;
@%p3 bra BB91_5;
BB91_6:
.loc 2 76 1
// NOTE(review): this barrier sits inside the j loop, whose entry and trip
// count depend on tid.y - confirm every thread of a CTA reaches it equally.
bar.sync 0;
.loc 2 76 1
// Skip the shuffle loop when ntid.x < 2.
setp.lt.u32 %p4, %r4, 2;
@%p4 bra BB91_9;
mov.u32 %r39, 1;
// Each pass shuffles both 32-bit halves of the f64 down by %r39 lanes and
// takes the min with the reassembled value; %r39 doubles while below ntid.x.
BB91_8:
.loc 1 115 1
// inline asm
mov.b64 { %r27, %r28 }, %fd12;
// inline asm
.loc 5 110 1
// inline asm
shfl.down.b32 %r29, %r28, %r39, %r7;
// inline asm
.loc 5 110 1
// inline asm
shfl.down.b32 %r33, %r27, %r39, %r7;
// inline asm
.loc 4 3330 10
mov.b64 %fd11, {%r33, %r29};
.loc 4 2780 10
min.f64 %fd12, %fd12, %fd11;
.loc 2 76 40
shl.b32 %r39, %r39, 1;
.loc 2 76 1
setp.lt.u32 %p5, %r39, %r4;
@%p5 bra BB91_8;
BB91_9:
.loc 2 76 1
// Only threads with (tid.x & 31) == 0 store out[j].
setp.ne.s32 %p6, %r5, 0;
@%p6 bra BB91_11;
mul.wide.s32 %rd7, %r37, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 76 1
st.global.f64 [%rd8], %fd12;
BB91_11:
.loc 2 76 22
add.s32 %r37, %r6, %r37;
.loc 2 76 1
setp.lt.s32 %p7, %r37, %r16;
@%p7 bra BB91_2;
BB91_12:
.loc 2 76 2
ret;
}
// reduce_row_min_double: kernel entry that, for each index
// i = ntid.x * ctaid.x + tid.x (stepping by nctaid.x * ntid.x while i < param_0),
// takes the min.f64 of in[k * param_4 + i] for k = 0 .. param_1 - 1, starting
// from +infinity, and stores the result to out[i].
//   param_0  upper bound for the per-thread index i
//   param_1  number of terms folded per output (loop skipped when <= 0)
//   param_2  output pointer (converted to a global address)
//   param_3  input pointer (converted to a global address)
//   param_4  input stride in elements, multiplied by k
.visible .entry reduce_row_min_double(
.param .u32 reduce_row_min_double_param_0,
.param .u32 reduce_row_min_double_param_1,
.param .u64 reduce_row_min_double_param_2,
.param .u64 reduce_row_min_double_param_3,
.param .u32 reduce_row_min_double_param_4
)
{
.reg .pred %p<5>;
.reg .s32 %r<18>;
.reg .s64 %rd<9>;
.reg .f64 %fd<8>;
ld.param.u32 %r7, [reduce_row_min_double_param_0];
ld.param.u32 %r8, [reduce_row_min_double_param_1];
ld.param.u64 %rd3, [reduce_row_min_double_param_2];
ld.param.u64 %rd4, [reduce_row_min_double_param_3];
ld.param.u32 %r9, [reduce_row_min_double_param_4];
cvta.to.global.u64 %rd1, %rd3;
cvta.to.global.u64 %rd2, %rd4;
.loc 2 76 1
// %r1 = step for i (nctaid.x * ntid.x).
mov.u32 %r10, %nctaid.x;
mov.u32 %r11, %ntid.x;
mul.lo.s32 %r1, %r10, %r11;
.loc 2 76 1
// %r16 = starting i for this thread.
mov.u32 %r12, %ctaid.x;
mov.u32 %r13, %tid.x;
mad.lo.s32 %r16, %r11, %r12, %r13;
.loc 2 76 1
setp.ge.s32 %p1, %r16, %r7;
@%p1 bra BB92_5;
// Outer loop over i; the accumulator is reset to +infinity for each output.
BB92_1:
mov.f64 %fd7, 0d7FF0000000000000;
setp.gt.s32 %p2, %r8, 0;
.loc 2 76 1
@%p2 bra BB92_2;
bra.uni BB92_4;
BB92_2:
mov.u32 %r17, 0;
// Inner loop over k: fold in[k * stride + i] into the running min.
BB92_3:
.loc 2 76 1
mad.lo.s32 %r15, %r17, %r9, %r16;
mul.wide.s32 %rd5, %r15, 8;
add.s64 %rd6, %rd2, %rd5;
.loc 2 76 1
ld.global.f64 %fd6, [%rd6];
.loc 4 2780 10
min.f64 %fd7, %fd7, %fd6;
.loc 2 76 22
add.s32 %r17, %r17, 1;
.loc 2 76 1
setp.lt.s32 %p3, %r17, %r8;
@%p3 bra BB92_3;
// Store the result to out[i], then advance i.
BB92_4:
mul.wide.s32 %rd7, %r16, 8;
add.s64 %rd8, %rd1, %rd7;
.loc 2 76 1
st.global.f64 [%rd8], %fd7;
.loc 2 76 22
add.s32 %r16, %r16, %r1;
.loc 2 76 1
setp.lt.s32 %p4, %r16, %r7;
@%p4 bra BB92_1;
BB92_5:
.loc 2 76 2
ret;
}
// __internal_trig_reduction_slowpathd: device function taking an f64 in
// param_0 and a 64-bit address in param_1. It writes a signed 32-bit integer
// through the address in param_1 and returns an f64 in func_retval0.
// The body multiplies the input's 64-bit mantissa by successive 64-bit words
// of the __cudart_i2opi_d constant table, keeps the partial products in a
// 40-byte local buffer, and rebuilds an f64 from the selected product bits.
// NOTE(review): from the name and table this is presumably the large-argument
// reduction helper of the CUDA math library - confirm against its callers.
.func (.param .b64 func_retval0) __internal_trig_reduction_slowpathd(
.param .b64 __internal_trig_reduction_slowpathd_param_0,
.param .b64 __internal_trig_reduction_slowpathd_param_1
)
{
.local .align 8 .b8 __local_depot93[40];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<8>;
.reg .s32 %r<48>;
.reg .s64 %rd<90>;
.reg .f64 %fd<3>;
mov.u64 %SPL, __local_depot93;
ld.param.f64 %fd1, [__internal_trig_reduction_slowpathd_param_0];
ld.param.u64 %rd30, [__internal_trig_reduction_slowpathd_param_1];
// %rd31 = base of the local scratch buffer.
add.u64 %rd31, %SPL, 0;
// %r1 = high 32 bits of the input double.
{
.reg .b32 %temp;
mov.b64 {%temp, %r1}, %fd1;
}
// %r46 = sign bit; %r16 = 11-bit exponent field; %r17 = exponent field - 1024.
and.b32 %r46, %r1, -2147483648;
shr.u32 %r3, %r1, 20;
and.b32 %r16, %r3, 2047;
add.s32 %r17, %r16, -1024;
// %rd2 = input bits shifted left by 11 with the top bit forced to 1.
mov.b64 %rd32, %fd1;
shl.b64 %rd33, %rd32, 11;
or.b64 %rd2, %rd33, -9223372036854775808;
// %r18 = (exponent - 1024) / 64 selects the window of table words:
// start index %r45 = 15 - %r18, end index %r6 = min(18, 19 - %r18).
shr.u32 %r18, %r17, 6;
mov.u32 %r19, 16;
sub.s32 %r4, %r19, %r18;
mov.u32 %r20, 15;
sub.s32 %r45, %r20, %r18;
mov.u32 %r21, 19;
sub.s32 %r22, %r21, %r18;
mov.u32 %r23, 18;
min.s32 %r6, %r23, %r22;
setp.lt.s32 %p1, %r45, %r6;
@%p1 bra BB93_2;
// Empty window: the carry-in starts at 0 and the multiply loop is skipped.
mov.u64 %rd84, 0;
bra.uni BB93_4;
BB93_2:
// %rd83 = address of the first table word; %rd82 = write cursor in the scratch buffer.
sub.s32 %r29, %r20, %r18;
mul.wide.s32 %rd36, %r29, 8;
mov.u64 %rd37, __cudart_i2opi_d;
add.s64 %rd83, %rd37, %rd36;
mov.u64 %rd84, 0;
mov.u64 %rd82, %rd31;
// Multiply loop: 64x64 -> 128-bit product of table word and mantissa plus the
// carry %rd84; the low 64 bits are stored, the high 64 bits become the next carry.
BB93_3:
.pragma "nounroll";
mov.u64 %rd4, %rd82;
ld.const.u64 %rd40, [%rd83];
// inline asm
{
.reg .u32 r0, r1, r2, r3, alo, ahi, blo, bhi, clo, chi;
mov.b64 {alo,ahi}, %rd40;
mov.b64 {blo,bhi}, %rd2;
mov.b64 {clo,chi}, %rd84;
mad.lo.cc.u32 r0, alo, blo, clo;
madc.hi.cc.u32 r1, alo, blo, chi;
madc.hi.u32 r2, alo, bhi, 0;
mad.lo.cc.u32 r1, alo, bhi, r1;
madc.hi.cc.u32 r2, ahi, blo, r2;
madc.hi.u32 r3, ahi, bhi, 0;
mad.lo.cc.u32 r1, ahi, blo, r1;
madc.lo.cc.u32 r2, ahi, bhi, r2;
addc.u32 r3, r3, 0;
mov.b64 %rd38, {r0,r1};
mov.b64 %rd39, {r2,r3};
}
// inline asm
st.local.u64 [%rd4], %rd38;
add.s64 %rd83, %rd83, 8;
add.s64 %rd9, %rd4, 8;
add.s32 %r45, %r45, 1;
setp.lt.s32 %p2, %r45, %r6;
mov.u64 %rd84, %rd39;
mov.u64 %rd82, %rd9;
@%p2 bra BB93_3;
BB93_4:
// Store the final carry at scratch slot (1 - %r4 + %r45), then reload the
// words at byte offsets 16 and 24.
mov.u32 %r30, 1;
sub.s32 %r31, %r30, %r4;
add.s32 %r32, %r31, %r45;
mul.wide.s32 %rd43, %r32, 8;
add.s64 %rd44, %rd31, %rd43;
st.local.u64 [%rd44], %rd84;
ld.local.u64 %rd85, [%rd31+16];
ld.local.u64 %rd86, [%rd31+24];
// %r10 = low 6 bits of the shifted high word (%r3 & 63); when non-zero, shift
// the %rd86:%rd85 pair left by that amount, pulling bits in from the word at offset 8.
and.b32 %r10, %r3, 63;
setp.eq.s32 %p3, %r10, 0;
@%p3 bra BB93_6;
mov.u32 %r33, 64;
sub.s32 %r34, %r33, %r10;
shl.b64 %rd45, %rd86, %r10;
shr.u64 %rd46, %rd85, %r34;
or.b64 %rd86, %rd45, %rd46;
shl.b64 %rd47, %rd85, %r10;
ld.local.u64 %rd48, [%rd31+8];
shr.u64 %rd49, %rd48, %r34;
or.b64 %rd85, %rd49, %rd47;
BB93_6:
// %r35 = top 2 bits of %rd86; %rd88:%rd87 = the pair shifted left by 2.
shr.u64 %rd50, %rd86, 62;
cvt.u32.u64 %r35, %rd50;
shr.u64 %rd51, %rd85, 62;
shl.b64 %rd52, %rd86, 2;
or.b64 %rd88, %rd52, %rd51;
shl.b64 %rd87, %rd85, 2;
// %r37 = bit 61 of %rd86; it is added to %r35 and the sum is negated when the
// input sign bit is set, then written through the param_1 address.
shr.u64 %rd53, %rd86, 61;
cvt.u32.u64 %r36, %rd53;
and.b32 %r37, %r36, 1;
add.s32 %r38, %r37, %r35;
neg.s32 %r39, %r38;
setp.eq.s32 %p4, %r46, 0;
selp.b32 %r40, %r38, %r39, %p4;
st.u32 [%rd30], %r40;
setp.eq.s32 %p5, %r37, 0;
@%p5 bra BB93_8;
// Bit 61 was set: replace the 128-bit value with 0 - value and flip the sign bit.
mov.u64 %rd57, 0;
// inline asm
{
.reg .u32 r0, r1, r2, r3, a0, a1, a2, a3, b0, b1, b2, b3;
mov.b64 {a0,a1}, %rd57;
mov.b64 {a2,a3}, %rd57;
mov.b64 {b0,b1}, %rd87;
mov.b64 {b2,b3}, %rd88;
sub.cc.u32 r0, a0, b0;
subc.cc.u32 r1, a1, b1;
subc.cc.u32 r2, a2, b2;
subc.u32 r3, a3, b3;
mov.b64 %rd54, {r0,r1};
mov.b64 %rd55, {r2,r3};
}
// inline asm
xor.b32 %r46, %r46, -2147483648;
mov.u64 %rd88, %rd55;
mov.u64 %rd87, %rd54;
BB93_8:
// Normalize: shift out the leading zeros of %rd88, refilling from %rd87.
clz.b64 %r47, %rd88;
setp.eq.s32 %p6, %r47, 0;
@%p6 bra BB93_10;
shl.b64 %rd60, %rd88, %r47;
mov.u32 %r41, 64;
sub.s32 %r42, %r41, %r47;
shr.u64 %rd61, %rd87, %r42;
or.b64 %rd88, %rd61, %rd60;
BB93_10:
// 64x64 -> 128-bit multiply of the normalized value by a fixed constant.
// NOTE(review): the constant is 0xC90FDAA22168C235 as unsigned, presumably the
// leading bits of pi in fixed point - confirm.
mov.u64 %rd65, -3958705157555305931;
// inline asm
{
.reg .u32 r0, r1, r2, r3, alo, ahi, blo, bhi;
mov.b64 {alo,ahi}, %rd88;
mov.b64 {blo,bhi}, %rd65;
mul.lo.u32 r0, alo, blo;
mul.hi.u32 r1, alo, blo;
mad.lo.cc.u32 r1, alo, bhi, r1;
madc.hi.u32 r2, alo, bhi, 0;
mad.lo.cc.u32 r1, ahi, blo, r1;
madc.hi.cc.u32 r2, ahi, blo, r2;
madc.hi.u32 r3, ahi, bhi, 0;
mad.lo.cc.u32 r2, ahi, bhi, r2;
addc.u32 r3, r3, 0;
mov.b64 %rd62, {r0,r1};
mov.b64 %rd63, {r2,r3};
}
// inline asm
// If the high half is positive as a signed value (top bit clear), double the
// 128-bit product and count one more shift in %r47.
setp.lt.s64 %p7, %rd63, 1;
mov.u64 %rd89, %rd63;
@%p7 bra BB93_12;
// inline asm
{
.reg .u32 r0, r1, r2, r3, a0, a1, a2, a3, b0, b1, b2, b3;
mov.b64 {a0,a1}, %rd62;
mov.b64 {a2,a3}, %rd63;
mov.b64 {b0,b1}, %rd62;
mov.b64 {b2,b3}, %rd63;
add.cc.u32 r0, a0, b0;
addc.cc.u32 r1, a1, b1;
addc.cc.u32 r2, a2, b2;
addc.u32 r3, a3, b3;
mov.b64 %rd66, {r0,r1};
mov.b64 %rd67, {r2,r3};
}
// inline asm
add.s32 %r47, %r47, 1;
mov.u64 %rd89, %rd67;
BB93_12:
// Assemble the result bits: sign in bit 63, (1022 - %r47) shifted into the
// exponent position, plus the high product word rounded and shifted right by 11.
cvt.u64.u32 %rd72, %r46;
shl.b64 %rd73, %rd72, 32;
mov.u32 %r43, 1022;
sub.s32 %r44, %r43, %r47;
cvt.u64.u32 %rd74, %r44;
shl.b64 %rd75, %rd74, 52;
add.s64 %rd76, %rd89, 1;
shr.u64 %rd77, %rd76, 10;
add.s64 %rd78, %rd77, 1;
shr.u64 %rd79, %rd78, 1;
add.s64 %rd80, %rd75, %rd79;
or.b64 %rd81, %rd80, %rd73;
mov.b64 %fd2, %rd81;
st.param.f64 [func_retval0+0], %fd2;
ret;
}
// __internal_accurate_pow(param_0, param_1) -> f64
//
// Device helper that evaluates param_0 raised to param_1 in three stages:
//   1. Split param_0 into an integer exponent and a mantissa, and compute its
//      natural logarithm as an unevaluated (high, low) pair of doubles.
//   2. Multiply that pair by param_1, again keeping a (high, low) pair.
//   3. Exponentiate the high part (range reduction by ln 2 plus a polynomial),
//      then fold in the low part as a first-order correction.
// The result is written to func_retval0.
// NOTE(review): the sign bit of param_0 is carried through the mantissa mask
// below, so callers presumably pass a non-negative base and handle signs and
// special cases themselves -- confirm against the call sites.
.func (.param .b64 func_retval0) __internal_accurate_pow(
.param .b64 __internal_accurate_pow_param_0,
.param .b64 __internal_accurate_pow_param_1
)
{
.reg .pred %p<11>;
.reg .f32 %f<5>;
.reg .s32 %r<39>;
.reg .f64 %fd<144>;
// fd14 = base (param_0), fd15 = exponent (param_1).
ld.param.f64 %fd14, [__internal_accurate_pow_param_0];
ld.param.f64 %fd15, [__internal_accurate_pow_param_1];
// r35 = upper 32 bits of the base, r34 = lower 32 bits.
{
.reg .b32 %temp;
mov.b64 {%temp, %r35}, %fd14;
}
{
.reg .b32 %temp;
mov.b64 {%r34, %temp}, %fd14;
}
// r36 = bits 20..30 of the upper word, i.e. the 11-bit biased exponent field.
shr.u32 %r18, %r35, 20;
and.b32 %r36, %r18, 2047;
setp.ne.s32 %p1, %r36, 0;
@%p1 bra BB94_2;
// Exponent field is zero: scale the base by 2^54 (0d4350000000000000),
// re-read both words, and compensate by subtracting 54 from the exponent.
mul.f64 %fd16, %fd14, 0d4350000000000000;
{
.reg .b32 %temp;
mov.b64 {%temp, %r35}, %fd16;
}
{
.reg .b32 %temp;
mov.b64 {%r34, %temp}, %fd16;
}
shr.u32 %r19, %r35, 20;
and.b32 %r20, %r19, 2047;
add.s32 %r36, %r20, -54;
BB94_2:
// r37 = exponent with the bias of 1023 removed.
add.s32 %r37, %r36, -1023;
// Keep sign + mantissa bits (mask 0x800FFFFF) and force the exponent field to
// 0x3FF, so fd141 is the mantissa rescaled to magnitude in [1, 2).
and.b32 %r21, %r35, -2146435073;
or.b32 %r22, %r21, 1072693248;
mov.b64 %fd141, {%r34, %r22};
// 1073127583 = 0x3FF6A09F, the upper word of a value just above sqrt(2).
// Mantissas below it are used as-is.
setp.lt.u32 %p2, %r22, 1073127583;
@%p2 bra BB94_4;
// Otherwise halve the mantissa (subtract 1 from the exponent field via
// 0x00100000) and bump the integer exponent by one (-1022 instead of -1023).
{
.reg .b32 %temp;
mov.b64 {%r23, %temp}, %fd141;
}
{
.reg .b32 %temp;
mov.b64 {%temp, %r24}, %fd141;
}
add.s32 %r25, %r24, -1048576;
mov.b64 %fd141, {%r23, %r25};
add.s32 %r37, %r36, -1022;
BB94_4:
// ---- log of the mantissa m = fd141 -------------------------------------
// fd17 = m + 1; fd19 = 1.0 (reused later as the constant one).
add.f64 %fd17, %fd141, 0d3FF0000000000000;
mov.f64 %fd19, 0d3FF0000000000000;
// Seed fd18 ~= 1/(m+1) from a single-precision approximate reciprocal.
// inline asm
cvt.rn.f32.f64 %f1,%fd17;
// inline asm
// inline asm
rcp.approx.ftz.f32 %f2,%f1;
// inline asm
// inline asm
cvt.f64.f32 %fd18,%f2;
// inline asm
// Refine the reciprocal: e = 1 - (m+1)*fd18, fd23 = fd18 * (1 + e + e^2).
neg.f64 %fd20, %fd17;
fma.rn.f64 %fd21, %fd20, %fd18, %fd19;
fma.rn.f64 %fd22, %fd21, %fd21, %fd21;
fma.rn.f64 %fd23, %fd22, %fd18, %fd18;
// fd24 = m - 1; fd26 = u = 2*(m-1)/(m+1) (computed as t*r + t*r); fd27 = u^2.
add.f64 %fd24, %fd141, 0dBFF0000000000000;
mul.f64 %fd25, %fd24, %fd23;
fma.rn.f64 %fd26, %fd24, %fd23, %fd25;
mul.f64 %fd27, %fd26, %fd26;
// Horner-form FMA chain in u^2 using the constants loaded below; the last
// two constants are approximately 1/80 (fd39) and 1/12 (fd47).
mov.f64 %fd28, 0d3ED0F5D241AD3B5A;
mov.f64 %fd29, 0d3EB0F5FF7D2CAFE2;
fma.rn.f64 %fd30, %fd29, %fd27, %fd28;
mov.f64 %fd31, 0d3EF3B20A75488A3F;
fma.rn.f64 %fd32, %fd30, %fd27, %fd31;
mov.f64 %fd33, 0d3F1745CDE4FAECD5;
fma.rn.f64 %fd34, %fd32, %fd27, %fd33;
mov.f64 %fd35, 0d3F3C71C7258A578B;
fma.rn.f64 %fd36, %fd34, %fd27, %fd35;
mov.f64 %fd37, 0d3F6249249242B910;
fma.rn.f64 %fd38, %fd36, %fd27, %fd37;
mov.f64 %fd39, 0d3F89999999999DFB;
fma.rn.f64 %fd40, %fd38, %fd27, %fd39;
// fd45 = low-order correction to u: fd23 * (2*(t - u) - u*t), with t = m - 1.
sub.f64 %fd41, %fd24, %fd26;
add.f64 %fd42, %fd41, %fd41;
neg.f64 %fd43, %fd26;
fma.rn.f64 %fd44, %fd43, %fd24, %fd42;
mul.f64 %fd45, %fd23, %fd44;
// Final polynomial step kept as a (high, low) pair: fd46 is the rounded value,
// fd49 recovers what the rounding dropped, and a small constant
// (0dBC46A4CB00B9E7B0) is added to the low part.
// NOTE(review): that constant looks like the low-order tail of 1/12 -- confirm.
fma.rn.f64 %fd46, %fd40, %fd27, 0d3FB5555555555555;
mov.f64 %fd47, 0d3FB5555555555555;
sub.f64 %fd48, %fd47, %fd46;
fma.rn.f64 %fd49, %fd40, %fd27, %fd48;
add.f64 %fd50, %fd49, 0d0000000000000000;
add.f64 %fd51, %fd50, 0dBC46A4CB00B9E7B0;
// Renormalize into (fd52 high, fd54 low).
add.f64 %fd52, %fd46, %fd51;
sub.f64 %fd53, %fd46, %fd52;
add.f64 %fd54, %fd53, %fd51;
// Multiply the pair by (u, fd45), first time. mul + fma(-product) yields the
// rounding error of the product; cross terms are accumulated into the low part.
mul.rn.f64 %fd55, %fd52, %fd26;
neg.f64 %fd56, %fd55;
fma.rn.f64 %fd57, %fd52, %fd26, %fd56;
fma.rn.f64 %fd58, %fd52, %fd45, %fd57;
fma.rn.f64 %fd59, %fd54, %fd26, %fd58;
add.f64 %fd60, %fd55, %fd59;
sub.f64 %fd61, %fd55, %fd60;
add.f64 %fd62, %fd61, %fd59;
// Second multiplication by (u, fd45), same pattern.
mul.rn.f64 %fd63, %fd60, %fd26;
neg.f64 %fd64, %fd63;
fma.rn.f64 %fd65, %fd60, %fd26, %fd64;
fma.rn.f64 %fd66, %fd60, %fd45, %fd65;
fma.rn.f64 %fd67, %fd62, %fd26, %fd66;
add.f64 %fd68, %fd63, %fd67;
sub.f64 %fd69, %fd63, %fd68;
add.f64 %fd70, %fd69, %fd67;
// Third multiplication by (u, fd45): (fd76, fd78) now holds u^3 * polynomial.
mul.rn.f64 %fd71, %fd68, %fd26;
neg.f64 %fd72, %fd71;
fma.rn.f64 %fd73, %fd68, %fd26, %fd72;
fma.rn.f64 %fd74, %fd68, %fd45, %fd73;
fma.rn.f64 %fd75, %fd70, %fd26, %fd74;
add.f64 %fd76, %fd71, %fd75;
sub.f64 %fd77, %fd71, %fd76;
add.f64 %fd78, %fd77, %fd75;
// Add u (and its correction fd45) to get the log of the mantissa as the pair
// (fd84 high, fd86 low).
add.f64 %fd79, %fd26, %fd76;
sub.f64 %fd80, %fd26, %fd79;
add.f64 %fd81, %fd80, %fd76;
add.f64 %fd82, %fd81, %fd78;
add.f64 %fd83, %fd82, %fd45;
add.f64 %fd84, %fd79, %fd83;
sub.f64 %fd85, %fd79, %fd84;
add.f64 %fd86, %fd85, %fd83;
// Add exponent * ln 2, with ln 2 split into a truncated high constant (fd88,
// ~0.693147) and a small low constant (fd90). Result: (fd97 high, fd99 low).
cvt.rn.f64.s32 %fd87, %r37;
mov.f64 %fd88, 0d3FE62E42FEFA3000;
mul.rn.f64 %fd89, %fd87, %fd88;
mov.f64 %fd90, 0d3D53DE6AF278ECE6;
mul.rn.f64 %fd91, %fd87, %fd90;
add.f64 %fd92, %fd89, %fd84;
sub.f64 %fd93, %fd89, %fd92;
add.f64 %fd94, %fd93, %fd84;
add.f64 %fd95, %fd94, %fd86;
add.f64 %fd96, %fd95, %fd91;
add.f64 %fd97, %fd92, %fd96;
sub.f64 %fd98, %fd92, %fd97;
add.f64 %fd99, %fd98, %fd96;
// ---- multiply the log by the exponent argument --------------------------
// If |param_1| exceeds the very large threshold below, use param_1 * 2^-13
// (0d3F20000000000000) instead of param_1.
// NOTE(review): presumably this keeps the product finite -- confirm.
abs.f64 %fd100, %fd15;
setp.gt.f64 %p3, %fd100, 0d7F0D2A1BE4048F90;
mul.f64 %fd101, %fd15, 0d3F20000000000000;
selp.f64 %fd102, %fd101, %fd15, %p3;
// (fd4 high, fd5 low) = (fd97, fd99) * fd102, with the product's rounding
// error recovered through fma.
mul.rn.f64 %fd103, %fd97, %fd102;
neg.f64 %fd104, %fd103;
fma.rn.f64 %fd105, %fd97, %fd102, %fd104;
fma.rn.f64 %fd106, %fd99, %fd102, %fd105;
add.f64 %fd4, %fd103, %fd106;
sub.f64 %fd107, %fd103, %fd4;
add.f64 %fd5, %fd107, %fd106;
// ---- exponentiate fd4 ----------------------------------------------------
// r13 = upper word of fd4. The two compares accept positive values whose
// upper word is below 0x40862E43 (about 709.78) and negative values whose
// upper word is below 0xC0874911 (magnitude under about 745.13).
{
.reg .b32 %temp;
mov.b64 {%temp, %r13}, %fd4;
}
setp.lt.u32 %p4, %r13, 1082535491;
setp.lt.s32 %p5, %r13, -1064875759;
or.pred %p6, %p4, %p5;
@%p6 bra BB94_6;
// Out-of-range path: result is 0 when the sign bit of fd4 is set, +infinity
// otherwise; if fd4 is NaN (|fd4| unordered-greater than infinity) the result
// is fd4 + fd4, which propagates the NaN.
setp.lt.s32 %p7, %r13, 0;
selp.f64 %fd108, 0d0000000000000000, 0d7FF0000000000000, %p7;
abs.f64 %fd109, %fd4;
setp.gtu.f64 %p8, %fd109, 0d7FF0000000000000;
add.f64 %fd110, %fd4, %fd4;
selp.f64 %fd143, %fd110, %fd108, %p8;
bra.uni BB94_10;
BB94_6:
// In-range path. fd112 = fd4 * log2(e) rounded to the nearest integer,
// r14 = the same value as s32 (call it n).
mul.f64 %fd111, %fd4, 0d3FF71547652B82FE;
cvt.rni.f64.f64 %fd112, %fd111;
cvt.rzi.s32.f64 %r14, %fd112;
// Reduced argument fd116 = fd4 - n*ln2, subtracting ln 2 in two pieces
// (fd113 ~ -0.693147, fd115 a small negative tail).
mov.f64 %fd113, 0dBFE62E42FEFA39EF;
fma.rn.f64 %fd114, %fd112, %fd113, %fd4;
mov.f64 %fd115, 0dBC7ABC9E3B39803F;
fma.rn.f64 %fd116, %fd112, %fd115, %fd114;
// Horner-form polynomial in fd116; the final two steps add the constant 1.0
// held in fd19, leaving fd142 ~= exp(fd116).
mov.f64 %fd117, 0d3E928A27E30F5561;
mov.f64 %fd118, 0d3E5AE6449C0686C0;
fma.rn.f64 %fd119, %fd118, %fd116, %fd117;
mov.f64 %fd120, 0d3EC71DE8E6486D6B;
fma.rn.f64 %fd121, %fd119, %fd116, %fd120;
mov.f64 %fd122, 0d3EFA019A6B2464C5;
fma.rn.f64 %fd123, %fd121, %fd116, %fd122;
mov.f64 %fd124, 0d3F2A01A0171064A5;
fma.rn.f64 %fd125, %fd123, %fd116, %fd124;
mov.f64 %fd126, 0d3F56C16C17F29C8D;
fma.rn.f64 %fd127, %fd125, %fd116, %fd126;
mov.f64 %fd128, 0d3F8111111111A24E;
fma.rn.f64 %fd129, %fd127, %fd116, %fd128;
mov.f64 %fd130, 0d3FA555555555211D;
fma.rn.f64 %fd131, %fd129, %fd116, %fd130;
mov.f64 %fd132, 0d3FC5555555555530;
fma.rn.f64 %fd133, %fd131, %fd116, %fd132;
mov.f64 %fd134, 0d3FE0000000000005;
fma.rn.f64 %fd135, %fd133, %fd116, %fd134;
fma.rn.f64 %fd137, %fd135, %fd116, %fd19;
fma.rn.f64 %fd142, %fd137, %fd116, %fd19;
// Scale by 2^n. When |n| < 1023 a single factor is enough (BB94_8).
abs.s32 %r26, %r14;
setp.lt.s32 %p9, %r26, 1023;
@%p9 bra BB94_8;
// Large |n|: split the scale into two factors. r29 and r38 are built so that
// r29 + r38 == (n + 2046) << 20; each becomes the upper word of a double with
// a zero lower word. fd142 is multiplied by the first factor here and by the
// second at BB94_9.
add.s32 %r27, %r14, 2046;
shl.b32 %r28, %r27, 19;
and.b32 %r29, %r28, -1048576;
shl.b32 %r30, %r27, 20;
sub.s32 %r38, %r30, %r29;
mov.u32 %r31, 0;
mov.b64 %fd138, {%r31, %r29};
mul.f64 %fd142, %fd142, %fd138;
bra.uni BB94_9;
BB94_8:
// r38 = upper word of 2^n: n placed in the exponent field plus the bias
// (1072693248 = 0x3FF00000).
shl.b32 %r32, %r14, 20;
add.s32 %r38, %r32, 1072693248;
BB94_9:
// fd143 = fd142 * (double whose upper word is r38 and lower word is 0).
mov.u32 %r33, 0;
mov.b64 %fd139, {%r33, %r38};
mul.f64 %fd143, %fd142, %fd139;
BB94_10:
// Unless the result is already infinite, apply the low part of the product as
// a first-order correction: result = result + result * fd5.
abs.f64 %fd140, %fd143;
setp.eq.f64 %p10, %fd140, 0d7FF0000000000000;
@%p10 bra BB94_12;
fma.rn.f64 %fd143, %fd143, %fd5, %fd143;
BB94_12:
// Return fd143.
st.param.f64 [func_retval0+0], %fd143;
ret;
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy