From 354db1cdba0cb2846910b3522f79958e3b9e6bd0 Mon Sep 17 00:00:00 2001 From: "Keith A. Lidke" Date: Wed, 8 Dec 2021 21:28:28 -0700 Subject: [PATCH] convert cuda gauss to correct pixel convention --- MATLAB/examples/Example_LocalizeData.m | 2 +- MATLAB/ptx/smi_cuda_gaussBlobROIStack.cu | 6 +- MATLAB/ptx/smi_cuda_gaussBlobROIStack.ptx | 3397 +- MATLAB/ptx/smi_cuda_gaussMLEv2.ptx | 95126 ++++++++++------ MATLAB/source/cuda/cuda_Make.m | 14 +- .../smi_cuda_gaussBlobROIStack.cu | 6 +- .../cuda/smi_cuda_gaussMLEv2/GPUgaussLib.cuh | 10 +- 7 files changed, 59886 insertions(+), 38675 deletions(-) diff --git a/MATLAB/examples/Example_LocalizeData.m b/MATLAB/examples/Example_LocalizeData.m index d25eae0d..11117375 100644 --- a/MATLAB/examples/Example_LocalizeData.m +++ b/MATLAB/examples/Example_LocalizeData.m @@ -14,7 +14,7 @@ [SMD] = LD.genLocalizations(); %Set Verbose to give color overlay output -LD.Verbose=2 +LD.Verbose=3 [SMD] = LD.genLocalizations(); diff --git a/MATLAB/ptx/smi_cuda_gaussBlobROIStack.cu b/MATLAB/ptx/smi_cuda_gaussBlobROIStack.cu index cc04914b..c4f194e4 100644 --- a/MATLAB/ptx/smi_cuda_gaussBlobROIStack.cu +++ b/MATLAB/ptx/smi_cuda_gaussBlobROIStack.cu @@ -33,7 +33,7 @@ __global__ void kernel_guassiansampleblobs( const int sz, const int Nframes, con for(jj=0;jj; - .reg .f32 %f<55>; - .reg .b32 %r<150>; - .reg .f64 %fd<407>; - .reg .b64 %rd<39>; + .reg .pred %p<132>; + .reg .f32 %f<43>; + .reg .b32 %r<159>; + .reg .f64 %fd<175>; + .reg .b64 %rd<30>; ld.param.u32 %r36, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_0]; ld.param.u32 %r37, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_1]; - ld.param.u64 %rd9, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_2]; - ld.param.u64 %rd10, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_3]; - ld.param.u64 %rd11, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_4]; - ld.param.u64 %rd12, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_5]; - ld.param.u64 %rd13, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_6]; - ld.param.u64 %rd14, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_7]; - ld.param.u64 %rd15, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_8]; - ld.param.u64 %rd16, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_9]; - mov.u32 %r38, %tid.x; - mov.u32 %r39, %ntid.x; - mov.u32 %r40, %ctaid.x; - mad.lo.s32 %r1, %r39, %r40, %r38; - setp.ge.s32 %p1, %r1, %r37; - @%p1 bra BB0_34; - - cvta.to.global.u64 %rd17, %rd9; - mul.wide.s32 %rd18, %r1, 4; - add.s64 %rd19, %rd17, %rd18; - ld.global.f32 %f1, [%rd19]; - cvta.to.global.u64 %rd20, %rd10; - add.s64 %rd21, %rd20, %rd18; - ld.global.f32 %f2, [%rd21]; - cvta.to.global.u64 %rd22, %rd11; - add.s64 %rd1, %rd22, %rd18; - cvta.to.global.u64 %rd23, %rd12; - add.s64 %rd2, %rd23, %rd18; - cvta.to.global.u64 %rd24, %rd13; - add.s64 %rd3, %rd24, %rd18; - cvta.to.global.u64 %rd25, %rd14; - add.s64 %rd4, %rd25, %rd18; - cvta.to.global.u64 %rd26, %rd15; - add.s64 %rd5, %rd26, %rd18; - setp.lt.s32 %p2, %r36, 1; - @%p2 bra BB0_34; - - ld.global.f32 %f10, [%rd1]; - ld.global.f32 %f11, [%rd3]; - ld.global.f32 %f12, [%rd4]; - ld.global.f32 %f13, [%rd5]; - mul.f32 %f14, %f13, %f13; - mov.f32 %f15, 0f3F800000; - sub.f32 %f16, %f15, %f14; - sqrt.rn.f32 %f17, %f16; - cvt.f64.f32 %fd46, %f17; - cvt.f64.f32 %fd47, %f12; - cvt.f64.f32 %fd48, %f11; - mul.f64 %fd49, %fd48, 0d401921FAFC8B007A; - mul.f64 %fd50, %fd49, %fd47; - mul.f64 %fd51, %fd50, %fd46; - cvt.f64.f32 %fd52, %f10; - div.rn.f64 %fd53, %fd52, %fd51; - cvt.rn.f32.f64 %f18, %fd53; - ld.global.f32 %f19, [%rd2]; - cvt.f64.f32 %fd1, %f19; - cvt.f64.f32 %fd2, %f18; - add.f32 %f20, %f16, %f16; - mov.f32 %f21, 0fBF800000; - div.rn.f32 %f22, %f21, %f20; - cvt.f64.f32 %fd3, %f22; - mul.f32 %f23, %f11, %f11; - cvt.f64.f32 %fd4, %f23; - mul.f32 %f24, %f12, %f12; - cvt.f64.f32 %fd5, %f24; - add.f32 %f25, %f13, %f13; - cvt.f64.f32 %fd6, %f25; - mul.f32 %f26, %f11, %f12; - cvt.f64.f32 %fd7, %f26; - cvt.f64.f32 %fd54, %f2; - add.f64 %fd55, %fd54, 0dBFE0000000000000; - mul.f64 %fd56, %fd55, %fd55; - div.rn.f64 %fd8, %fd56, %fd5; - mov.u32 %r41, 0; - mov.u32 %r145, %r41; - -BB0_3: - cvt.rn.f32.s32 %f27, %r145; - sub.f32 %f28, %f1, %f27; - cvt.f64.f32 %fd57, %f28; - add.f64 %fd58, %fd57, 0dBFE0000000000000; - mul.f64 %fd59, %fd58, %fd58; - div.rn.f64 %fd9, %fd59, %fd4; - mul.f64 %fd10, %fd6, %fd58; - mad.lo.s32 %r50, %r1, %r36, %r145; - mul.lo.s32 %r3, %r50, %r36; - and.b32 %r45, %r36, 3; - setp.eq.s32 %p3, %r45, 0; - mov.u32 %r149, %r41; - @%p3 bra BB0_18; - - setp.eq.s32 %p4, %r45, 1; - mov.u32 %r147, %r41; - @%p4 bra BB0_14; - - setp.eq.s32 %p5, %r45, 2; - mov.u32 %r146, %r41; - @%p5 bra BB0_10; - - add.f64 %fd60, %fd9, %fd8; - mul.f64 %fd63, %fd10, %fd55; - div.rn.f64 %fd64, %fd63, %fd7; - sub.f64 %fd65, %fd60, %fd64; - mul.f64 %fd11, %fd3, %fd65; - mov.f64 %fd66, 0d4338000000000000; - mov.f64 %fd67, 0d3FF71547652B82FE; - fma.rn.f64 %fd68, %fd11, %fd67, %fd66; - { - .reg .b32 %temp; - mov.b64 {%r4, %temp}, %fd68; - } - mov.f64 %fd69, 0dC338000000000000; - add.rn.f64 %fd70, %fd68, %fd69; - mov.f64 %fd71, 0dBFE62E42FEFA39EF; - fma.rn.f64 %fd72, %fd70, %fd71, %fd11; - mov.f64 %fd73, 0dBC7ABC9E3B39803F; - fma.rn.f64 %fd74, %fd70, %fd73, %fd72; - mov.f64 %fd75, 0d3E928AF3FCA213EA; - mov.f64 %fd76, 0d3E5ADE1569CE2BDF; - fma.rn.f64 %fd77, %fd76, %fd74, %fd75; - mov.f64 %fd78, 0d3EC71DEE62401315; - fma.rn.f64 %fd79, %fd77, %fd74, %fd78; - mov.f64 %fd80, 0d3EFA01997C89EB71; - fma.rn.f64 %fd81, %fd79, %fd74, %fd80; - mov.f64 %fd82, 0d3F2A01A014761F65; - fma.rn.f64 %fd83, %fd81, %fd74, %fd82; - mov.f64 %fd84, 0d3F56C16C1852B7AF; - fma.rn.f64 %fd85, %fd83, %fd74, %fd84; - mov.f64 %fd86, 0d3F81111111122322; - fma.rn.f64 %fd87, %fd85, %fd74, %fd86; - mov.f64 %fd88, 0d3FA55555555502A1; - fma.rn.f64 %fd89, %fd87, %fd74, %fd88; - mov.f64 %fd90, 0d3FC5555555555511; - fma.rn.f64 %fd91, %fd89, %fd74, %fd90; - mov.f64 %fd92, 0d3FE000000000000B; - fma.rn.f64 %fd93, %fd91, %fd74, %fd92; - mov.f64 %fd94, 0d3FF0000000000000; - fma.rn.f64 %fd95, %fd93, %fd74, %fd94; - fma.rn.f64 %fd96, %fd95, %fd74, %fd94; - { - .reg .b32 %temp; - mov.b64 {%r5, %temp}, %fd96; - } - { - .reg .b32 %temp; - mov.b64 {%temp, %r6}, %fd96; - } - shl.b32 %r51, %r4, 20; - add.s32 %r52, %r6, %r51; - mov.b64 %fd400, {%r5, %r52}; - { - .reg .b32 %temp; - mov.b64 {%temp, %r53}, %fd11; - } - mov.b32 %f29, %r53; - abs.f32 %f3, %f29; - setp.lt.f32 %p6, %f3, 0f4086232B; - @%p6 bra BB0_9; - - setp.lt.f64 %p7, %fd11, 0d0000000000000000; - add.f64 %fd97, %fd11, 0d7FF0000000000000; - selp.f64 %fd400, 0d0000000000000000, %fd97, %p7; - setp.geu.f32 %p8, %f3, 0f40874800; - @%p8 bra BB0_9; - - shr.u32 %r54, %r4, 31; - add.s32 %r55, %r4, %r54; - shr.s32 %r56, %r55, 1; - shl.b32 %r57, %r56, 20; - add.s32 %r58, %r57, %r6; - mov.b64 %fd98, {%r5, %r58}; - sub.s32 %r59, %r4, %r56; - shl.b32 %r60, %r59, 20; - add.s32 %r61, %r60, 1072693248; - mov.u32 %r62, 0; - mov.b64 %fd99, {%r62, %r61}; - mul.f64 %fd400, %fd98, %fd99; - -BB0_9: - fma.rn.f64 %fd100, %fd2, %fd400, %fd1; - cvt.rn.f32.f64 %f30, %fd100; - cvta.to.global.u64 %rd27, %rd16; - mul.wide.s32 %rd28, %r3, 4; - add.s64 %rd29, %rd27, %rd28; - st.global.f32 [%rd29], %f30; - mov.u32 %r146, 1; - -BB0_10: - cvt.rn.f32.s32 %f31, %r146; - sub.f32 %f32, %f2, %f31; - cvt.f64.f32 %fd101, %f32; - add.f64 %fd102, %fd101, 0dBFE0000000000000; - mul.f64 %fd103, %fd102, %fd102; - div.rn.f64 %fd104, %fd103, %fd5; - add.f64 %fd105, %fd9, %fd104; - mul.f64 %fd106, %fd10, %fd102; - div.rn.f64 %fd107, %fd106, %fd7; - sub.f64 %fd108, %fd105, %fd107; - mul.f64 %fd16, %fd3, %fd108; + ld.param.u64 %rd5, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_2]; + ld.param.u64 %rd6, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_3]; + ld.param.u64 %rd7, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_4]; + ld.param.u64 %rd8, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_5]; + ld.param.u64 %rd9, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_6]; + ld.param.u64 %rd10, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_7]; + ld.param.u64 %rd11, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_8]; + ld.param.u64 %rd12, [_Z26kernel_guassiansampleblobsiiPKfS0_S0_S0_S0_S0_S0_Pf_param_9]; + mov.u32 %r38, %ntid.x; + mov.u32 %r39, %ctaid.x; + mov.u32 %r40, %tid.x; + mad.lo.s32 %r1, %r39, %r38, %r40; + setp.ge.s32 %p9, %r1, %r37; + @%p9 bra $L__BB0_71; + + cvta.to.global.u64 %rd13, %rd5; + mul.wide.s32 %rd14, %r1, 4; + add.s64 %rd15, %rd13, %rd14; + ld.global.f32 %f1, [%rd15]; + cvta.to.global.u64 %rd16, %rd6; + add.s64 %rd17, %rd16, %rd14; + ld.global.f32 %f2, [%rd17]; + cvta.to.global.u64 %rd18, %rd7; + add.s64 %rd19, %rd18, %rd14; + cvta.to.global.u64 %rd20, %rd8; + add.s64 %rd21, %rd20, %rd14; + ld.global.f32 %f3, [%rd21]; + cvta.to.global.u64 %rd22, %rd9; + add.s64 %rd23, %rd22, %rd14; + cvta.to.global.u64 %rd24, %rd10; + add.s64 %rd25, %rd24, %rd14; + cvta.to.global.u64 %rd26, %rd11; + add.s64 %rd27, %rd26, %rd14; + ld.global.f32 %f4, [%rd19]; + ld.global.f32 %f5, [%rd23]; + cvt.f64.f32 %fd1, %f5; + ld.global.f32 %f6, [%rd25]; + cvt.f64.f32 %fd2, %f6; + ld.global.f32 %f7, [%rd27]; + cvt.f64.f32 %fd3, %f7; + { + .reg .b32 %temp; + mov.b64 {%temp, %r2}, %fd3; + } + mov.f64 %fd65, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r3}, %fd65; + } + and.b32 %r4, %r3, 2146435072; + setp.eq.s32 %p10, %r4, 1062207488; + abs.f64 %fd4, %fd3; + { // callseq 0, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd4; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0 + ); + ld.param.f64 %fd160, [retval0+0]; + } // callseq 0 + setp.lt.s32 %p11, %r2, 0; + and.pred %p1, %p11, %p10; + not.pred %p12, %p1; + @%p12 bra $L__BB0_3; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r41}, %fd160; + } + xor.b32 %r42, %r41, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r43, %temp}, %fd160; + } + mov.b64 %fd160, {%r43, %r42}; + +$L__BB0_3: + setp.eq.f32 %p13, %f7, 0f00000000; + @%p13 bra $L__BB0_7; + bra.uni $L__BB0_4; + +$L__BB0_7: + selp.b32 %r44, %r2, 0, %p10; + mov.u32 %r45, 0; + or.b32 %r46, %r44, 2146435072; + setp.lt.s32 %p17, %r3, 0; + selp.b32 %r47, %r46, %r44, %p17; + mov.b64 %fd160, {%r45, %r47}; + bra.uni $L__BB0_8; + +$L__BB0_4: + setp.gt.s32 %p14, %r2, -1; + @%p14 bra $L__BB0_8; + + cvt.rzi.f64.f64 %fd67, %fd65; + setp.eq.f64 %p15, %fd67, 0d4000000000000000; + @%p15 bra $L__BB0_8; + + mov.f64 %fd160, 0dFFF8000000000000; + +$L__BB0_8: + add.f64 %fd10, %fd3, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r48}, %fd10; + } + and.b32 %r49, %r48, 2146435072; + setp.ne.s32 %p18, %r49, 2146435072; + mov.f64 %fd161, %fd160; + @%p18 bra $L__BB0_14; + + setp.gtu.f64 %p19, %fd4, 0d7FF0000000000000; + mov.f64 %fd161, %fd10; + @%p19 bra $L__BB0_14; + + { + .reg .b32 %temp; + mov.b64 {%r50, %temp}, %fd65; + } + and.b32 %r5, %r3, 2147483647; + setp.eq.s32 %p20, %r5, 2146435072; + setp.eq.s32 %p21, %r50, 0; + and.pred %p22, %p20, %p21; + @%p22 bra $L__BB0_13; + bra.uni $L__BB0_11; + +$L__BB0_13: + setp.gt.f64 %p29, %fd4, 0d3FF0000000000000; + selp.b32 %r57, 2146435072, 0, %p29; + mov.u32 %r58, 0; + xor.b32 %r59, %r57, 2146435072; + setp.lt.s32 %p30, %r3, 0; + selp.b32 %r60, %r59, %r57, %p30; + setp.eq.f32 %p31, %f7, 0fBF800000; + selp.b32 %r61, 1072693248, %r60, %p31; + mov.b64 %fd161, {%r58, %r61}; + bra.uni $L__BB0_14; + +$L__BB0_11: + { + .reg .b32 %temp; + mov.b64 {%r51, %temp}, %fd3; + } + and.b32 %r52, %r2, 2147483647; + setp.ne.s32 %p23, %r52, 2146435072; + setp.ne.s32 %p24, %r51, 0; + or.pred %p25, %p23, %p24; + mov.f64 %fd161, %fd160; + @%p25 bra $L__BB0_14; + + setp.gt.s32 %p26, %r3, -1; + selp.b32 %r53, 2146435072, 0, %p26; + mov.u32 %r54, 0; + setp.ne.s32 %p27, %r5, 1071644672; + and.pred %p28, %p27, %p1; + or.b32 %r55, %r53, -2147483648; + selp.b32 %r56, %r55, %r53, %p28; + mov.b64 %fd161, {%r54, %r56}; + +$L__BB0_14: + setp.lt.s32 %p32, %r36, 1; + @%p32 bra $L__BB0_71; + + cvta.to.global.u64 %rd1, %rd12; + setp.eq.f32 %p33, %f7, 0f3F800000; + mov.f64 %fd70, 0d3FF0000000000000; + sub.f64 %fd71, %fd70, %fd161; + selp.f64 %fd72, 0d0000000000000000, %fd71, %p33; + cvt.f64.f32 %fd14, %f3; + cvt.f64.f32 %fd73, %f4; + mul.f64 %fd74, %fd1, 0d401921FAFC8B007A; + mul.f64 %fd75, %fd74, %fd2; + sqrt.rn.f64 %fd76, %fd72; + mul.f64 %fd77, %fd75, %fd76; + div.rn.f64 %fd78, %fd73, %fd77; + cvt.rn.f32.f64 %f14, %fd78; + cvt.f64.f32 %fd15, %f14; + add.f64 %fd79, %fd72, %fd72; + mov.f64 %fd80, 0dBFF0000000000000; + div.rn.f64 %fd16, %fd80, %fd79; + mul.f32 %f9, %f5, %f6; + abs.f64 %fd81, %fd1; + { // callseq 1, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd81; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0 + ); + ld.param.f64 %fd17, [retval0+0]; + } // callseq 1 + mov.u32 %r157, 0; + { + .reg .b32 %temp; + mov.b64 {%temp, %r6}, %fd1; + } + setp.lt.s32 %p35, %r6, 0; + and.pred %p2, %p35, %p10; + selp.b32 %r63, %r6, 0, %p10; + or.b32 %r64, %r63, 2146435072; + setp.lt.s32 %p36, %r3, 0; + selp.b32 %r7, %r64, %r63, %p36; + add.f64 %fd18, %fd1, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r65}, %fd18; + } + and.b32 %r8, %r65, 2146435072; + setp.ne.s32 %p37, %r8, 2146435072; + setp.gt.s32 %p38, %r3, -1; + selp.b32 %r9, 2146435072, 0, %p38; + and.b32 %r10, %r3, 2147483647; + setp.ne.s32 %p39, %r10, 1071644672; + or.b32 %r11, %r9, -2147483648; + setp.gtu.f64 %p40, %fd81, 0d7FF0000000000000; + setp.gt.f64 %p41, %fd81, 0d3FF0000000000000; + selp.b32 %r66, 2146435072, 0, %p41; + xor.b32 %r67, %r66, 2146435072; + selp.b32 %r68, %r67, %r66, %p36; + setp.eq.f32 %p42, %f5, 0fBF800000; + selp.b32 %r12, 1072693248, %r68, %p42; + and.b32 %r13, %r6, 2147483647; + and.pred %p43, %p39, %p2; + selp.b32 %r14, %r11, %r9, %p43; + abs.f64 %fd82, %fd2; + { // callseq 2, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd82; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0 + ); + ld.param.f64 %fd19, [retval0+0]; + } // callseq 2 + { + .reg .b32 %temp; + mov.b64 {%temp, %r15}, %fd2; + } + setp.lt.s32 %p44, %r15, 0; + and.pred %p3, %p44, %p10; + selp.b32 %r69, %r15, 0, %p10; + or.b32 %r70, %r69, 2146435072; + selp.b32 %r16, %r70, %r69, %p36; + add.f64 %fd20, %fd2, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r71}, %fd20; + } + and.b32 %r17, %r71, 2146435072; + setp.ne.s32 %p45, %r17, 2146435072; + setp.gtu.f64 %p46, %fd82, 0d7FF0000000000000; + setp.gt.f64 %p47, %fd82, 0d3FF0000000000000; + selp.b32 %r72, 2146435072, 0, %p47; + xor.b32 %r73, %r72, 2146435072; + selp.b32 %r74, %r73, %r72, %p36; + setp.eq.f32 %p48, %f6, 0fBF800000; + selp.b32 %r18, 1072693248, %r74, %p48; + and.b32 %r19, %r15, 2147483647; + and.pred %p49, %p39, %p3; + selp.b32 %r20, %r11, %r9, %p49; + or.pred %p4, %p37, %p40; + or.pred %p5, %p45, %p46; + mul.lo.s32 %r21, %r1, %r36; + +$L__BB0_16: + add.f32 %f21, %f7, %f7; + mov.u32 %r158, 0; + cvt.rn.f32.s32 %f15, %r157; + sub.f32 %f10, %f1, %f15; + cvt.f64.f32 %fd21, %f10; + abs.f64 %fd83, %fd21; + { // callseq 3, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd83; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0 + ); + ld.param.f64 %fd22, [retval0+0]; + } // callseq 3 + add.s32 %r82, %r157, %r21; + mul.lo.s32 %r83, %r82, %r36; + mul.wide.s32 %rd28, %r83, 4; + add.s64 %rd29, %rd1, %rd28; + +$L__BB0_17: + cvt.rn.f32.s32 %f23, %r157; + sub.f32 %f22, %f1, %f23; + cvt.f64.f32 %fd146, %f22; + { + .reg .b32 %temp; + mov.b64 {%temp, %r141}, %fd146; + } + setp.lt.s32 %p123, %r141, 0; + and.pred %p122, %p123, %p10; + not.pred %p59, %p122; + mov.f64 %fd163, %fd22; + @%p59 bra $L__BB0_19; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r84}, %fd22; + } + xor.b32 %r85, %r84, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r86, %temp}, %fd22; + } + mov.b64 %fd163, {%r86, %r85}; + +$L__BB0_19: + cvt.rn.f32.s32 %f25, %r157; + sub.f32 %f24, %f1, %f25; + setp.eq.f32 %p60, %f24, 0f00000000; + @%p60 bra $L__BB0_23; + bra.uni $L__BB0_20; + +$L__BB0_23: + cvt.rn.f32.s32 %f42, %r157; + sub.f32 %f41, %f1, %f42; + cvt.f64.f32 %fd157, %f41; + { + .reg .b32 %temp; + mov.b64 {%temp, %r156}, %fd157; + } + selp.b32 %r155, %r156, 0, %p10; + or.b32 %r154, %r155, 2146435072; + selp.b32 %r153, %r154, %r155, %p36; + mov.u32 %r87, 0; + mov.b64 %fd163, {%r87, %r153}; + bra.uni $L__BB0_24; + +$L__BB0_20: + cvt.rn.f32.s32 %f27, %r157; + sub.f32 %f26, %f1, %f27; + cvt.f64.f32 %fd147, %f26; + { + .reg .b32 %temp; + mov.b64 {%temp, %r142}, %fd147; + } + setp.gt.s32 %p61, %r142, -1; + @%p61 bra $L__BB0_24; + + cvt.rzi.f64.f64 %fd85, %fd65; + setp.eq.f64 %p62, %fd85, 0d4000000000000000; + @%p62 bra $L__BB0_24; + + mov.f64 %fd163, 0dFFF8000000000000; + +$L__BB0_24: + cvt.rn.f32.s32 %f29, %r157; + sub.f32 %f28, %f1, %f29; + cvt.f64.f32 %fd150, %f28; + add.f64 %fd149, %fd150, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r144}, %fd149; + } + and.b32 %r143, %r144, 2146435072; + setp.ne.s32 %p126, %r143, 2146435072; + abs.f64 %fd148, %fd150; + setp.gtu.f64 %p125, %fd148, 0d7FF0000000000000; + or.pred %p124, %p126, %p125; + selp.f64 %fd164, %fd163, %fd149, %p126; + @%p124 bra $L__BB0_29; + + setp.eq.s32 %p64, %r10, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r88, %temp}, %fd65; + } + setp.eq.s32 %p65, %r88, 0; + and.pred %p66, %p64, %p65; + @%p66 bra $L__BB0_28; + bra.uni $L__BB0_26; + +$L__BB0_28: + cvt.rn.f32.s32 %f35, %r157; + sub.f32 %f34, %f1, %f35; + cvt.f64.f32 %fd155, %f34; + abs.f64 %fd154, %fd155; + setp.gt.f64 %p131, %fd154, 0d3FF0000000000000; + selp.b32 %r152, 2146435072, 0, %p131; + xor.b32 %r151, %r152, 2146435072; + setp.eq.f32 %p130, %f34, 0fBF800000; + selp.b32 %r150, %r151, %r152, %p36; + selp.b32 %r149, 1072693248, %r150, %p130; + mov.u32 %r91, 0; + mov.b64 %fd164, {%r91, %r149}; + bra.uni $L__BB0_29; + +$L__BB0_26: + cvt.f64.f32 %fd152, %f10; + cvt.rn.f32.s32 %f31, %r157; + sub.f32 %f30, %f1, %f31; + cvt.f64.f32 %fd151, %f30; + { + .reg .b32 %temp; + mov.b64 {%temp, %r146}, %fd151; + } + and.b32 %r145, %r146, 2147483647; + setp.ne.s32 %p67, %r145, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r89, %temp}, %fd151; + } + setp.ne.s32 %p68, %r89, 0; + or.pred %p69, %p67, %p68; + mov.f64 %fd164, %fd163; + @%p69 bra $L__BB0_29; + + cvt.rn.f32.s32 %f33, %r157; + sub.f32 %f32, %f1, %f33; + cvt.f64.f32 %fd153, %f32; + { + .reg .b32 %temp; + mov.b64 {%temp, %r148}, %fd153; + } + setp.lt.s32 %p129, %r148, 0; + and.pred %p128, %p129, %p10; + and.pred %p127, %p39, %p128; + selp.b32 %r147, %r11, %r9, %p127; + mov.u32 %r90, 0; + mov.b64 %fd164, {%r90, %r147}; + +$L__BB0_29: + not.pred %p70, %p2; + mov.f64 %fd166, %fd17; + @%p70 bra $L__BB0_31; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r92}, %fd17; + } + xor.b32 %r93, %r92, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r94, %temp}, %fd17; + } + mov.b64 %fd166, {%r94, %r93}; + +$L__BB0_31: + setp.eq.f32 %p71, %f5, 0f00000000; + @%p71 bra $L__BB0_35; + bra.uni $L__BB0_32; + +$L__BB0_35: + mov.u32 %r95, 0; + mov.b64 %fd166, {%r95, %r7}; + bra.uni $L__BB0_36; + +$L__BB0_32: + setp.gt.s32 %p72, %r6, -1; + @%p72 bra $L__BB0_36; + + cvt.rzi.f64.f64 %fd89, %fd65; + setp.eq.f64 %p73, %fd89, 0d4000000000000000; + @%p73 bra $L__BB0_36; + + mov.f64 %fd166, 0dFFF8000000000000; + +$L__BB0_36: + selp.f64 %fd167, %fd166, %fd18, %p37; + @%p4 bra $L__BB0_41; + + setp.eq.s32 %p75, %r10, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r96, %temp}, %fd65; + } + setp.eq.s32 %p76, %r96, 0; + and.pred %p77, %p75, %p76; + @%p77 bra $L__BB0_40; + bra.uni $L__BB0_38; + +$L__BB0_40: + mov.u32 %r99, 0; + mov.b64 %fd167, {%r99, %r12}; + bra.uni $L__BB0_41; + +$L__BB0_38: + setp.ne.s32 %p78, %r13, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r97, %temp}, %fd1; + } + setp.ne.s32 %p79, %r97, 0; + or.pred %p80, %p78, %p79; + mov.f64 %fd167, %fd166; + @%p80 bra $L__BB0_41; + + mov.u32 %r98, 0; + mov.b64 %fd167, {%r98, %r14}; + +$L__BB0_41: + cvt.rn.f32.s32 %f37, %r157; + sub.f32 %f36, %f1, %f37; + setp.eq.f32 %p81, %f5, 0f3F800000; + selp.f64 %fd92, 0d3FF0000000000000, %fd167, %p81; + setp.eq.f32 %p82, %f36, 0f3F800000; + selp.f64 %fd93, 0d3FF0000000000000, %fd164, %p82; + div.rn.f64 %fd40, %fd93, %fd92; + cvt.rn.f32.s32 %f16, %r158; + sub.f32 %f12, %f2, %f16; + cvt.f64.f32 %fd41, %f12; + { + .reg .b32 %temp; + mov.b64 {%temp, %r30}, %fd41; + } + abs.f64 %fd42, %fd41; + { // callseq 4, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd42; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0 + ); + ld.param.f64 %fd169, [retval0+0]; + } // callseq 4 + setp.lt.s32 %p83, %r30, 0; + and.pred %p8, %p83, %p10; + not.pred %p85, %p8; + @%p85 bra $L__BB0_43; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r100}, %fd169; + } + xor.b32 %r101, %r100, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r102, %temp}, %fd169; + } + mov.b64 %fd169, {%r102, %r101}; + +$L__BB0_43: + setp.eq.f32 %p86, %f12, 0f00000000; + @%p86 bra $L__BB0_47; + bra.uni $L__BB0_44; + +$L__BB0_47: + mov.u32 %r103, 0; + selp.b32 %r104, %r30, 0, %p10; + or.b32 %r105, %r104, 2146435072; + selp.b32 %r106, %r105, %r104, %p36; + mov.b64 %fd169, {%r103, %r106}; + bra.uni $L__BB0_48; + +$L__BB0_44: + setp.gt.s32 %p87, %r30, -1; + @%p87 bra $L__BB0_48; + + cvt.rzi.f64.f64 %fd95, %fd65; + setp.eq.f64 %p88, %fd95, 0d4000000000000000; + @%p88 bra $L__BB0_48; + + mov.f64 %fd169, 0dFFF8000000000000; + +$L__BB0_48: + add.f64 %fd48, %fd41, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r107}, %fd48; + } + and.b32 %r108, %r107, 2146435072; + setp.ne.s32 %p91, %r108, 2146435072; + mov.f64 %fd170, %fd169; + @%p91 bra $L__BB0_54; + + setp.gtu.f64 %p92, %fd42, 0d7FF0000000000000; + mov.f64 %fd170, %fd48; + @%p92 bra $L__BB0_54; + + setp.eq.s32 %p93, %r10, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r109, %temp}, %fd65; + } + setp.eq.s32 %p94, %r109, 0; + and.pred %p95, %p93, %p94; + @%p95 bra $L__BB0_53; + bra.uni $L__BB0_51; + +$L__BB0_53: + mov.u32 %r114, 0; + setp.gt.f64 %p102, %fd42, 0d3FF0000000000000; + selp.b32 %r115, 2146435072, 0, %p102; + xor.b32 %r116, %r115, 2146435072; + selp.b32 %r117, %r116, %r115, %p36; + setp.eq.f32 %p103, %f12, 0fBF800000; + selp.b32 %r118, 1072693248, %r117, %p103; + mov.b64 %fd170, {%r114, %r118}; + bra.uni $L__BB0_54; + +$L__BB0_51: + { + .reg .b32 %temp; + mov.b64 {%r110, %temp}, %fd41; + } + and.b32 %r111, %r30, 2147483647; + setp.ne.s32 %p96, %r111, 2146435072; + setp.ne.s32 %p97, %r110, 0; + or.pred %p98, %p96, %p97; + mov.f64 %fd170, %fd169; + @%p98 bra $L__BB0_54; + + and.pred %p100, %p39, %p8; + selp.b32 %r112, %r11, %r9, %p100; + mov.u32 %r113, 0; + mov.b64 %fd170, {%r113, %r112}; + +$L__BB0_54: + not.pred %p104, %p3; + mov.f64 %fd172, %fd19; + @%p104 bra $L__BB0_56; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r119}, %fd19; + } + xor.b32 %r120, %r119, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r121, %temp}, %fd19; + } + mov.b64 %fd172, {%r121, %r120}; + +$L__BB0_56: + setp.eq.f32 %p105, %f6, 0f00000000; + @%p105 bra $L__BB0_60; + bra.uni $L__BB0_57; + +$L__BB0_60: + mov.u32 %r122, 0; + mov.b64 %fd172, {%r122, %r16}; + bra.uni $L__BB0_61; + +$L__BB0_57: + setp.gt.s32 %p106, %r15, -1; + @%p106 bra $L__BB0_61; + + cvt.rzi.f64.f64 %fd99, %fd65; + setp.eq.f64 %p107, %fd99, 0d4000000000000000; + @%p107 bra $L__BB0_61; + + mov.f64 %fd172, 0dFFF8000000000000; + +$L__BB0_61: + selp.f64 %fd173, %fd172, %fd20, %p45; + @%p5 bra $L__BB0_66; + + setp.eq.s32 %p109, %r10, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r123, %temp}, %fd65; + } + setp.eq.s32 %p110, %r123, 0; + and.pred %p111, %p109, %p110; + @%p111 bra $L__BB0_65; + bra.uni $L__BB0_63; + +$L__BB0_65: + mov.u32 %r126, 0; + mov.b64 %fd173, {%r126, %r18}; + bra.uni $L__BB0_66; + +$L__BB0_63: + setp.ne.s32 %p112, %r19, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r124, %temp}, %fd2; + } + setp.ne.s32 %p113, %r124, 0; + or.pred %p114, %p112, %p113; + mov.f64 %fd173, %fd172; + @%p114 bra $L__BB0_66; + + mov.u32 %r125, 0; + mov.b64 %fd173, {%r125, %r20}; + +$L__BB0_66: + cvt.rn.f32.s32 %f40, %r157; + sub.f32 %f39, %f1, %f40; + mul.f32 %f38, %f21, %f39; + mov.f64 %fd156, 0d3FF0000000000000; + setp.eq.f32 %p115, %f6, 0f3F800000; + selp.f64 %fd102, 0d3FF0000000000000, %fd173, %p115; + setp.eq.f32 %p116, %f12, 0f3F800000; + selp.f64 %fd104, 0d3FF0000000000000, %fd170, %p116; + div.rn.f64 %fd105, %fd104, %fd102; + add.f64 %fd106, %fd40, %fd105; + mul.f32 %f17, %f38, %f12; + div.rn.f32 %f18, %f17, %f9; + cvt.f64.f32 %fd107, %f18; + sub.f64 %fd108, %fd106, %fd107; + mul.f64 %fd60, %fd16, %fd108; mov.f64 %fd109, 0d4338000000000000; mov.f64 %fd110, 0d3FF71547652B82FE; - fma.rn.f64 %fd111, %fd16, %fd110, %fd109; + fma.rn.f64 %fd111, %fd60, %fd110, %fd109; { .reg .b32 %temp; - mov.b64 {%r8, %temp}, %fd111; + mov.b64 {%r31, %temp}, %fd111; } mov.f64 %fd112, 0dC338000000000000; add.rn.f64 %fd113, %fd111, %fd112; mov.f64 %fd114, 0dBFE62E42FEFA39EF; - fma.rn.f64 %fd115, %fd113, %fd114, %fd16; + fma.rn.f64 %fd115, %fd113, %fd114, %fd60; mov.f64 %fd116, 0dBC7ABC9E3B39803F; fma.rn.f64 %fd117, %fd113, %fd116, %fd115; mov.f64 %fd118, 0d3E928AF3FCA213EA; @@ -263,536 +783,72 @@ BB0_10: fma.rn.f64 %fd134, %fd132, %fd117, %fd133; mov.f64 %fd135, 0d3FE000000000000B; fma.rn.f64 %fd136, %fd134, %fd117, %fd135; - mov.f64 %fd137, 0d3FF0000000000000; - fma.rn.f64 %fd138, %fd136, %fd117, %fd137; - fma.rn.f64 %fd139, %fd138, %fd117, %fd137; - { - .reg .b32 %temp; - mov.b64 {%r9, %temp}, %fd139; - } - { - .reg .b32 %temp; - mov.b64 {%temp, %r10}, %fd139; - } - shl.b32 %r64, %r8, 20; - add.s32 %r65, %r10, %r64; - mov.b64 %fd401, {%r9, %r65}; - { - .reg .b32 %temp; - mov.b64 {%temp, %r66}, %fd16; - } - mov.b32 %f33, %r66; - abs.f32 %f4, %f33; - setp.lt.f32 %p9, %f4, 0f4086232B; - @%p9 bra BB0_13; - - setp.lt.f64 %p10, %fd16, 0d0000000000000000; - add.f64 %fd140, %fd16, 0d7FF0000000000000; - selp.f64 %fd401, 0d0000000000000000, %fd140, %p10; - setp.geu.f32 %p11, %f4, 0f40874800; - @%p11 bra BB0_13; - - shr.u32 %r67, %r8, 31; - add.s32 %r68, %r8, %r67; - shr.s32 %r69, %r68, 1; - shl.b32 %r70, %r69, 20; - add.s32 %r71, %r70, %r10; - mov.b64 %fd141, {%r9, %r71}; - sub.s32 %r72, %r8, %r69; - shl.b32 %r73, %r72, 20; - add.s32 %r74, %r73, 1072693248; - mov.u32 %r75, 0; - mov.b64 %fd142, {%r75, %r74}; - mul.f64 %fd401, %fd141, %fd142; - -BB0_13: - fma.rn.f64 %fd143, %fd2, %fd401, %fd1; - cvt.rn.f32.f64 %f34, %fd143; - add.s32 %r76, %r146, %r3; - cvta.to.global.u64 %rd30, %rd16; - mul.wide.s32 %rd31, %r76, 4; - add.s64 %rd32, %rd30, %rd31; - st.global.f32 [%rd32], %f34; - add.s32 %r147, %r146, 1; - -BB0_14: - cvt.rn.f32.s32 %f35, %r147; - sub.f32 %f36, %f2, %f35; - cvt.f64.f32 %fd144, %f36; - add.f64 %fd145, %fd144, 0dBFE0000000000000; - mul.f64 %fd146, %fd145, %fd145; - div.rn.f64 %fd147, %fd146, %fd5; - add.f64 %fd148, %fd9, %fd147; - mul.f64 %fd149, %fd10, %fd145; - div.rn.f64 %fd150, %fd149, %fd7; - sub.f64 %fd151, %fd148, %fd150; - mul.f64 %fd21, %fd3, %fd151; - mov.f64 %fd152, 0d4338000000000000; - mov.f64 %fd153, 0d3FF71547652B82FE; - fma.rn.f64 %fd154, %fd21, %fd153, %fd152; - { - .reg .b32 %temp; - mov.b64 {%r13, %temp}, %fd154; - } - mov.f64 %fd155, 0dC338000000000000; - add.rn.f64 %fd156, %fd154, %fd155; - mov.f64 %fd157, 0dBFE62E42FEFA39EF; - fma.rn.f64 %fd158, %fd156, %fd157, %fd21; - mov.f64 %fd159, 0dBC7ABC9E3B39803F; - fma.rn.f64 %fd160, %fd156, %fd159, %fd158; - mov.f64 %fd161, 0d3E928AF3FCA213EA; - mov.f64 %fd162, 0d3E5ADE1569CE2BDF; - fma.rn.f64 %fd163, %fd162, %fd160, %fd161; - mov.f64 %fd164, 0d3EC71DEE62401315; - fma.rn.f64 %fd165, %fd163, %fd160, %fd164; - mov.f64 %fd166, 0d3EFA01997C89EB71; - fma.rn.f64 %fd167, %fd165, %fd160, %fd166; - mov.f64 %fd168, 0d3F2A01A014761F65; - fma.rn.f64 %fd169, %fd167, %fd160, %fd168; - mov.f64 %fd170, 0d3F56C16C1852B7AF; - fma.rn.f64 %fd171, %fd169, %fd160, %fd170; - mov.f64 %fd172, 0d3F81111111122322; - fma.rn.f64 %fd173, %fd171, %fd160, %fd172; - mov.f64 %fd174, 0d3FA55555555502A1; - fma.rn.f64 %fd175, %fd173, %fd160, %fd174; - mov.f64 %fd176, 0d3FC5555555555511; - fma.rn.f64 %fd177, %fd175, %fd160, %fd176; - mov.f64 %fd178, 0d3FE000000000000B; - fma.rn.f64 %fd179, %fd177, %fd160, %fd178; - mov.f64 %fd180, 0d3FF0000000000000; - fma.rn.f64 %fd181, %fd179, %fd160, %fd180; - fma.rn.f64 %fd182, %fd181, %fd160, %fd180; - { - .reg .b32 %temp; - mov.b64 {%r14, %temp}, %fd182; - } - { - .reg .b32 %temp; - mov.b64 {%temp, %r15}, %fd182; - } - shl.b32 %r77, %r13, 20; - add.s32 %r78, %r15, %r77; - mov.b64 %fd402, {%r14, %r78}; - { - .reg .b32 %temp; - mov.b64 {%temp, %r79}, %fd21; - } - mov.b32 %f37, %r79; - abs.f32 %f5, %f37; - setp.lt.f32 %p12, %f5, 0f4086232B; - @%p12 bra BB0_17; - - setp.lt.f64 %p13, %fd21, 0d0000000000000000; - add.f64 %fd183, %fd21, 0d7FF0000000000000; - selp.f64 %fd402, 0d0000000000000000, %fd183, %p13; - setp.geu.f32 %p14, %f5, 0f40874800; - @%p14 bra BB0_17; - - shr.u32 %r80, %r13, 31; - add.s32 %r81, %r13, %r80; - shr.s32 %r82, %r81, 1; - shl.b32 %r83, %r82, 20; - add.s32 %r84, %r83, %r15; - mov.b64 %fd184, {%r14, %r84}; - sub.s32 %r85, %r13, %r82; - shl.b32 %r86, %r85, 20; - add.s32 %r87, %r86, 1072693248; - mov.u32 %r88, 0; - mov.b64 %fd185, {%r88, %r87}; - mul.f64 %fd402, %fd184, %fd185; - -BB0_17: - fma.rn.f64 %fd186, %fd2, %fd402, %fd1; - cvt.rn.f32.f64 %f38, %fd186; - add.s32 %r89, %r147, %r3; - cvta.to.global.u64 %rd33, %rd16; - mul.wide.s32 %rd34, %r89, 4; - add.s64 %rd35, %rd33, %rd34; - st.global.f32 [%rd35], %f38; - add.s32 %r149, %r147, 1; - -BB0_18: - setp.lt.u32 %p15, %r36, 4; - @%p15 bra BB0_33; - - mad.lo.s32 %r94, %r36, %r1, %r145; - mad.lo.s32 %r95, %r36, %r94, %r149; - cvta.to.global.u64 %rd36, %rd16; - mul.wide.s32 %rd37, %r95, 4; - add.s64 %rd38, %rd36, %rd37; - -BB0_20: - mov.u64 %rd7, %rd38; - cvt.rn.f32.s32 %f39, %r149; - sub.f32 %f40, %f2, %f39; - cvt.f64.f32 %fd187, %f40; - add.f64 %fd188, %fd187, 0dBFE0000000000000; - mul.f64 %fd189, %fd188, %fd188; - div.rn.f64 %fd190, %fd189, %fd5; - add.f64 %fd191, %fd9, %fd190; - mul.f64 %fd192, %fd10, %fd188; - div.rn.f64 %fd193, %fd192, %fd7; - sub.f64 %fd194, %fd191, %fd193; - mul.f64 %fd26, %fd3, %fd194; - mov.f64 %fd195, 0d4338000000000000; - mov.f64 %fd196, 0d3FF71547652B82FE; - fma.rn.f64 %fd197, %fd26, %fd196, %fd195; - { - .reg .b32 %temp; - mov.b64 {%r19, %temp}, %fd197; - } - mov.f64 %fd198, 0dC338000000000000; - add.rn.f64 %fd199, %fd197, %fd198; - mov.f64 %fd200, 0dBFE62E42FEFA39EF; - fma.rn.f64 %fd201, %fd199, %fd200, %fd26; - mov.f64 %fd202, 0dBC7ABC9E3B39803F; - fma.rn.f64 %fd203, %fd199, %fd202, %fd201; - mov.f64 %fd204, 0d3E928AF3FCA213EA; - mov.f64 %fd205, 0d3E5ADE1569CE2BDF; - fma.rn.f64 %fd206, %fd205, %fd203, %fd204; - mov.f64 %fd207, 0d3EC71DEE62401315; - fma.rn.f64 %fd208, %fd206, %fd203, %fd207; - mov.f64 %fd209, 0d3EFA01997C89EB71; - fma.rn.f64 %fd210, %fd208, %fd203, %fd209; - mov.f64 %fd211, 0d3F2A01A014761F65; - fma.rn.f64 %fd212, %fd210, %fd203, %fd211; - mov.f64 %fd213, 0d3F56C16C1852B7AF; - fma.rn.f64 %fd214, %fd212, %fd203, %fd213; - mov.f64 %fd215, 0d3F81111111122322; - fma.rn.f64 %fd216, %fd214, %fd203, %fd215; - mov.f64 %fd217, 0d3FA55555555502A1; - fma.rn.f64 %fd218, %fd216, %fd203, %fd217; - mov.f64 %fd219, 0d3FC5555555555511; - fma.rn.f64 %fd220, %fd218, %fd203, %fd219; - mov.f64 %fd221, 0d3FE000000000000B; - fma.rn.f64 %fd222, %fd220, %fd203, %fd221; - mov.f64 %fd223, 0d3FF0000000000000; - fma.rn.f64 %fd224, %fd222, %fd203, %fd223; - fma.rn.f64 %fd225, %fd224, %fd203, %fd223; - { - .reg .b32 %temp; - mov.b64 {%r20, %temp}, %fd225; - } - { - .reg .b32 %temp; - mov.b64 {%temp, %r21}, %fd225; - } - shl.b32 %r96, %r19, 20; - add.s32 %r97, %r21, %r96; - mov.b64 %fd403, {%r20, %r97}; - { - .reg .b32 %temp; - mov.b64 {%temp, %r98}, %fd26; - } - mov.b32 %f41, %r98; - abs.f32 %f6, %f41; - setp.lt.f32 %p16, %f6, 0f4086232B; - @%p16 bra BB0_23; - - setp.lt.f64 %p17, %fd26, 0d0000000000000000; - add.f64 %fd226, %fd26, 0d7FF0000000000000; - selp.f64 %fd403, 0d0000000000000000, %fd226, %p17; - setp.geu.f32 %p18, %f6, 0f40874800; - @%p18 bra BB0_23; - - mov.f64 %fd399, 0d4338000000000000; - mov.f64 %fd398, 0d3FF71547652B82FE; - fma.rn.f64 %fd397, %fd26, %fd398, %fd399; - { - .reg .b32 %temp; - mov.b64 {%r144, %temp}, %fd397; - } - shr.u32 %r99, %r144, 31; - add.s32 %r100, %r144, %r99; - shr.s32 %r101, %r100, 1; - shl.b32 %r102, %r101, 20; - add.s32 %r103, %r102, %r21; - mov.b64 %fd227, {%r20, %r103}; - sub.s32 %r104, %r144, %r101; - shl.b32 %r105, %r104, 20; - add.s32 %r106, %r105, 1072693248; - mov.u32 %r107, 0; - mov.b64 %fd228, {%r107, %r106}; - mul.f64 %fd403, %fd227, %fd228; - -BB0_23: - mov.f64 %fd394, 0d3FF0000000000000; - mov.f64 %fd388, 0d3FE000000000000B; - mov.f64 %fd387, 0d3FC5555555555511; - mov.f64 %fd386, 0d3FA55555555502A1; - mov.f64 %fd385, 0d3F81111111122322; - mov.f64 %fd384, 0d3F56C16C1852B7AF; - fma.rn.f64 %fd229, %fd2, %fd403, %fd1; - cvt.rn.f32.f64 %f42, %fd229; - st.global.f32 [%rd7], %f42; - add.s32 %r22, %r149, 1; - cvt.rn.f32.s32 %f43, %r22; - sub.f32 %f44, %f2, %f43; - cvt.f64.f32 %fd230, %f44; - add.f64 %fd231, %fd230, 0dBFE0000000000000; - mul.f64 %fd232, %fd231, %fd231; - div.rn.f64 %fd233, %fd232, %fd5; - add.f64 %fd234, %fd9, %fd233; - mul.f64 %fd235, %fd10, %fd231; - div.rn.f64 %fd236, %fd235, %fd7; - sub.f64 %fd237, %fd234, %fd236; - mul.f64 %fd31, %fd3, %fd237; - fma.rn.f64 %fd240, %fd31, %fd196, %fd195; - { - .reg .b32 %temp; - mov.b64 {%r23, %temp}, %fd240; - } - add.rn.f64 %fd242, %fd240, %fd198; - fma.rn.f64 %fd244, %fd242, %fd200, %fd31; - fma.rn.f64 %fd246, %fd242, %fd202, %fd244; - fma.rn.f64 %fd249, %fd205, %fd246, %fd204; - fma.rn.f64 %fd251, %fd249, %fd246, %fd207; - fma.rn.f64 %fd253, %fd251, %fd246, %fd209; - fma.rn.f64 %fd255, %fd253, %fd246, %fd211; - fma.rn.f64 %fd257, %fd255, %fd246, %fd384; - fma.rn.f64 %fd259, %fd257, %fd246, %fd385; - fma.rn.f64 %fd261, %fd259, %fd246, %fd386; - fma.rn.f64 %fd263, %fd261, %fd246, %fd387; - fma.rn.f64 %fd265, %fd263, %fd246, %fd388; - fma.rn.f64 %fd267, %fd265, %fd246, %fd394; - fma.rn.f64 %fd268, %fd267, %fd246, %fd394; - { - .reg .b32 %temp; - mov.b64 {%r24, %temp}, %fd268; - } - { - .reg .b32 %temp; - mov.b64 {%temp, %r25}, %fd268; - } - shl.b32 %r108, %r23, 20; - add.s32 %r109, %r25, %r108; - mov.b64 %fd404, {%r24, %r109}; - { - .reg .b32 %temp; - mov.b64 {%temp, %r110}, %fd31; - } - mov.b32 %f45, %r110; - abs.f32 %f7, %f45; - setp.lt.f32 %p19, %f7, 0f4086232B; - @%p19 bra BB0_26; - - setp.lt.f64 %p20, %fd31, 0d0000000000000000; - add.f64 %fd269, %fd31, 0d7FF0000000000000; - selp.f64 %fd404, 0d0000000000000000, %fd269, %p20; - setp.geu.f32 %p21, %f7, 0f40874800; - @%p21 bra BB0_26; - - shr.u32 %r111, %r23, 31; - add.s32 %r112, %r23, %r111; - shr.s32 %r113, %r112, 1; - shl.b32 %r114, %r113, 20; - add.s32 %r115, %r114, %r25; - mov.b64 %fd270, {%r24, %r115}; - sub.s32 %r116, %r23, %r113; - shl.b32 %r117, %r116, 20; - add.s32 %r118, %r117, 1072693248; - mov.u32 %r119, 0; - mov.b64 %fd271, {%r119, %r118}; - mul.f64 %fd404, %fd270, %fd271; - -BB0_26: - mov.f64 %fd395, 0d3FF0000000000000; - mov.f64 %fd393, 0d3FE000000000000B; - mov.f64 %fd392, 0d3FC5555555555511; - mov.f64 %fd391, 0d3FA55555555502A1; - mov.f64 %fd390, 0d3F81111111122322; - mov.f64 %fd389, 0d3F56C16C1852B7AF; - mov.f64 %fd368, 0d3F2A01A014761F65; - mov.f64 %fd367, 0d3EFA01997C89EB71; - mov.f64 %fd366, 0d3EC71DEE62401315; - mov.f64 %fd365, 0d3E928AF3FCA213EA; - mov.f64 %fd364, 0d3E5ADE1569CE2BDF; - mov.f64 %fd363, 0dBC7ABC9E3B39803F; - mov.f64 %fd362, 0dBFE62E42FEFA39EF; - mov.f64 %fd361, 0dC338000000000000; - mov.f64 %fd360, 0d4338000000000000; - mov.f64 %fd359, 0d3FF71547652B82FE; - fma.rn.f64 %fd272, %fd2, %fd404, %fd1; - cvt.rn.f32.f64 %f46, %fd272; - st.global.f32 [%rd7+4], %f46; - add.s32 %r26, %r22, 1; - cvt.rn.f32.s32 %f47, %r26; - sub.f32 %f48, %f2, %f47; - cvt.f64.f32 %fd273, %f48; - add.f64 %fd274, %fd273, 0dBFE0000000000000; - mul.f64 %fd275, %fd274, %fd274; - div.rn.f64 %fd276, %fd275, %fd5; - add.f64 %fd277, %fd9, %fd276; - mul.f64 %fd278, %fd10, %fd274; - div.rn.f64 %fd279, %fd278, %fd7; - sub.f64 %fd280, %fd277, %fd279; - mul.f64 %fd36, %fd3, %fd280; - fma.rn.f64 %fd283, %fd36, %fd359, %fd360; - { - .reg .b32 %temp; - mov.b64 {%r27, %temp}, %fd283; - } - add.rn.f64 %fd285, %fd283, %fd361; - fma.rn.f64 %fd287, %fd285, %fd362, %fd36; - fma.rn.f64 %fd289, %fd285, %fd363, %fd287; - fma.rn.f64 %fd292, %fd364, %fd289, %fd365; - fma.rn.f64 %fd294, %fd292, %fd289, %fd366; - fma.rn.f64 %fd296, %fd294, %fd289, %fd367; - fma.rn.f64 %fd298, %fd296, %fd289, %fd368; - fma.rn.f64 %fd300, %fd298, %fd289, %fd389; - fma.rn.f64 %fd302, %fd300, %fd289, %fd390; - fma.rn.f64 %fd304, %fd302, %fd289, %fd391; - fma.rn.f64 %fd306, %fd304, %fd289, %fd392; - fma.rn.f64 %fd308, %fd306, %fd289, %fd393; - fma.rn.f64 %fd310, %fd308, %fd289, %fd395; - fma.rn.f64 %fd311, %fd310, %fd289, %fd395; - { - .reg .b32 %temp; - mov.b64 {%r28, %temp}, %fd311; - } - { - .reg .b32 %temp; - mov.b64 {%temp, %r29}, %fd311; - } - shl.b32 %r120, %r27, 20; - add.s32 %r121, %r29, %r120; - mov.b64 %fd405, {%r28, %r121}; - { - .reg .b32 %temp; - mov.b64 {%temp, %r122}, %fd36; - } - mov.b32 %f49, %r122; - abs.f32 %f8, %f49; - setp.lt.f32 %p22, %f8, 0f4086232B; - @%p22 bra BB0_29; - - setp.lt.f64 %p23, %fd36, 0d0000000000000000; - add.f64 %fd312, %fd36, 0d7FF0000000000000; - selp.f64 %fd405, 0d0000000000000000, %fd312, %p23; - setp.geu.f32 %p24, %f8, 0f40874800; - @%p24 bra BB0_29; - - shr.u32 %r123, %r27, 31; - add.s32 %r124, %r27, %r123; - shr.s32 %r125, %r124, 1; - shl.b32 %r126, %r125, 20; - add.s32 %r127, %r126, %r29; - mov.b64 %fd313, {%r28, %r127}; - sub.s32 %r128, %r27, %r125; - shl.b32 %r129, %r128, 20; - add.s32 %r130, %r129, 1072693248; - mov.u32 %r131, 0; - mov.b64 %fd314, {%r131, %r130}; - mul.f64 %fd405, %fd313, %fd314; - -BB0_29: - mov.f64 %fd396, 0d3FF0000000000000; - mov.f64 %fd383, 0d3FE000000000000B; - mov.f64 %fd382, 0d3FC5555555555511; - mov.f64 %fd381, 0d3FA55555555502A1; - mov.f64 %fd380, 0d3F81111111122322; - mov.f64 %fd379, 0d3F56C16C1852B7AF; - mov.f64 %fd378, 0d3F2A01A014761F65; - mov.f64 %fd377, 0d3EFA01997C89EB71; - mov.f64 %fd376, 0d3EC71DEE62401315; - mov.f64 %fd375, 0d3E928AF3FCA213EA; - mov.f64 %fd374, 0d3E5ADE1569CE2BDF; - mov.f64 %fd373, 0dBC7ABC9E3B39803F; - mov.f64 %fd372, 0dBFE62E42FEFA39EF; - mov.f64 %fd371, 0dC338000000000000; - mov.f64 %fd370, 0d4338000000000000; - mov.f64 %fd369, 0d3FF71547652B82FE; - fma.rn.f64 %fd315, %fd2, %fd405, %fd1; - cvt.rn.f32.f64 %f50, %fd315; - st.global.f32 [%rd7+8], %f50; - add.s32 %r30, %r26, 1; - cvt.rn.f32.s32 %f51, %r30; - sub.f32 %f52, %f2, %f51; - cvt.f64.f32 %fd316, %f52; - add.f64 %fd317, %fd316, 0dBFE0000000000000; - mul.f64 %fd318, %fd317, %fd317; - div.rn.f64 %fd319, %fd318, %fd5; - add.f64 %fd320, %fd9, %fd319; - mul.f64 %fd321, %fd10, %fd317; - div.rn.f64 %fd322, %fd321, %fd7; - sub.f64 %fd323, %fd320, %fd322; - mul.f64 %fd41, %fd3, %fd323; - fma.rn.f64 %fd326, %fd41, %fd369, %fd370; - { - .reg .b32 %temp; - mov.b64 {%r31, %temp}, %fd326; - } - add.rn.f64 %fd328, %fd326, %fd371; - fma.rn.f64 %fd330, %fd328, %fd372, %fd41; - fma.rn.f64 %fd332, %fd328, %fd373, %fd330; - fma.rn.f64 %fd335, %fd374, %fd332, %fd375; - fma.rn.f64 %fd337, %fd335, %fd332, %fd376; - fma.rn.f64 %fd339, %fd337, %fd332, %fd377; - fma.rn.f64 %fd341, %fd339, %fd332, %fd378; - fma.rn.f64 %fd343, %fd341, %fd332, %fd379; - fma.rn.f64 %fd345, %fd343, %fd332, %fd380; - fma.rn.f64 %fd347, %fd345, %fd332, %fd381; - fma.rn.f64 %fd349, %fd347, %fd332, %fd382; - fma.rn.f64 %fd351, %fd349, %fd332, %fd383; - fma.rn.f64 %fd353, %fd351, %fd332, %fd396; - fma.rn.f64 %fd354, %fd353, %fd332, %fd396; - { - .reg .b32 %temp; - mov.b64 {%r32, %temp}, %fd354; - } - { - .reg .b32 %temp; - mov.b64 {%temp, %r33}, %fd354; - } - shl.b32 %r132, %r31, 20; - add.s32 %r133, %r33, %r132; - mov.b64 %fd406, {%r32, %r133}; - { - .reg .b32 %temp; - mov.b64 {%temp, %r134}, %fd41; - } - mov.b32 %f53, %r134; - abs.f32 %f9, %f53; - setp.lt.f32 %p25, %f9, 0f4086232B; - @%p25 bra BB0_32; - - setp.lt.f64 %p26, %fd41, 0d0000000000000000; - add.f64 %fd355, %fd41, 0d7FF0000000000000; - selp.f64 %fd406, 0d0000000000000000, %fd355, %p26; - setp.geu.f32 %p27, %f9, 0f40874800; - @%p27 bra BB0_32; - - shr.u32 %r135, %r31, 31; - add.s32 %r136, %r31, %r135; - shr.s32 %r137, %r136, 1; - shl.b32 %r138, %r137, 20; - add.s32 %r139, %r138, %r33; - mov.b64 %fd356, {%r32, %r139}; - sub.s32 %r140, %r31, %r137; - shl.b32 %r141, %r140, 20; - add.s32 %r142, %r141, 1072693248; - mov.u32 %r143, 0; - mov.b64 %fd357, {%r143, %r142}; - mul.f64 %fd406, %fd356, %fd357; - -BB0_32: - fma.rn.f64 %fd358, %fd2, %fd406, %fd1; - cvt.rn.f32.f64 %f54, %fd358; - add.s64 %rd38, %rd7, 16; - st.global.f32 [%rd7+12], %f54; - add.s32 %r149, %r30, 1; - setp.lt.s32 %p28, %r149, %r36; - @%p28 bra BB0_20; - -BB0_33: - add.s32 %r145, %r145, 1; - setp.lt.s32 %p29, %r145, %r36; - @%p29 bra BB0_3; - -BB0_34: + fma.rn.f64 %fd137, %fd136, %fd117, %fd156; + fma.rn.f64 %fd138, %fd137, %fd117, %fd156; + { + .reg .b32 %temp; + mov.b64 {%r32, %temp}, %fd138; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r33}, %fd138; + } + shl.b32 %r127, %r31, 20; + add.s32 %r128, %r33, %r127; + mov.b64 %fd174, {%r32, %r128}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r129}, %fd60; + } + mov.b32 %f19, %r129; + abs.f32 %f13, %f19; + setp.lt.f32 %p117, %f13, 0f4086232B; + @%p117 bra $L__BB0_69; + + mul.f64 %fd158, %fd16, %fd108; + setp.lt.f64 %p118, %fd158, 0d0000000000000000; + add.f64 %fd139, %fd158, 0d7FF0000000000000; + selp.f64 %fd174, 0d0000000000000000, %fd139, %p118; + setp.geu.f32 %p119, %f13, 0f40874800; + @%p119 bra $L__BB0_69; + + mov.f64 %fd145, 0d4338000000000000; + mov.f64 %fd144, 0d3FF71547652B82FE; + fma.rn.f64 %fd143, %fd60, %fd144, %fd145; + { + .reg .b32 %temp; + mov.b64 {%r139, %temp}, %fd143; + } + shr.u32 %r130, %r139, 31; + add.s32 %r131, %r139, %r130; + shr.s32 %r132, %r131, 1; + shl.b32 %r133, %r132, 20; + add.s32 %r134, %r33, %r133; + mov.b64 %fd140, {%r32, %r134}; + sub.s32 %r135, %r139, %r132; + shl.b32 %r136, %r135, 20; + add.s32 %r137, %r136, 1072693248; + mov.u32 %r138, 0; + mov.b64 %fd141, {%r138, %r137}; + mul.f64 %fd174, %fd140, %fd141; + +$L__BB0_69: + fma.rn.f64 %fd142, %fd174, %fd15, %fd14; + cvt.rn.f32.f64 %f20, %fd142; + st.global.f32 [%rd29], %f20; + add.s64 %rd29, %rd29, 4; + add.s32 %r158, %r158, 1; + setp.lt.s32 %p120, %r158, %r36; + @%p120 bra $L__BB0_17; + + add.s32 %r157, %r157, 1; + setp.lt.s32 %p121, %r157, %r36; + @%p121 bra $L__BB0_16; + +$L__BB0_71: ret; -} +} // .globl _Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf .visible .entry _Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf( .param .u32 _Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_0, @@ -806,966 +862,915 @@ BB0_34: .param .u64 _Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_8 ) { - .reg .pred %p<41>; - .reg .f32 %f<630>; - .reg .b32 %r<124>; - .reg .b64 %rd<36>; - - - ld.param.u32 %r15, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_0]; - ld.param.u32 %r16, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_1]; - ld.param.u64 %rd5, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_2]; - ld.param.u64 %rd6, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_3]; - ld.param.u64 %rd7, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_4]; - ld.param.u64 %rd8, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_5]; - ld.param.u64 %rd9, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_6]; - ld.param.u64 %rd10, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_7]; - ld.param.u64 %rd11, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_8]; - mov.u32 %r17, %tid.x; - mov.u32 %r18, %ntid.x; - mov.u32 %r19, %ctaid.x; - mad.lo.s32 %r1, %r18, %r19, %r17; - setp.ge.s32 %p1, %r1, %r16; - @%p1 bra BB1_61; - - cvta.to.global.u64 %rd12, %rd5; - mul.wide.s32 %rd13, %r1, 4; - add.s64 %rd14, %rd12, %rd13; - ld.global.f32 %f1, [%rd14]; - cvta.to.global.u64 %rd15, %rd6; - add.s64 %rd16, %rd15, %rd13; - ld.global.f32 %f2, [%rd16]; - cvta.to.global.u64 %rd17, %rd7; - add.s64 %rd1, %rd17, %rd13; - cvta.to.global.u64 %rd18, %rd8; - add.s64 %rd19, %rd18, %rd13; - ld.global.f32 %f3, [%rd19]; - cvta.to.global.u64 %rd20, %rd9; - add.s64 %rd21, %rd20, %rd13; - cvta.to.global.u64 %rd22, %rd10; - add.s64 %rd23, %rd22, %rd13; - ld.global.f32 %f97, [%rd21]; - mul.f32 %f98, %f97, %f97; - fma.rn.f32 %f99, %f97, %f97, %f98; - sqrt.rn.f32 %f100, %f99; - rcp.rn.f32 %f4, %f100; - ld.global.f32 %f101, [%rd23]; - mul.f32 %f102, %f101, %f101; - fma.rn.f32 %f103, %f101, %f101, %f102; - sqrt.rn.f32 %f104, %f103; - rcp.rn.f32 %f5, %f104; - setp.lt.s32 %p2, %r15, 1; - @%p2 bra BB1_61; - - ld.global.f32 %f105, [%rd1]; - mul.f32 %f6, %f5, %f2; - mul.f32 %f7, %f6, %f6; - add.f32 %f106, %f2, 0fBF800000; - mul.f32 %f8, %f5, %f106; - mul.f32 %f9, %f8, %f8; - mul.f32 %f10, %f105, 0f3E800000; - mov.u32 %r119, 0; - -BB1_3: - cvt.rn.f32.s32 %f107, %r119; - sub.f32 %f11, %f1, %f107; - mul.f32 %f12, %f4, %f11; - abs.f32 %f13, %f12; - setp.ltu.f32 %p3, %f13, 0f3F800000; - @%p3 bra BB1_5; - bra.uni BB1_4; - -BB1_5: - mul.f32 %f126, %f12, %f12; - mov.f32 %f127, 0f3BA0C9F8; - mov.f32 %f128, 0fBA1268FB; - fma.rn.f32 %f129, %f128, %f126, %f127; - mov.f32 %f130, 0fBCDABFD4; - fma.rn.f32 %f131, %f129, %f126, %f130; - mov.f32 %f132, 0f3DE70331; - fma.rn.f32 %f133, %f131, %f126, %f132; - mov.f32 %f134, 0fBEC09330; - fma.rn.f32 %f135, %f133, %f126, %f134; - mov.f32 %f136, 0f3F906EBA; - fma.rn.f32 %f137, %f135, %f126, %f136; - mul.f32 %f614, %f12, %f137; - bra.uni BB1_6; - -BB1_4: - mov.f32 %f108, 0f3A03BB71; - mov.f32 %f109, 0fB7B730FB; - fma.rn.f32 %f110, %f109, %f13, %f108; - mov.f32 %f111, 0fBBACA3B3; - fma.rn.f32 %f112, %f110, %f13, %f111; - mov.f32 %f113, 0f3D0A7445; - fma.rn.f32 %f114, %f112, %f13, %f113; - mov.f32 %f115, 0fBE1B3B75; - fma.rn.f32 %f116, %f114, %f13, %f115; - mov.f32 %f117, 0fBF6B385A; - fma.rn.f32 %f118, %f116, %f13, %f117; - mov.f32 %f119, 0fBFD0316E; - fma.rn.f32 %f120, %f118, %f13, %f119; - mov.f32 %f121, 0fBA031CCE; - fma.rn.f32 %f122, %f120, %f13, %f121; - ex2.approx.ftz.f32 %f123, %f122; - mov.f32 %f124, 0f3F800000; - sub.f32 %f125, %f124, %f123; - mov.b32 %r21, %f125; - setp.ltu.f32 %p4, %f13, 0f407AD445; - selp.b32 %r22, %r21, 1065353216, %p4; - mov.b32 %r23, %f12; - and.b32 %r24, %r23, -2147483648; - or.b32 %r25, %r22, %r24; - mov.b32 %f614, %r25; - -BB1_6: - add.f32 %f138, %f11, 0fBF800000; - mul.f32 %f17, %f4, %f138; - abs.f32 %f18, %f17; - setp.ltu.f32 %p5, %f18, 0f3F800000; - @%p5 bra BB1_8; - bra.uni BB1_7; - -BB1_8: - mul.f32 %f157, %f17, %f17; - mov.f32 %f158, 0f3BA0C9F8; - mov.f32 %f159, 0fBA1268FB; - fma.rn.f32 %f160, %f159, %f157, %f158; - mov.f32 %f161, 0fBCDABFD4; - fma.rn.f32 %f162, %f160, %f157, %f161; - mov.f32 %f163, 0f3DE70331; - fma.rn.f32 %f164, %f162, %f157, %f163; - mov.f32 %f165, 0fBEC09330; - fma.rn.f32 %f166, %f164, %f157, %f165; - mov.f32 %f167, 0f3F906EBA; - fma.rn.f32 %f168, %f166, %f157, %f167; - mul.f32 %f615, %f17, %f168; - bra.uni BB1_9; - -BB1_7: - mov.f32 %f139, 0f3A03BB71; - mov.f32 %f140, 0fB7B730FB; - fma.rn.f32 %f141, %f140, %f18, %f139; - mov.f32 %f142, 0fBBACA3B3; - fma.rn.f32 %f143, %f141, %f18, %f142; - mov.f32 %f144, 0f3D0A7445; - fma.rn.f32 %f145, %f143, %f18, %f144; - mov.f32 %f146, 0fBE1B3B75; - fma.rn.f32 %f147, %f145, %f18, %f146; - mov.f32 %f148, 0fBF6B385A; - fma.rn.f32 %f149, %f147, %f18, %f148; - mov.f32 %f150, 0fBFD0316E; - fma.rn.f32 %f151, %f149, %f18, %f150; - mov.f32 %f152, 0fBA031CCE; - fma.rn.f32 %f153, %f151, %f18, %f152; - ex2.approx.ftz.f32 %f154, %f153; - mov.f32 %f155, 0f3F800000; - sub.f32 %f156, %f155, %f154; - mov.b32 %r26, %f156; - setp.ltu.f32 %p6, %f18, 0f407AD445; - selp.b32 %r27, %r26, 1065353216, %p6; - mov.b32 %r28, %f17; - and.b32 %r29, %r28, -2147483648; - or.b32 %r30, %r27, %r29; - mov.b32 %f615, %r30; - -BB1_9: - sub.f32 %f169, %f614, %f615; - mul.f32 %f22, %f10, %f169; - mad.lo.s32 %r39, %r1, %r15, %r119; - mul.lo.s32 %r3, %r39, %r15; - and.b32 %r34, %r15, 3; - mov.u32 %r120, 0; - setp.eq.s32 %p7, %r34, 0; - @%p7 bra BB1_33; - - setp.eq.s32 %p8, %r34, 1; - @%p8 bra BB1_26; - - setp.eq.s32 %p9, %r34, 2; - @%p9 bra BB1_19; - - abs.f32 %f23, %f6; - setp.ltu.f32 %p10, %f23, 0f3F800000; - @%p10 bra BB1_14; - bra.uni BB1_13; - -BB1_14: - mov.f32 %f188, 0f3BA0C9F8; - mov.f32 %f189, 0fBA1268FB; - fma.rn.f32 %f190, %f189, %f7, %f188; - mov.f32 %f191, 0fBCDABFD4; - fma.rn.f32 %f192, %f190, %f7, %f191; - mov.f32 %f193, 0f3DE70331; - fma.rn.f32 %f194, %f192, %f7, %f193; - mov.f32 %f195, 0fBEC09330; - fma.rn.f32 %f196, %f194, %f7, %f195; - mov.f32 %f197, 0f3F906EBA; - fma.rn.f32 %f198, %f196, %f7, %f197; - mul.f32 %f616, %f6, %f198; - bra.uni BB1_15; - -BB1_13: - mov.f32 %f170, 0f3A03BB71; - mov.f32 %f171, 0fB7B730FB; - fma.rn.f32 %f172, %f171, %f23, %f170; - mov.f32 %f173, 0fBBACA3B3; - fma.rn.f32 %f174, %f172, %f23, %f173; - mov.f32 %f175, 0f3D0A7445; - fma.rn.f32 %f176, %f174, %f23, %f175; - mov.f32 %f177, 0fBE1B3B75; - fma.rn.f32 %f178, %f176, %f23, %f177; - mov.f32 %f179, 0fBF6B385A; - fma.rn.f32 %f180, %f178, %f23, %f179; - mov.f32 %f181, 0fBFD0316E; - fma.rn.f32 %f182, %f180, %f23, %f181; - mov.f32 %f183, 0fBA031CCE; - fma.rn.f32 %f184, %f182, %f23, %f183; - ex2.approx.ftz.f32 %f185, %f184; - mov.f32 %f186, 0f3F800000; - sub.f32 %f187, %f186, %f185; - mov.b32 %r40, %f187; - setp.ltu.f32 %p11, %f23, 0f407AD445; - selp.b32 %r41, %r40, 1065353216, %p11; - mov.b32 %r42, %f6; - and.b32 %r43, %r42, -2147483648; - or.b32 %r44, %r41, %r43; - mov.b32 %f616, %r44; - -BB1_15: - abs.f32 %f27, %f8; - setp.ltu.f32 %p12, %f27, 0f3F800000; - @%p12 bra BB1_17; - bra.uni BB1_16; - -BB1_17: - mov.f32 %f217, 0f3BA0C9F8; - mov.f32 %f218, 0fBA1268FB; - fma.rn.f32 %f219, %f218, %f9, %f217; - mov.f32 %f220, 0fBCDABFD4; - fma.rn.f32 %f221, %f219, %f9, %f220; - mov.f32 %f222, 0f3DE70331; - fma.rn.f32 %f223, %f221, %f9, %f222; - mov.f32 %f224, 0fBEC09330; - fma.rn.f32 %f225, %f223, %f9, %f224; - mov.f32 %f226, 0f3F906EBA; - fma.rn.f32 %f227, %f225, %f9, %f226; - mul.f32 %f617, %f8, %f227; - bra.uni BB1_18; - -BB1_16: - mov.f32 %f199, 0f3A03BB71; - mov.f32 %f200, 0fB7B730FB; - fma.rn.f32 %f201, %f200, %f27, %f199; - mov.f32 %f202, 0fBBACA3B3; - fma.rn.f32 %f203, %f201, %f27, %f202; - mov.f32 %f204, 0f3D0A7445; - fma.rn.f32 %f205, %f203, %f27, %f204; - mov.f32 %f206, 0fBE1B3B75; - fma.rn.f32 %f207, %f205, %f27, %f206; - mov.f32 %f208, 0fBF6B385A; - fma.rn.f32 %f209, %f207, %f27, %f208; - mov.f32 %f210, 0fBFD0316E; - fma.rn.f32 %f211, %f209, %f27, %f210; - mov.f32 %f212, 0fBA031CCE; - fma.rn.f32 %f213, %f211, %f27, %f212; - ex2.approx.ftz.f32 %f214, %f213; - mov.f32 %f215, 0f3F800000; - sub.f32 %f216, %f215, %f214; - mov.b32 %r45, %f216; - setp.ltu.f32 %p13, %f27, 0f407AD445; - selp.b32 %r46, %r45, 1065353216, %p13; - mov.b32 %r47, %f8; - and.b32 %r48, %r47, -2147483648; - or.b32 %r49, %r46, %r48; - mov.b32 %f617, %r49; - -BB1_18: - sub.f32 %f228, %f616, %f617; - fma.rn.f32 %f229, %f22, %f228, %f3; - cvta.to.global.u64 %rd24, %rd11; - mul.wide.s32 %rd25, %r3, 4; - add.s64 %rd26, %rd24, %rd25; - st.global.f32 [%rd26], %f229; - mov.u32 %r120, 1; - -BB1_19: - cvt.rn.f32.s32 %f230, %r120; - sub.f32 %f31, %f2, %f230; - mul.f32 %f32, %f5, %f31; - abs.f32 %f33, %f32; - setp.ltu.f32 %p14, %f33, 0f3F800000; - @%p14 bra BB1_21; - bra.uni BB1_20; - -BB1_21: - mul.f32 %f249, %f32, %f32; - mov.f32 %f250, 0f3BA0C9F8; - mov.f32 %f251, 0fBA1268FB; - fma.rn.f32 %f252, %f251, %f249, %f250; - mov.f32 %f253, 0fBCDABFD4; - fma.rn.f32 %f254, %f252, %f249, %f253; - mov.f32 %f255, 0f3DE70331; - fma.rn.f32 %f256, %f254, %f249, %f255; - mov.f32 %f257, 0fBEC09330; - fma.rn.f32 %f258, %f256, %f249, %f257; - mov.f32 %f259, 0f3F906EBA; - fma.rn.f32 %f260, %f258, %f249, %f259; - mul.f32 %f618, %f32, %f260; - bra.uni BB1_22; - -BB1_20: - mov.f32 %f231, 0f3A03BB71; - mov.f32 %f232, 0fB7B730FB; - fma.rn.f32 %f233, %f232, %f33, %f231; - mov.f32 %f234, 0fBBACA3B3; - fma.rn.f32 %f235, %f233, %f33, %f234; - mov.f32 %f236, 0f3D0A7445; - fma.rn.f32 %f237, %f235, %f33, %f236; - mov.f32 %f238, 0fBE1B3B75; - fma.rn.f32 %f239, %f237, %f33, %f238; - mov.f32 %f240, 0fBF6B385A; - fma.rn.f32 %f241, %f239, %f33, %f240; - mov.f32 %f242, 0fBFD0316E; - fma.rn.f32 %f243, %f241, %f33, %f242; - mov.f32 %f244, 0fBA031CCE; - fma.rn.f32 %f245, %f243, %f33, %f244; - ex2.approx.ftz.f32 %f246, %f245; - mov.f32 %f247, 0f3F800000; - sub.f32 %f248, %f247, %f246; - mov.b32 %r51, %f248; - setp.ltu.f32 %p15, %f33, 0f407AD445; - selp.b32 %r52, %r51, 1065353216, %p15; - mov.b32 %r53, %f32; - and.b32 %r54, %r53, -2147483648; - or.b32 %r55, %r52, %r54; - mov.b32 %f618, %r55; - -BB1_22: - add.f32 %f261, %f31, 0fBF800000; - mul.f32 %f37, %f5, %f261; - abs.f32 %f38, %f37; - setp.ltu.f32 %p16, %f38, 0f3F800000; - @%p16 bra BB1_24; - bra.uni BB1_23; - -BB1_24: - mul.f32 %f280, %f37, %f37; - mov.f32 %f281, 0f3BA0C9F8; - mov.f32 %f282, 0fBA1268FB; - fma.rn.f32 %f283, %f282, %f280, %f281; - mov.f32 %f284, 0fBCDABFD4; - fma.rn.f32 %f285, %f283, %f280, %f284; - mov.f32 %f286, 0f3DE70331; - fma.rn.f32 %f287, %f285, %f280, %f286; - mov.f32 %f288, 0fBEC09330; - fma.rn.f32 %f289, %f287, %f280, %f288; - mov.f32 %f290, 0f3F906EBA; - fma.rn.f32 %f291, %f289, %f280, %f290; - mul.f32 %f619, %f37, %f291; - bra.uni BB1_25; - -BB1_23: - mov.f32 %f262, 0f3A03BB71; - mov.f32 %f263, 0fB7B730FB; - fma.rn.f32 %f264, %f263, %f38, %f262; - mov.f32 %f265, 0fBBACA3B3; - fma.rn.f32 %f266, %f264, %f38, %f265; - mov.f32 %f267, 0f3D0A7445; - fma.rn.f32 %f268, %f266, %f38, %f267; - mov.f32 %f269, 0fBE1B3B75; - fma.rn.f32 %f270, %f268, %f38, %f269; - mov.f32 %f271, 0fBF6B385A; - fma.rn.f32 %f272, %f270, %f38, %f271; - mov.f32 %f273, 0fBFD0316E; - fma.rn.f32 %f274, %f272, %f38, %f273; - mov.f32 %f275, 0fBA031CCE; - fma.rn.f32 %f276, %f274, %f38, %f275; - ex2.approx.ftz.f32 %f277, %f276; - mov.f32 %f278, 0f3F800000; - sub.f32 %f279, %f278, %f277; - mov.b32 %r56, %f279; - setp.ltu.f32 %p17, %f38, 0f407AD445; - selp.b32 %r57, %r56, 1065353216, %p17; - mov.b32 %r58, %f37; - and.b32 %r59, %r58, -2147483648; - or.b32 %r60, %r57, %r59; - mov.b32 %f619, %r60; - -BB1_25: - sub.f32 %f292, %f618, %f619; - fma.rn.f32 %f293, %f22, %f292, %f3; - add.s32 %r61, %r120, %r3; - cvta.to.global.u64 %rd27, %rd11; - mul.wide.s32 %rd28, %r61, 4; - add.s64 %rd29, %rd27, %rd28; - st.global.f32 [%rd29], %f293; - add.s32 %r120, %r120, 1; - -BB1_26: - cvt.rn.f32.s32 %f294, %r120; - sub.f32 %f42, %f2, %f294; - mul.f32 %f43, %f5, %f42; - abs.f32 %f44, %f43; - setp.ltu.f32 %p18, %f44, 0f3F800000; - @%p18 bra BB1_28; - bra.uni BB1_27; - -BB1_28: - mul.f32 %f313, %f43, %f43; - mov.f32 %f314, 0f3BA0C9F8; - mov.f32 %f315, 0fBA1268FB; - fma.rn.f32 %f316, %f315, %f313, %f314; - mov.f32 %f317, 0fBCDABFD4; - fma.rn.f32 %f318, %f316, %f313, %f317; - mov.f32 %f319, 0f3DE70331; - fma.rn.f32 %f320, %f318, %f313, %f319; - mov.f32 %f321, 0fBEC09330; - fma.rn.f32 %f322, %f320, %f313, %f321; - mov.f32 %f323, 0f3F906EBA; - fma.rn.f32 %f324, %f322, %f313, %f323; - mul.f32 %f620, %f43, %f324; - bra.uni BB1_29; - -BB1_27: - mov.f32 %f295, 0f3A03BB71; - mov.f32 %f296, 0fB7B730FB; - fma.rn.f32 %f297, %f296, %f44, %f295; - mov.f32 %f298, 0fBBACA3B3; - fma.rn.f32 %f299, %f297, %f44, %f298; - mov.f32 %f300, 0f3D0A7445; - fma.rn.f32 %f301, %f299, %f44, %f300; - mov.f32 %f302, 0fBE1B3B75; - fma.rn.f32 %f303, %f301, %f44, %f302; - mov.f32 %f304, 0fBF6B385A; - fma.rn.f32 %f305, %f303, %f44, %f304; - mov.f32 %f306, 0fBFD0316E; - fma.rn.f32 %f307, %f305, %f44, %f306; - mov.f32 %f308, 0fBA031CCE; - fma.rn.f32 %f309, %f307, %f44, %f308; - ex2.approx.ftz.f32 %f310, %f309; - mov.f32 %f311, 0f3F800000; - sub.f32 %f312, %f311, %f310; - mov.b32 %r62, %f312; - setp.ltu.f32 %p19, %f44, 0f407AD445; - selp.b32 %r63, %r62, 1065353216, %p19; - mov.b32 %r64, %f43; - and.b32 %r65, %r64, -2147483648; - or.b32 %r66, %r63, %r65; - mov.b32 %f620, %r66; - -BB1_29: - add.f32 %f325, %f42, 0fBF800000; - mul.f32 %f48, %f5, %f325; - abs.f32 %f49, %f48; - setp.ltu.f32 %p20, %f49, 0f3F800000; - @%p20 bra BB1_31; - bra.uni BB1_30; - -BB1_31: - mul.f32 %f344, %f48, %f48; - mov.f32 %f345, 0f3BA0C9F8; - mov.f32 %f346, 0fBA1268FB; - fma.rn.f32 %f347, %f346, %f344, %f345; - mov.f32 %f348, 0fBCDABFD4; - fma.rn.f32 %f349, %f347, %f344, %f348; - mov.f32 %f350, 0f3DE70331; - fma.rn.f32 %f351, %f349, %f344, %f350; - mov.f32 %f352, 0fBEC09330; - fma.rn.f32 %f353, %f351, %f344, %f352; - mov.f32 %f354, 0f3F906EBA; - fma.rn.f32 %f355, %f353, %f344, %f354; - mul.f32 %f621, %f48, %f355; - bra.uni BB1_32; - -BB1_30: - mov.f32 %f326, 0f3A03BB71; - mov.f32 %f327, 0fB7B730FB; - fma.rn.f32 %f328, %f327, %f49, %f326; - mov.f32 %f329, 0fBBACA3B3; - fma.rn.f32 %f330, %f328, %f49, %f329; - mov.f32 %f331, 0f3D0A7445; - fma.rn.f32 %f332, %f330, %f49, %f331; - mov.f32 %f333, 0fBE1B3B75; - fma.rn.f32 %f334, %f332, %f49, %f333; - mov.f32 %f335, 0fBF6B385A; - fma.rn.f32 %f336, %f334, %f49, %f335; - mov.f32 %f337, 0fBFD0316E; - fma.rn.f32 %f338, %f336, %f49, %f337; - mov.f32 %f339, 0fBA031CCE; - fma.rn.f32 %f340, %f338, %f49, %f339; - ex2.approx.ftz.f32 %f341, %f340; - mov.f32 %f342, 0f3F800000; - sub.f32 %f343, %f342, %f341; - mov.b32 %r67, %f343; - setp.ltu.f32 %p21, %f49, 0f407AD445; - selp.b32 %r68, %r67, 1065353216, %p21; - mov.b32 %r69, %f48; - and.b32 %r70, %r69, -2147483648; - or.b32 %r71, %r68, %r70; - mov.b32 %f621, %r71; - -BB1_32: - sub.f32 %f356, %f620, %f621; - fma.rn.f32 %f357, %f22, %f356, %f3; - add.s32 %r72, %r120, %r3; - cvta.to.global.u64 %rd30, %rd11; - mul.wide.s32 %rd31, %r72, 4; - add.s64 %rd32, %rd30, %rd31; - st.global.f32 [%rd32], %f357; - add.s32 %r120, %r120, 1; - -BB1_33: - setp.lt.u32 %p22, %r15, 4; - @%p22 bra BB1_60; - - mad.lo.s32 %r77, %r15, %r1, %r119; - mad.lo.s32 %r78, %r15, %r77, %r120; - cvta.to.global.u64 %rd33, %rd11; - mul.wide.s32 %rd34, %r78, 4; - add.s64 %rd35, %rd33, %rd34; - -BB1_35: - cvt.rn.f32.s32 %f358, %r120; - sub.f32 %f53, %f2, %f358; - mul.f32 %f54, %f5, %f53; - abs.f32 %f55, %f54; - setp.ltu.f32 %p23, %f55, 0f3F800000; - @%p23 bra BB1_37; - bra.uni BB1_36; - -BB1_37: - mul.f32 %f377, %f54, %f54; - mov.f32 %f378, 0f3BA0C9F8; - mov.f32 %f379, 0fBA1268FB; - fma.rn.f32 %f380, %f379, %f377, %f378; - mov.f32 %f381, 0fBCDABFD4; - fma.rn.f32 %f382, %f380, %f377, %f381; - mov.f32 %f383, 0f3DE70331; - fma.rn.f32 %f384, %f382, %f377, %f383; - mov.f32 %f385, 0fBEC09330; - fma.rn.f32 %f386, %f384, %f377, %f385; - mov.f32 %f387, 0f3F906EBA; - fma.rn.f32 %f388, %f386, %f377, %f387; - mul.f32 %f622, %f54, %f388; - bra.uni BB1_38; - -BB1_36: - mov.f32 %f359, 0f3A03BB71; - mov.f32 %f360, 0fB7B730FB; - fma.rn.f32 %f361, %f360, %f55, %f359; - mov.f32 %f362, 0fBBACA3B3; - fma.rn.f32 %f363, %f361, %f55, %f362; - mov.f32 %f364, 0f3D0A7445; - fma.rn.f32 %f365, %f363, %f55, %f364; - mov.f32 %f366, 0fBE1B3B75; - fma.rn.f32 %f367, %f365, %f55, %f366; - mov.f32 %f368, 0fBF6B385A; - fma.rn.f32 %f369, %f367, %f55, %f368; - mov.f32 %f370, 0fBFD0316E; - fma.rn.f32 %f371, %f369, %f55, %f370; - mov.f32 %f372, 0fBA031CCE; - fma.rn.f32 %f373, %f371, %f55, %f372; - ex2.approx.ftz.f32 %f374, %f373; - mov.f32 %f375, 0f3F800000; - sub.f32 %f376, %f375, %f374; - mov.b32 %r79, %f376; - setp.ltu.f32 %p24, %f55, 0f407AD445; - selp.b32 %r80, %r79, 1065353216, %p24; - mov.b32 %r81, %f54; - and.b32 %r82, %r81, -2147483648; - or.b32 %r83, %r80, %r82; - mov.b32 %f622, %r83; - -BB1_38: - add.f32 %f389, %f53, 0fBF800000; - mul.f32 %f59, %f5, %f389; - abs.f32 %f60, %f59; - setp.ltu.f32 %p25, %f60, 0f3F800000; - @%p25 bra BB1_40; - bra.uni BB1_39; - -BB1_40: - mul.f32 %f408, %f59, %f59; - mov.f32 %f409, 0f3BA0C9F8; - mov.f32 %f410, 0fBA1268FB; - fma.rn.f32 %f411, %f410, %f408, %f409; - mov.f32 %f412, 0fBCDABFD4; - fma.rn.f32 %f413, %f411, %f408, %f412; - mov.f32 %f414, 0f3DE70331; - fma.rn.f32 %f415, %f413, %f408, %f414; - mov.f32 %f416, 0fBEC09330; - fma.rn.f32 %f417, %f415, %f408, %f416; - mov.f32 %f418, 0f3F906EBA; - fma.rn.f32 %f419, %f417, %f408, %f418; - mul.f32 %f623, %f59, %f419; - bra.uni BB1_41; - -BB1_39: - mov.f32 %f390, 0f3A03BB71; - mov.f32 %f391, 0fB7B730FB; - fma.rn.f32 %f392, %f391, %f60, %f390; - mov.f32 %f393, 0fBBACA3B3; - fma.rn.f32 %f394, %f392, %f60, %f393; - mov.f32 %f395, 0f3D0A7445; - fma.rn.f32 %f396, %f394, %f60, %f395; - mov.f32 %f397, 0fBE1B3B75; - fma.rn.f32 %f398, %f396, %f60, %f397; - mov.f32 %f399, 0fBF6B385A; - fma.rn.f32 %f400, %f398, %f60, %f399; - mov.f32 %f401, 0fBFD0316E; - fma.rn.f32 %f402, %f400, %f60, %f401; - mov.f32 %f403, 0fBA031CCE; - fma.rn.f32 %f404, %f402, %f60, %f403; - ex2.approx.ftz.f32 %f405, %f404; - mov.f32 %f406, 0f3F800000; - sub.f32 %f407, %f406, %f405; - mov.b32 %r84, %f407; - setp.ltu.f32 %p26, %f60, 0f407AD445; - selp.b32 %r85, %r84, 1065353216, %p26; - mov.b32 %r86, %f59; - and.b32 %r87, %r86, -2147483648; - or.b32 %r88, %r85, %r87; - mov.b32 %f623, %r88; - -BB1_41: - sub.f32 %f420, %f622, %f623; - fma.rn.f32 %f421, %f22, %f420, %f3; - st.global.f32 [%rd35], %f421; - add.s32 %r10, %r120, 1; - cvt.rn.f32.s32 %f422, %r10; - sub.f32 %f64, %f2, %f422; - mul.f32 %f65, %f5, %f64; - abs.f32 %f66, %f65; - setp.ltu.f32 %p27, %f66, 0f3F800000; - @%p27 bra BB1_43; - bra.uni BB1_42; - -BB1_43: - mul.f32 %f441, %f65, %f65; - mov.f32 %f442, 0f3BA0C9F8; - mov.f32 %f443, 0fBA1268FB; - fma.rn.f32 %f444, %f443, %f441, %f442; - mov.f32 %f445, 0fBCDABFD4; - fma.rn.f32 %f446, %f444, %f441, %f445; - mov.f32 %f447, 0f3DE70331; - fma.rn.f32 %f448, %f446, %f441, %f447; - mov.f32 %f449, 0fBEC09330; - fma.rn.f32 %f450, %f448, %f441, %f449; - mov.f32 %f451, 0f3F906EBA; - fma.rn.f32 %f452, %f450, %f441, %f451; - mul.f32 %f624, %f65, %f452; - bra.uni BB1_44; - -BB1_42: - mov.f32 %f423, 0f3A03BB71; - mov.f32 %f424, 0fB7B730FB; - fma.rn.f32 %f425, %f424, %f66, %f423; - mov.f32 %f426, 0fBBACA3B3; - fma.rn.f32 %f427, %f425, %f66, %f426; - mov.f32 %f428, 0f3D0A7445; - fma.rn.f32 %f429, %f427, %f66, %f428; - mov.f32 %f430, 0fBE1B3B75; - fma.rn.f32 %f431, %f429, %f66, %f430; - mov.f32 %f432, 0fBF6B385A; - fma.rn.f32 %f433, %f431, %f66, %f432; - mov.f32 %f434, 0fBFD0316E; - fma.rn.f32 %f435, %f433, %f66, %f434; - mov.f32 %f436, 0fBA031CCE; - fma.rn.f32 %f437, %f435, %f66, %f436; - ex2.approx.ftz.f32 %f438, %f437; - mov.f32 %f439, 0f3F800000; - sub.f32 %f440, %f439, %f438; - mov.b32 %r89, %f440; - setp.ltu.f32 %p28, %f66, 0f407AD445; - selp.b32 %r90, %r89, 1065353216, %p28; - mov.b32 %r91, %f65; - and.b32 %r92, %r91, -2147483648; - or.b32 %r93, %r90, %r92; - mov.b32 %f624, %r93; - -BB1_44: - add.f32 %f453, %f64, 0fBF800000; - mul.f32 %f70, %f5, %f453; - abs.f32 %f71, %f70; - setp.ltu.f32 %p29, %f71, 0f3F800000; - @%p29 bra BB1_46; - bra.uni BB1_45; - -BB1_46: - mul.f32 %f472, %f70, %f70; - mov.f32 %f473, 0f3BA0C9F8; - mov.f32 %f474, 0fBA1268FB; - fma.rn.f32 %f475, %f474, %f472, %f473; - mov.f32 %f476, 0fBCDABFD4; - fma.rn.f32 %f477, %f475, %f472, %f476; - mov.f32 %f478, 0f3DE70331; - fma.rn.f32 %f479, %f477, %f472, %f478; - mov.f32 %f480, 0fBEC09330; - fma.rn.f32 %f481, %f479, %f472, %f480; - mov.f32 %f482, 0f3F906EBA; - fma.rn.f32 %f483, %f481, %f472, %f482; - mul.f32 %f625, %f70, %f483; - bra.uni BB1_47; - -BB1_45: - mov.f32 %f454, 0f3A03BB71; - mov.f32 %f455, 0fB7B730FB; - fma.rn.f32 %f456, %f455, %f71, %f454; - mov.f32 %f457, 0fBBACA3B3; - fma.rn.f32 %f458, %f456, %f71, %f457; - mov.f32 %f459, 0f3D0A7445; - fma.rn.f32 %f460, %f458, %f71, %f459; - mov.f32 %f461, 0fBE1B3B75; - fma.rn.f32 %f462, %f460, %f71, %f461; - mov.f32 %f463, 0fBF6B385A; - fma.rn.f32 %f464, %f462, %f71, %f463; - mov.f32 %f465, 0fBFD0316E; - fma.rn.f32 %f466, %f464, %f71, %f465; - mov.f32 %f467, 0fBA031CCE; - fma.rn.f32 %f468, %f466, %f71, %f467; - ex2.approx.ftz.f32 %f469, %f468; - mov.f32 %f470, 0f3F800000; - sub.f32 %f471, %f470, %f469; - mov.b32 %r94, %f471; - setp.ltu.f32 %p30, %f71, 0f407AD445; - selp.b32 %r95, %r94, 1065353216, %p30; - mov.b32 %r96, %f70; - and.b32 %r97, %r96, -2147483648; - or.b32 %r98, %r95, %r97; - mov.b32 %f625, %r98; - -BB1_47: - sub.f32 %f484, %f624, %f625; - fma.rn.f32 %f485, %f22, %f484, %f3; - st.global.f32 [%rd35+4], %f485; - add.s32 %r11, %r10, 1; - cvt.rn.f32.s32 %f486, %r11; - sub.f32 %f75, %f2, %f486; - mul.f32 %f76, %f5, %f75; - abs.f32 %f77, %f76; - setp.ltu.f32 %p31, %f77, 0f3F800000; - @%p31 bra BB1_49; - bra.uni BB1_48; - -BB1_49: - mul.f32 %f505, %f76, %f76; - mov.f32 %f506, 0f3BA0C9F8; - mov.f32 %f507, 0fBA1268FB; - fma.rn.f32 %f508, %f507, %f505, %f506; - mov.f32 %f509, 0fBCDABFD4; - fma.rn.f32 %f510, %f508, %f505, %f509; - mov.f32 %f511, 0f3DE70331; - fma.rn.f32 %f512, %f510, %f505, %f511; - mov.f32 %f513, 0fBEC09330; - fma.rn.f32 %f514, %f512, %f505, %f513; - mov.f32 %f515, 0f3F906EBA; - fma.rn.f32 %f516, %f514, %f505, %f515; - mul.f32 %f626, %f76, %f516; - bra.uni BB1_50; - -BB1_48: - mov.f32 %f487, 0f3A03BB71; - mov.f32 %f488, 0fB7B730FB; - fma.rn.f32 %f489, %f488, %f77, %f487; - mov.f32 %f490, 0fBBACA3B3; - fma.rn.f32 %f491, %f489, %f77, %f490; - mov.f32 %f492, 0f3D0A7445; - fma.rn.f32 %f493, %f491, %f77, %f492; - mov.f32 %f494, 0fBE1B3B75; - fma.rn.f32 %f495, %f493, %f77, %f494; - mov.f32 %f496, 0fBF6B385A; - fma.rn.f32 %f497, %f495, %f77, %f496; - mov.f32 %f498, 0fBFD0316E; - fma.rn.f32 %f499, %f497, %f77, %f498; - mov.f32 %f500, 0fBA031CCE; - fma.rn.f32 %f501, %f499, %f77, %f500; - ex2.approx.ftz.f32 %f502, %f501; - mov.f32 %f503, 0f3F800000; - sub.f32 %f504, %f503, %f502; - mov.b32 %r99, %f504; - setp.ltu.f32 %p32, %f77, 0f407AD445; - selp.b32 %r100, %r99, 1065353216, %p32; - mov.b32 %r101, %f76; + .reg .pred %p<71>; + .reg .f32 %f<242>; + .reg .b32 %r<109>; + .reg .f64 %fd<46>; + .reg .b64 %rd<26>; + + + ld.param.u32 %r19, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_0]; + ld.param.u32 %r20, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_1]; + ld.param.u64 %rd3, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_2]; + ld.param.u64 %rd4, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_3]; + ld.param.u64 %rd5, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_4]; + ld.param.u64 %rd6, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_5]; + ld.param.u64 %rd7, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_6]; + ld.param.u64 %rd8, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_7]; + ld.param.u64 %rd9, [_Z29kernel_guassianintegrateblobsiiPKfS0_S0_S0_S0_S0_Pf_param_8]; + cvta.to.global.u64 %rd1, %rd9; + mov.u32 %r21, %ntid.x; + mov.u32 %r22, %ctaid.x; + mov.u32 %r23, %tid.x; + mad.lo.s32 %r1, %r22, %r21, %r23; + setp.ge.s32 %p3, %r1, %r20; + @%p3 bra $L__BB1_51; + + cvta.to.global.u64 %rd10, %rd3; + mul.wide.s32 %rd11, %r1, 4; + add.s64 %rd12, %rd10, %rd11; + ld.global.f32 %f1, [%rd12]; + cvta.to.global.u64 %rd13, %rd4; + add.s64 %rd14, %rd13, %rd11; + ld.global.f32 %f2, [%rd14]; + cvta.to.global.u64 %rd15, %rd5; + add.s64 %rd16, %rd15, %rd11; + cvta.to.global.u64 %rd17, %rd6; + add.s64 %rd18, %rd17, %rd11; + ld.global.f32 %f3, [%rd18]; + cvta.to.global.u64 %rd19, %rd7; + add.s64 %rd20, %rd19, %rd11; + cvta.to.global.u64 %rd21, %rd8; + add.s64 %rd22, %rd21, %rd11; + ld.global.f32 %f4, [%rd22]; + ld.global.f32 %f5, [%rd16]; + ld.global.f32 %f6, [%rd20]; + cvt.f64.f32 %fd1, %f6; + { + .reg .b32 %temp; + mov.b64 {%temp, %r2}, %fd1; + } + mov.f64 %fd23, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r3}, %fd23; + } + and.b32 %r4, %r3, 2146435072; + setp.eq.s32 %p4, %r4, 1062207488; + abs.f64 %fd2, %fd1; + { // callseq 5, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd2; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0 + ); + ld.param.f64 %fd41, [retval0+0]; + } // callseq 5 + setp.lt.s32 %p5, %r2, 0; + and.pred %p1, %p5, %p4; + not.pred %p6, %p1; + @%p6 bra $L__BB1_3; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r24}, %fd41; + } + xor.b32 %r25, %r24, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r26, %temp}, %fd41; + } + mov.b64 %fd41, {%r26, %r25}; + +$L__BB1_3: + setp.eq.f32 %p7, %f6, 0f00000000; + @%p7 bra $L__BB1_7; + bra.uni $L__BB1_4; + +$L__BB1_7: + selp.b32 %r27, %r2, 0, %p4; + mov.u32 %r28, 0; + or.b32 %r29, %r27, 2146435072; + setp.lt.s32 %p11, %r3, 0; + selp.b32 %r30, %r29, %r27, %p11; + mov.b64 %fd41, {%r28, %r30}; + bra.uni $L__BB1_8; + +$L__BB1_4: + setp.gt.s32 %p8, %r2, -1; + @%p8 bra $L__BB1_8; + + cvt.rzi.f64.f64 %fd25, %fd23; + setp.eq.f64 %p9, %fd25, 0d4000000000000000; + @%p9 bra $L__BB1_8; + + mov.f64 %fd41, 0dFFF8000000000000; + +$L__BB1_8: + add.f64 %fd8, %fd1, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r31}, %fd8; + } + and.b32 %r32, %r31, 2146435072; + setp.ne.s32 %p12, %r32, 2146435072; + mov.f64 %fd42, %fd41; + @%p12 bra $L__BB1_14; + + setp.gtu.f64 %p13, %fd2, 0d7FF0000000000000; + mov.f64 %fd42, %fd8; + @%p13 bra $L__BB1_14; + + { + .reg .b32 %temp; + mov.b64 {%r33, %temp}, %fd23; + } + and.b32 %r5, %r3, 2147483647; + setp.eq.s32 %p14, %r5, 2146435072; + setp.eq.s32 %p15, %r33, 0; + and.pred %p16, %p14, %p15; + @%p16 bra $L__BB1_13; + bra.uni $L__BB1_11; + +$L__BB1_13: + setp.gt.f64 %p23, %fd2, 0d3FF0000000000000; + selp.b32 %r40, 2146435072, 0, %p23; + mov.u32 %r41, 0; + xor.b32 %r42, %r40, 2146435072; + setp.lt.s32 %p24, %r3, 0; + selp.b32 %r43, %r42, %r40, %p24; + setp.eq.f32 %p25, %f6, 0fBF800000; + selp.b32 %r44, 1072693248, %r43, %p25; + mov.b64 %fd42, {%r41, %r44}; + bra.uni $L__BB1_14; + +$L__BB1_11: + { + .reg .b32 %temp; + mov.b64 {%r34, %temp}, %fd1; + } + and.b32 %r35, %r2, 2147483647; + setp.ne.s32 %p17, %r35, 2146435072; + setp.ne.s32 %p18, %r34, 0; + or.pred %p19, %p17, %p18; + mov.f64 %fd42, %fd41; + @%p19 bra $L__BB1_14; + + setp.gt.s32 %p20, %r3, -1; + selp.b32 %r36, 2146435072, 0, %p20; + mov.u32 %r37, 0; + setp.ne.s32 %p21, %r5, 1071644672; + and.pred %p22, %p21, %p1; + or.b32 %r38, %r36, -2147483648; + selp.b32 %r39, %r38, %r36, %p22; + mov.b64 %fd42, {%r37, %r39}; + +$L__BB1_14: + add.f64 %fd28, %fd42, %fd42; + setp.eq.f32 %p26, %f6, 0f3F800000; + selp.f64 %fd29, 0d4000000000000000, %fd28, %p26; + sqrt.rn.f64 %fd30, %fd29; + rcp.rn.f64 %fd31, %fd30; + cvt.rn.f32.f64 %f7, %fd31; + cvt.f64.f32 %fd12, %f4; + { + .reg .b32 %temp; + mov.b64 {%temp, %r6}, %fd12; + } + abs.f64 %fd13, %fd12; + { // callseq 6, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd13; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0 + ); + ld.param.f64 %fd44, [retval0+0]; + } // callseq 6 + setp.lt.s32 %p27, %r6, 0; + and.pred %p2, %p27, %p4; + not.pred %p29, %p2; + @%p29 bra $L__BB1_16; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r45}, %fd44; + } + xor.b32 %r46, %r45, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r47, %temp}, %fd44; + } + mov.b64 %fd44, {%r47, %r46}; + +$L__BB1_16: + setp.eq.f32 %p30, %f4, 0f00000000; + @%p30 bra $L__BB1_20; + bra.uni $L__BB1_17; + +$L__BB1_20: + selp.b32 %r48, %r6, 0, %p4; + mov.u32 %r49, 0; + or.b32 %r50, %r48, 2146435072; + setp.lt.s32 %p34, %r3, 0; + selp.b32 %r51, %r50, %r48, %p34; + mov.b64 %fd44, {%r49, %r51}; + bra.uni $L__BB1_21; + +$L__BB1_17: + setp.gt.s32 %p31, %r6, -1; + @%p31 bra $L__BB1_21; + + cvt.rzi.f64.f64 %fd33, %fd23; + setp.eq.f64 %p32, %fd33, 0d4000000000000000; + @%p32 bra $L__BB1_21; + + mov.f64 %fd44, 0dFFF8000000000000; + +$L__BB1_21: + add.f64 %fd19, %fd12, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r52}, %fd19; + } + and.b32 %r53, %r52, 2146435072; + setp.ne.s32 %p35, %r53, 2146435072; + mov.f64 %fd45, %fd44; + @%p35 bra $L__BB1_27; + + setp.gtu.f64 %p36, %fd13, 0d7FF0000000000000; + mov.f64 %fd45, %fd19; + @%p36 bra $L__BB1_27; + + { + .reg .b32 %temp; + mov.b64 {%r54, %temp}, %fd23; + } + and.b32 %r7, %r3, 2147483647; + setp.eq.s32 %p37, %r7, 2146435072; + setp.eq.s32 %p38, %r54, 0; + and.pred %p39, %p37, %p38; + @%p39 bra $L__BB1_26; + bra.uni $L__BB1_24; + +$L__BB1_26: + setp.gt.f64 %p46, %fd13, 0d3FF0000000000000; + selp.b32 %r61, 2146435072, 0, %p46; + mov.u32 %r62, 0; + xor.b32 %r63, %r61, 2146435072; + setp.lt.s32 %p47, %r3, 0; + selp.b32 %r64, %r63, %r61, %p47; + setp.eq.f32 %p48, %f4, 0fBF800000; + selp.b32 %r65, 1072693248, %r64, %p48; + mov.b64 %fd45, {%r62, %r65}; + bra.uni $L__BB1_27; + +$L__BB1_24: + { + .reg .b32 %temp; + mov.b64 {%r55, %temp}, %fd12; + } + and.b32 %r56, %r6, 2147483647; + setp.ne.s32 %p40, %r56, 2146435072; + setp.ne.s32 %p41, %r55, 0; + or.pred %p42, %p40, %p41; + mov.f64 %fd45, %fd44; + @%p42 bra $L__BB1_27; + + setp.gt.s32 %p43, %r3, -1; + selp.b32 %r57, 2146435072, 0, %p43; + mov.u32 %r58, 0; + setp.ne.s32 %p44, %r7, 1071644672; + and.pred %p45, %p44, %p2; + or.b32 %r59, %r57, -2147483648; + selp.b32 %r60, %r59, %r57, %p45; + mov.b64 %fd45, {%r58, %r60}; + +$L__BB1_27: + add.f64 %fd36, %fd45, %fd45; + setp.eq.f32 %p49, %f4, 0f3F800000; + selp.f64 %fd37, 0d4000000000000000, %fd36, %p49; + sqrt.rn.f64 %fd38, %fd37; + rcp.rn.f64 %fd39, %fd38; + cvt.rn.f32.f64 %f8, %fd39; + setp.lt.s32 %p50, %r19, 1; + @%p50 bra $L__BB1_51; + + and.b32 %r8, %r19, 1; + sub.s32 %r9, %r19, %r8; + mul.lo.s32 %r10, %r1, %r19; + mul.f32 %f9, %f5, 0f3E800000; + mov.u32 %r105, 0; + setp.eq.s32 %p55, %r19, 1; + setp.eq.s32 %p65, %r8, 0; + +$L__BB1_29: + cvt.rn.f32.s32 %f47, %r105; + sub.f32 %f10, %f1, %f47; + add.f32 %f48, %f10, 0f3F000000; + mul.f32 %f11, %f48, %f7; + abs.f32 %f49, %f11; + setp.ltu.f32 %p51, %f49, 0f3F8060FE; + setp.ge.f32 %p52, %f49, 0f3F8060FE; + mul.f32 %f50, %f11, %f11; + selp.f32 %f51, %f49, %f50, %p52; + selp.f32 %f52, 0f3789CA3C, 0f38B1E96A, %p52; + selp.f32 %f53, 0fB9F560B9, 0fBA574D20, %p52; + fma.rn.f32 %f54, %f52, %f51, %f53; + selp.f32 %f55, 0f3BAC840B, 0f3BAAD5EA, %p52; + fma.rn.f32 %f56, %f54, %f51, %f55; + selp.f32 %f57, 0fBD0C8162, 0fBCDC1BE7, %p52; + fma.rn.f32 %f58, %f56, %f51, %f57; + selp.f32 %f59, 0f3E1CF906, 0f3DE718AF, %p52; + fma.rn.f32 %f60, %f58, %f51, %f59; + selp.f32 %f61, 0f3F6A937E, 0fBEC093AC, %p52; + fma.rn.f32 %f62, %f60, %f51, %f61; + selp.f32 %f63, 0f3F20D842, 0f3E0375D3, %p52; + fma.rn.f32 %f64, %f62, %f51, %f63; + neg.f32 %f65, %f49; + selp.f32 %f66, %f65, %f11, %p52; + fma.rn.f32 %f234, %f64, %f66, %f66; + @%p51 bra $L__BB1_31; + + ex2.approx.ftz.f32 %f67, %f234; + mov.f32 %f68, 0f3F800000; + sub.f32 %f69, %f68, %f67; + mov.b32 %r67, %f69; + mov.b32 %r68, %f11; + and.b32 %r69, %r68, -2147483648; + or.b32 %r70, %r69, %r67; + mov.b32 %f234, %r70; + +$L__BB1_31: + add.f32 %f70, %f10, 0fBF000000; + mul.f32 %f15, %f70, %f7; + abs.f32 %f71, %f15; + setp.ltu.f32 %p53, %f71, 0f3F8060FE; + setp.ge.f32 %p54, %f71, 0f3F8060FE; + mul.f32 %f72, %f15, %f15; + selp.f32 %f73, %f71, %f72, %p54; + selp.f32 %f74, 0f3789CA3C, 0f38B1E96A, %p54; + selp.f32 %f75, 0fB9F560B9, 0fBA574D20, %p54; + fma.rn.f32 %f76, %f74, %f73, %f75; + selp.f32 %f77, 0f3BAC840B, 0f3BAAD5EA, %p54; + fma.rn.f32 %f78, %f76, %f73, %f77; + selp.f32 %f79, 0fBD0C8162, 0fBCDC1BE7, %p54; + fma.rn.f32 %f80, %f78, %f73, %f79; + selp.f32 %f81, 0f3E1CF906, 0f3DE718AF, %p54; + fma.rn.f32 %f82, %f80, %f73, %f81; + selp.f32 %f83, 0f3F6A937E, 0fBEC093AC, %p54; + fma.rn.f32 %f84, %f82, %f73, %f83; + selp.f32 %f85, 0f3F20D842, 0f3E0375D3, %p54; + fma.rn.f32 %f86, %f84, %f73, %f85; + neg.f32 %f87, %f71; + selp.f32 %f88, %f87, %f15, %p54; + fma.rn.f32 %f235, %f86, %f88, %f88; + @%p53 bra $L__BB1_33; + + ex2.approx.ftz.f32 %f89, %f235; + mov.f32 %f90, 0f3F800000; + sub.f32 %f91, %f90, %f89; + mov.b32 %r71, %f91; + mov.b32 %r72, %f15; + and.b32 %r73, %r72, -2147483648; + or.b32 %r74, %r73, %r71; + mov.b32 %f235, %r74; + +$L__BB1_33: + sub.f32 %f92, %f234, %f235; + mul.f32 %f19, %f9, %f92; + add.s32 %r76, %r105, %r10; + mul.lo.s32 %r12, %r76, %r19; + mov.u32 %r108, 0; + @%p55 bra $L__BB1_44; + + mov.u32 %r107, %r9; + +$L__BB1_35: + cvt.rn.f32.s32 %f93, %r108; + sub.f32 %f20, %f2, %f93; + add.f32 %f94, %f20, 0f3F000000; + mul.f32 %f21, %f94, %f8; + abs.f32 %f95, %f21; + setp.ltu.f32 %p56, %f95, 0f3F8060FE; + setp.ge.f32 %p57, %f95, 0f3F8060FE; + mul.f32 %f96, %f21, %f21; + selp.f32 %f97, %f95, %f96, %p57; + selp.f32 %f98, 0f3789CA3C, 0f38B1E96A, %p57; + selp.f32 %f99, 0fB9F560B9, 0fBA574D20, %p57; + fma.rn.f32 %f100, %f98, %f97, %f99; + selp.f32 %f101, 0f3BAC840B, 0f3BAAD5EA, %p57; + fma.rn.f32 %f102, %f100, %f97, %f101; + selp.f32 %f103, 0fBD0C8162, 0fBCDC1BE7, %p57; + fma.rn.f32 %f104, %f102, %f97, %f103; + selp.f32 %f105, 0f3E1CF906, 0f3DE718AF, %p57; + fma.rn.f32 %f106, %f104, %f97, %f105; + selp.f32 %f107, 0f3F6A937E, 0fBEC093AC, %p57; + fma.rn.f32 %f108, %f106, %f97, %f107; + selp.f32 %f109, 0f3F20D842, 0f3E0375D3, %p57; + fma.rn.f32 %f110, %f108, %f97, %f109; + neg.f32 %f111, %f95; + selp.f32 %f112, %f111, %f21, %p57; + fma.rn.f32 %f236, %f110, %f112, %f112; + @%p56 bra $L__BB1_37; + + ex2.approx.ftz.f32 %f113, %f236; + mov.f32 %f114, 0f3F800000; + sub.f32 %f115, %f114, %f113; + mov.b32 %r78, %f115; + mov.b32 %r79, %f21; + and.b32 %r80, %r79, -2147483648; + or.b32 %r81, %r80, %r78; + mov.b32 %f236, %r81; + +$L__BB1_37: + add.f32 %f116, %f20, 0fBF000000; + mul.f32 %f25, %f116, %f8; + abs.f32 %f117, %f25; + setp.ltu.f32 %p58, %f117, 0f3F8060FE; + setp.ge.f32 %p59, %f117, 0f3F8060FE; + mul.f32 %f118, %f25, %f25; + selp.f32 %f119, %f117, %f118, %p59; + selp.f32 %f120, 0f3789CA3C, 0f38B1E96A, %p59; + selp.f32 %f121, 0fB9F560B9, 0fBA574D20, %p59; + fma.rn.f32 %f122, %f120, %f119, %f121; + selp.f32 %f123, 0f3BAC840B, 0f3BAAD5EA, %p59; + fma.rn.f32 %f124, %f122, %f119, %f123; + selp.f32 %f125, 0fBD0C8162, 0fBCDC1BE7, %p59; + fma.rn.f32 %f126, %f124, %f119, %f125; + selp.f32 %f127, 0f3E1CF906, 0f3DE718AF, %p59; + fma.rn.f32 %f128, %f126, %f119, %f127; + selp.f32 %f129, 0f3F6A937E, 0fBEC093AC, %p59; + fma.rn.f32 %f130, %f128, %f119, %f129; + selp.f32 %f131, 0f3F20D842, 0f3E0375D3, %p59; + fma.rn.f32 %f132, %f130, %f119, %f131; + neg.f32 %f133, %f117; + selp.f32 %f134, %f133, %f25, %p59; + fma.rn.f32 %f237, %f132, %f134, %f134; + @%p58 bra $L__BB1_39; + + ex2.approx.ftz.f32 %f135, %f237; + mov.f32 %f136, 0f3F800000; + sub.f32 %f137, %f136, %f135; + mov.b32 %r82, %f137; + mov.b32 %r83, %f25; + and.b32 %r84, %r83, -2147483648; + or.b32 %r85, %r84, %r82; + mov.b32 %f237, %r85; + +$L__BB1_39: + sub.f32 %f138, %f236, %f237; + fma.rn.f32 %f139, %f19, %f138, %f3; + add.s32 %r86, %r108, %r12; + mul.wide.s32 %rd23, %r86, 4; + add.s64 %rd2, %rd1, %rd23; + st.global.f32 [%rd2], %f139; + add.s32 %r87, %r108, 1; + cvt.rn.f32.s32 %f140, %r87; + sub.f32 %f29, %f2, %f140; + add.f32 %f141, %f29, 0f3F000000; + mul.f32 %f30, %f141, %f8; + abs.f32 %f142, %f30; + setp.ltu.f32 %p60, %f142, 0f3F8060FE; + setp.ge.f32 %p61, %f142, 0f3F8060FE; + mul.f32 %f143, %f30, %f30; + selp.f32 %f144, %f142, %f143, %p61; + selp.f32 %f145, 0f3789CA3C, 0f38B1E96A, %p61; + selp.f32 %f146, 0fB9F560B9, 0fBA574D20, %p61; + fma.rn.f32 %f147, %f145, %f144, %f146; + selp.f32 %f148, 0f3BAC840B, 0f3BAAD5EA, %p61; + fma.rn.f32 %f149, %f147, %f144, %f148; + selp.f32 %f150, 0fBD0C8162, 0fBCDC1BE7, %p61; + fma.rn.f32 %f151, %f149, %f144, %f150; + selp.f32 %f152, 0f3E1CF906, 0f3DE718AF, %p61; + fma.rn.f32 %f153, %f151, %f144, %f152; + selp.f32 %f154, 0f3F6A937E, 0fBEC093AC, %p61; + fma.rn.f32 %f155, %f153, %f144, %f154; + selp.f32 %f156, 0f3F20D842, 0f3E0375D3, %p61; + fma.rn.f32 %f157, %f155, %f144, %f156; + neg.f32 %f158, %f142; + selp.f32 %f159, %f158, %f30, %p61; + fma.rn.f32 %f238, %f157, %f159, %f159; + @%p60 bra $L__BB1_41; + + ex2.approx.ftz.f32 %f160, %f238; + mov.f32 %f161, 0f3F800000; + sub.f32 %f162, %f161, %f160; + mov.b32 %r88, %f162; + mov.b32 %r89, %f30; + and.b32 %r90, %r89, -2147483648; + or.b32 %r91, %r90, %r88; + mov.b32 %f238, %r91; + +$L__BB1_41: + add.f32 %f163, %f29, 0fBF000000; + mul.f32 %f34, %f163, %f8; + abs.f32 %f164, %f34; + setp.ltu.f32 %p62, %f164, 0f3F8060FE; + setp.ge.f32 %p63, %f164, 0f3F8060FE; + mul.f32 %f165, %f34, %f34; + selp.f32 %f166, %f164, %f165, %p63; + selp.f32 %f167, 0f3789CA3C, 0f38B1E96A, %p63; + selp.f32 %f168, 0fB9F560B9, 0fBA574D20, %p63; + fma.rn.f32 %f169, %f167, %f166, %f168; + selp.f32 %f170, 0f3BAC840B, 0f3BAAD5EA, %p63; + fma.rn.f32 %f171, %f169, %f166, %f170; + selp.f32 %f172, 0fBD0C8162, 0fBCDC1BE7, %p63; + fma.rn.f32 %f173, %f171, %f166, %f172; + selp.f32 %f174, 0f3E1CF906, 0f3DE718AF, %p63; + fma.rn.f32 %f175, %f173, %f166, %f174; + selp.f32 %f176, 0f3F6A937E, 0fBEC093AC, %p63; + fma.rn.f32 %f177, %f175, %f166, %f176; + selp.f32 %f178, 0f3F20D842, 0f3E0375D3, %p63; + fma.rn.f32 %f179, %f177, %f166, %f178; + neg.f32 %f180, %f164; + selp.f32 %f181, %f180, %f34, %p63; + fma.rn.f32 %f239, %f179, %f181, %f181; + @%p62 bra $L__BB1_43; + + ex2.approx.ftz.f32 %f182, %f239; + mov.f32 %f183, 0f3F800000; + sub.f32 %f184, %f183, %f182; + mov.b32 %r92, %f184; + mov.b32 %r93, %f34; + and.b32 %r94, %r93, -2147483648; + or.b32 %r95, %r94, %r92; + mov.b32 %f239, %r95; + +$L__BB1_43: + sub.f32 %f185, %f238, %f239; + fma.rn.f32 %f186, %f19, %f185, %f3; + st.global.f32 [%rd2+4], %f186; + add.s32 %r108, %r108, 2; + add.s32 %r107, %r107, -2; + setp.ne.s32 %p64, %r107, 0; + @%p64 bra $L__BB1_35; + +$L__BB1_44: + @%p65 bra $L__BB1_50; + + cvt.rn.f32.s32 %f187, %r108; + sub.f32 %f38, %f2, %f187; + add.f32 %f188, %f38, 0f3F000000; + mul.f32 %f39, %f188, %f8; + abs.f32 %f189, %f39; + setp.ltu.f32 %p66, %f189, 0f3F8060FE; + setp.ge.f32 %p67, %f189, 0f3F8060FE; + mul.f32 %f190, %f39, %f39; + selp.f32 %f191, %f189, %f190, %p67; + selp.f32 %f192, 0f3789CA3C, 0f38B1E96A, %p67; + selp.f32 %f193, 0fB9F560B9, 0fBA574D20, %p67; + fma.rn.f32 %f194, %f192, %f191, %f193; + selp.f32 %f195, 0f3BAC840B, 0f3BAAD5EA, %p67; + fma.rn.f32 %f196, %f194, %f191, %f195; + selp.f32 %f197, 0fBD0C8162, 0fBCDC1BE7, %p67; + fma.rn.f32 %f198, %f196, %f191, %f197; + selp.f32 %f199, 0f3E1CF906, 0f3DE718AF, %p67; + fma.rn.f32 %f200, %f198, %f191, %f199; + selp.f32 %f201, 0f3F6A937E, 0fBEC093AC, %p67; + fma.rn.f32 %f202, %f200, %f191, %f201; + selp.f32 %f203, 0f3F20D842, 0f3E0375D3, %p67; + fma.rn.f32 %f204, %f202, %f191, %f203; + neg.f32 %f205, %f189; + selp.f32 %f206, %f205, %f39, %p67; + fma.rn.f32 %f240, %f204, %f206, %f206; + @%p66 bra $L__BB1_47; + + ex2.approx.ftz.f32 %f207, %f240; + mov.f32 %f208, 0f3F800000; + sub.f32 %f209, %f208, %f207; + mov.b32 %r96, %f209; + mov.b32 %r97, %f39; + and.b32 %r98, %r97, -2147483648; + or.b32 %r99, %r98, %r96; + mov.b32 %f240, %r99; + +$L__BB1_47: + add.f32 %f210, %f38, 0fBF000000; + mul.f32 %f43, %f210, %f8; + abs.f32 %f211, %f43; + setp.ltu.f32 %p68, %f211, 0f3F8060FE; + setp.ge.f32 %p69, %f211, 0f3F8060FE; + mul.f32 %f212, %f43, %f43; + selp.f32 %f213, %f211, %f212, %p69; + selp.f32 %f214, 0f3789CA3C, 0f38B1E96A, %p69; + selp.f32 %f215, 0fB9F560B9, 0fBA574D20, %p69; + fma.rn.f32 %f216, %f214, %f213, %f215; + selp.f32 %f217, 0f3BAC840B, 0f3BAAD5EA, %p69; + fma.rn.f32 %f218, %f216, %f213, %f217; + selp.f32 %f219, 0fBD0C8162, 0fBCDC1BE7, %p69; + fma.rn.f32 %f220, %f218, %f213, %f219; + selp.f32 %f221, 0f3E1CF906, 0f3DE718AF, %p69; + fma.rn.f32 %f222, %f220, %f213, %f221; + selp.f32 %f223, 0f3F6A937E, 0fBEC093AC, %p69; + fma.rn.f32 %f224, %f222, %f213, %f223; + selp.f32 %f225, 0f3F20D842, 0f3E0375D3, %p69; + fma.rn.f32 %f226, %f224, %f213, %f225; + neg.f32 %f227, %f211; + selp.f32 %f228, %f227, %f43, %p69; + fma.rn.f32 %f241, %f226, %f228, %f228; + @%p68 bra $L__BB1_49; + + ex2.approx.ftz.f32 %f229, %f241; + mov.f32 %f230, 0f3F800000; + sub.f32 %f231, %f230, %f229; + mov.b32 %r100, %f231; + mov.b32 %r101, %f43; and.b32 %r102, %r101, -2147483648; - or.b32 %r103, %r100, %r102; - mov.b32 %f626, %r103; - -BB1_50: - add.f32 %f517, %f75, 0fBF800000; - mul.f32 %f81, %f5, %f517; - abs.f32 %f82, %f81; - setp.ltu.f32 %p33, %f82, 0f3F800000; - @%p33 bra BB1_52; - bra.uni BB1_51; - -BB1_52: - mul.f32 %f536, %f81, %f81; - mov.f32 %f537, 0f3BA0C9F8; - mov.f32 %f538, 0fBA1268FB; - fma.rn.f32 %f539, %f538, %f536, %f537; - mov.f32 %f540, 0fBCDABFD4; - fma.rn.f32 %f541, %f539, %f536, %f540; - mov.f32 %f542, 0f3DE70331; - fma.rn.f32 %f543, %f541, %f536, %f542; - mov.f32 %f544, 0fBEC09330; - fma.rn.f32 %f545, %f543, %f536, %f544; - mov.f32 %f546, 0f3F906EBA; - fma.rn.f32 %f547, %f545, %f536, %f546; - mul.f32 %f627, %f81, %f547; - bra.uni BB1_53; - -BB1_51: - mov.f32 %f518, 0f3A03BB71; - mov.f32 %f519, 0fB7B730FB; - fma.rn.f32 %f520, %f519, %f82, %f518; - mov.f32 %f521, 0fBBACA3B3; - fma.rn.f32 %f522, %f520, %f82, %f521; - mov.f32 %f523, 0f3D0A7445; - fma.rn.f32 %f524, %f522, %f82, %f523; - mov.f32 %f525, 0fBE1B3B75; - fma.rn.f32 %f526, %f524, %f82, %f525; - mov.f32 %f527, 0fBF6B385A; - fma.rn.f32 %f528, %f526, %f82, %f527; - mov.f32 %f529, 0fBFD0316E; - fma.rn.f32 %f530, %f528, %f82, %f529; - mov.f32 %f531, 0fBA031CCE; - fma.rn.f32 %f532, %f530, %f82, %f531; - ex2.approx.ftz.f32 %f533, %f532; - mov.f32 %f534, 0f3F800000; - sub.f32 %f535, %f534, %f533; - mov.b32 %r104, %f535; - setp.ltu.f32 %p34, %f82, 0f407AD445; - selp.b32 %r105, %r104, 1065353216, %p34; - mov.b32 %r106, %f81; - and.b32 %r107, %r106, -2147483648; - or.b32 %r108, %r105, %r107; - mov.b32 %f627, %r108; - -BB1_53: - sub.f32 %f548, %f626, %f627; - fma.rn.f32 %f549, %f22, %f548, %f3; - st.global.f32 [%rd35+8], %f549; - add.s32 %r12, %r11, 1; - cvt.rn.f32.s32 %f550, %r12; - sub.f32 %f86, %f2, %f550; - mul.f32 %f87, %f5, %f86; - abs.f32 %f88, %f87; - setp.ltu.f32 %p35, %f88, 0f3F800000; - @%p35 bra BB1_55; - bra.uni BB1_54; - -BB1_55: - mul.f32 %f569, %f87, %f87; - mov.f32 %f570, 0f3BA0C9F8; - mov.f32 %f571, 0fBA1268FB; - fma.rn.f32 %f572, %f571, %f569, %f570; - mov.f32 %f573, 0fBCDABFD4; - fma.rn.f32 %f574, %f572, %f569, %f573; - mov.f32 %f575, 0f3DE70331; - fma.rn.f32 %f576, %f574, %f569, %f575; - mov.f32 %f577, 0fBEC09330; - fma.rn.f32 %f578, %f576, %f569, %f577; - mov.f32 %f579, 0f3F906EBA; - fma.rn.f32 %f580, %f578, %f569, %f579; - mul.f32 %f628, %f87, %f580; - bra.uni BB1_56; - -BB1_54: - mov.f32 %f551, 0f3A03BB71; - mov.f32 %f552, 0fB7B730FB; - fma.rn.f32 %f553, %f552, %f88, %f551; - mov.f32 %f554, 0fBBACA3B3; - fma.rn.f32 %f555, %f553, %f88, %f554; - mov.f32 %f556, 0f3D0A7445; - fma.rn.f32 %f557, %f555, %f88, %f556; - mov.f32 %f558, 0fBE1B3B75; - fma.rn.f32 %f559, %f557, %f88, %f558; - mov.f32 %f560, 0fBF6B385A; - fma.rn.f32 %f561, %f559, %f88, %f560; - mov.f32 %f562, 0fBFD0316E; - fma.rn.f32 %f563, %f561, %f88, %f562; - mov.f32 %f564, 0fBA031CCE; - fma.rn.f32 %f565, %f563, %f88, %f564; - ex2.approx.ftz.f32 %f566, %f565; - mov.f32 %f567, 0f3F800000; - sub.f32 %f568, %f567, %f566; - mov.b32 %r109, %f568; - setp.ltu.f32 %p36, %f88, 0f407AD445; - selp.b32 %r110, %r109, 1065353216, %p36; - mov.b32 %r111, %f87; - and.b32 %r112, %r111, -2147483648; - or.b32 %r113, %r110, %r112; - mov.b32 %f628, %r113; - -BB1_56: - add.f32 %f581, %f86, 0fBF800000; - mul.f32 %f92, %f5, %f581; - abs.f32 %f93, %f92; - setp.ltu.f32 %p37, %f93, 0f3F800000; - @%p37 bra BB1_58; - bra.uni BB1_57; - -BB1_58: - mul.f32 %f600, %f92, %f92; - mov.f32 %f601, 0f3BA0C9F8; - mov.f32 %f602, 0fBA1268FB; - fma.rn.f32 %f603, %f602, %f600, %f601; - mov.f32 %f604, 0fBCDABFD4; - fma.rn.f32 %f605, %f603, %f600, %f604; - mov.f32 %f606, 0f3DE70331; - fma.rn.f32 %f607, %f605, %f600, %f606; - mov.f32 %f608, 0fBEC09330; - fma.rn.f32 %f609, %f607, %f600, %f608; - mov.f32 %f610, 0f3F906EBA; - fma.rn.f32 %f611, %f609, %f600, %f610; - mul.f32 %f629, %f92, %f611; - bra.uni BB1_59; - -BB1_57: - mov.f32 %f582, 0f3A03BB71; - mov.f32 %f583, 0fB7B730FB; - fma.rn.f32 %f584, %f583, %f93, %f582; - mov.f32 %f585, 0fBBACA3B3; - fma.rn.f32 %f586, %f584, %f93, %f585; - mov.f32 %f587, 0f3D0A7445; - fma.rn.f32 %f588, %f586, %f93, %f587; - mov.f32 %f589, 0fBE1B3B75; - fma.rn.f32 %f590, %f588, %f93, %f589; - mov.f32 %f591, 0fBF6B385A; - fma.rn.f32 %f592, %f590, %f93, %f591; - mov.f32 %f593, 0fBFD0316E; - fma.rn.f32 %f594, %f592, %f93, %f593; - mov.f32 %f595, 0fBA031CCE; - fma.rn.f32 %f596, %f594, %f93, %f595; - ex2.approx.ftz.f32 %f597, %f596; - mov.f32 %f598, 0f3F800000; - sub.f32 %f599, %f598, %f597; - mov.b32 %r114, %f599; - setp.ltu.f32 %p38, %f93, 0f407AD445; - selp.b32 %r115, %r114, 1065353216, %p38; - mov.b32 %r116, %f92; - and.b32 %r117, %r116, -2147483648; - or.b32 %r118, %r115, %r117; - mov.b32 %f629, %r118; - -BB1_59: - sub.f32 %f612, %f628, %f629; - fma.rn.f32 %f613, %f22, %f612, %f3; - add.s64 %rd4, %rd35, 16; - st.global.f32 [%rd35+12], %f613; - add.s32 %r120, %r12, 1; - setp.lt.s32 %p39, %r120, %r15; - mov.u64 %rd35, %rd4; - @%p39 bra BB1_35; - -BB1_60: - add.s32 %r119, %r119, 1; - setp.lt.s32 %p40, %r119, %r15; - @%p40 bra BB1_3; - -BB1_61: + or.b32 %r103, %r102, %r100; + mov.b32 %f241, %r103; + +$L__BB1_49: + sub.f32 %f232, %f240, %f241; + fma.rn.f32 %f233, %f19, %f232, %f3; + add.s32 %r104, %r108, %r12; + mul.wide.s32 %rd24, %r104, 4; + add.s64 %rd25, %rd1, %rd24; + st.global.f32 [%rd25], %f233; + +$L__BB1_50: + add.s32 %r105, %r105, 1; + setp.lt.s32 %p70, %r105, %r19; + @%p70 bra $L__BB1_29; + +$L__BB1_51: ret; + } +.func (.param .b64 func_retval0) __internal_accurate_pow( + .param .b64 __internal_accurate_pow_param_0 +) +{ + .reg .pred %p<10>; + .reg .f32 %f<3>; + .reg .b32 %r<53>; + .reg .f64 %fd<138>; + + + ld.param.f64 %fd12, [__internal_accurate_pow_param_0]; + { + .reg .b32 %temp; + mov.b64 {%temp, %r50}, %fd12; + } + { + .reg .b32 %temp; + mov.b64 {%r49, %temp}, %fd12; + } + shr.u32 %r51, %r50, 20; + setp.ne.s32 %p1, %r51, 0; + @%p1 bra $L__BB2_2; + + mul.f64 %fd13, %fd12, 0d4350000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r50}, %fd13; + } + { + .reg .b32 %temp; + mov.b64 {%r49, %temp}, %fd13; + } + shr.u32 %r16, %r50, 20; + add.s32 %r51, %r16, -54; + +$L__BB2_2: + add.s32 %r52, %r51, -1023; + and.b32 %r17, %r50, -2146435073; + or.b32 %r18, %r17, 1072693248; + mov.b64 %fd135, {%r49, %r18}; + setp.lt.u32 %p2, %r18, 1073127583; + @%p2 bra $L__BB2_4; + + { + .reg .b32 %temp; + mov.b64 {%r19, %temp}, %fd135; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r20}, %fd135; + } + add.s32 %r21, %r20, -1048576; + mov.b64 %fd135, {%r19, %r21}; + add.s32 %r52, %r51, -1022; + +$L__BB2_4: + add.f64 %fd14, %fd135, 0d3FF0000000000000; + mov.f64 %fd15, 0d3FF0000000000000; + rcp.approx.ftz.f64 %fd16, %fd14; + neg.f64 %fd17, %fd14; + fma.rn.f64 %fd18, %fd17, %fd16, %fd15; + fma.rn.f64 %fd19, %fd18, %fd18, %fd18; + fma.rn.f64 %fd20, %fd19, %fd16, %fd16; + add.f64 %fd21, %fd135, 0dBFF0000000000000; + mul.f64 %fd22, %fd21, %fd20; + fma.rn.f64 %fd23, %fd21, %fd20, %fd22; + mul.f64 %fd24, %fd23, %fd23; + mov.f64 %fd25, 0d3ED0F5D241AD3B5A; + mov.f64 %fd26, 0d3EB0F5FF7D2CAFE2; + fma.rn.f64 %fd27, %fd26, %fd24, %fd25; + mov.f64 %fd28, 0d3EF3B20A75488A3F; + fma.rn.f64 %fd29, %fd27, %fd24, %fd28; + mov.f64 %fd30, 0d3F1745CDE4FAECD5; + fma.rn.f64 %fd31, %fd29, %fd24, %fd30; + mov.f64 %fd32, 0d3F3C71C7258A578B; + fma.rn.f64 %fd33, %fd31, %fd24, %fd32; + mov.f64 %fd34, 0d3F6249249242B910; + fma.rn.f64 %fd35, %fd33, %fd24, %fd34; + mov.f64 %fd36, 0d3F89999999999DFB; + fma.rn.f64 %fd37, %fd35, %fd24, %fd36; + sub.f64 %fd38, %fd21, %fd23; + add.f64 %fd39, %fd38, %fd38; + mov.f64 %fd40, 0d4000000000000000; + neg.f64 %fd41, %fd23; + fma.rn.f64 %fd42, %fd41, %fd21, %fd39; + mul.f64 %fd43, %fd20, %fd42; + fma.rn.f64 %fd44, %fd24, %fd37, 0d3FB5555555555555; + mov.f64 %fd45, 0d3FB5555555555555; + sub.f64 %fd46, %fd45, %fd44; + fma.rn.f64 %fd47, %fd24, %fd37, %fd46; + add.f64 %fd48, %fd47, 0d0000000000000000; + add.f64 %fd49, %fd48, 0dBC46A4CB00B9E7B0; + add.f64 %fd50, %fd44, %fd49; + sub.f64 %fd51, %fd44, %fd50; + add.f64 %fd52, %fd49, %fd51; + mul.rn.f64 %fd53, %fd23, %fd23; + neg.f64 %fd54, %fd53; + fma.rn.f64 %fd55, %fd23, %fd23, %fd54; + { + .reg .b32 %temp; + mov.b64 {%r22, %temp}, %fd43; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r23}, %fd43; + } + add.s32 %r24, %r23, 1048576; + mov.b64 %fd56, {%r22, %r24}; + fma.rn.f64 %fd57, %fd23, %fd56, %fd55; + mul.rn.f64 %fd58, %fd53, %fd23; + neg.f64 %fd59, %fd58; + fma.rn.f64 %fd60, %fd53, %fd23, %fd59; + fma.rn.f64 %fd61, %fd53, %fd43, %fd60; + fma.rn.f64 %fd62, %fd57, %fd23, %fd61; + mul.rn.f64 %fd63, %fd50, %fd58; + neg.f64 %fd64, %fd63; + fma.rn.f64 %fd65, %fd50, %fd58, %fd64; + fma.rn.f64 %fd66, %fd50, %fd62, %fd65; + fma.rn.f64 %fd67, %fd52, %fd58, %fd66; + add.f64 %fd68, %fd63, %fd67; + sub.f64 %fd69, %fd63, %fd68; + add.f64 %fd70, %fd67, %fd69; + add.f64 %fd71, %fd23, %fd68; + sub.f64 %fd72, %fd23, %fd71; + add.f64 %fd73, %fd68, %fd72; + add.f64 %fd74, %fd70, %fd73; + add.f64 %fd75, %fd43, %fd74; + add.f64 %fd76, %fd71, %fd75; + sub.f64 %fd77, %fd71, %fd76; + add.f64 %fd78, %fd75, %fd77; + xor.b32 %r25, %r52, -2147483648; + mov.u32 %r26, -2147483648; + mov.u32 %r27, 1127219200; + mov.b64 %fd79, {%r25, %r27}; + mov.b64 %fd80, {%r26, %r27}; + sub.f64 %fd81, %fd79, %fd80; + mov.f64 %fd82, 0d3FE62E42FEFA39EF; + fma.rn.f64 %fd83, %fd81, %fd82, %fd76; + neg.f64 %fd84, %fd81; + fma.rn.f64 %fd85, %fd84, %fd82, %fd83; + sub.f64 %fd86, %fd85, %fd76; + sub.f64 %fd87, %fd78, %fd86; + mov.f64 %fd88, 0d3C7ABC9E3B39803F; + fma.rn.f64 %fd89, %fd81, %fd88, %fd87; + add.f64 %fd90, %fd83, %fd89; + sub.f64 %fd91, %fd83, %fd90; + add.f64 %fd92, %fd89, %fd91; + { + .reg .b32 %temp; + mov.b64 {%temp, %r28}, %fd40; + } + shl.b32 %r29, %r28, 1; + setp.gt.u32 %p3, %r29, -33554433; + and.b32 %r30, %r28, -15728641; + selp.b32 %r31, %r30, %r28, %p3; + { + .reg .b32 %temp; + mov.b64 {%r32, %temp}, %fd40; + } + mov.b64 %fd93, {%r32, %r31}; + mul.rn.f64 %fd94, %fd90, %fd93; + neg.f64 %fd95, %fd94; + fma.rn.f64 %fd96, %fd90, %fd93, %fd95; + fma.rn.f64 %fd97, %fd92, %fd93, %fd96; + add.f64 %fd4, %fd94, %fd97; + sub.f64 %fd98, %fd94, %fd4; + add.f64 %fd5, %fd97, %fd98; + mov.f64 %fd99, 0d4338000000000000; + mov.f64 %fd100, 0d3FF71547652B82FE; + fma.rn.f64 %fd101, %fd4, %fd100, %fd99; + { + .reg .b32 %temp; + mov.b64 {%r13, %temp}, %fd101; + } + mov.f64 %fd102, 0dC338000000000000; + add.rn.f64 %fd103, %fd101, %fd102; + mov.f64 %fd104, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd105, %fd103, %fd104, %fd4; + mov.f64 %fd106, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd107, %fd103, %fd106, %fd105; + mov.f64 %fd108, 0d3E928AF3FCA213EA; + mov.f64 %fd109, 0d3E5ADE1569CE2BDF; + fma.rn.f64 %fd110, %fd109, %fd107, %fd108; + mov.f64 %fd111, 0d3EC71DEE62401315; + fma.rn.f64 %fd112, %fd110, %fd107, %fd111; + mov.f64 %fd113, 0d3EFA01997C89EB71; + fma.rn.f64 %fd114, %fd112, %fd107, %fd113; + mov.f64 %fd115, 0d3F2A01A014761F65; + fma.rn.f64 %fd116, %fd114, %fd107, %fd115; + mov.f64 %fd117, 0d3F56C16C1852B7AF; + fma.rn.f64 %fd118, %fd116, %fd107, %fd117; + mov.f64 %fd119, 0d3F81111111122322; + fma.rn.f64 %fd120, %fd118, %fd107, %fd119; + mov.f64 %fd121, 0d3FA55555555502A1; + fma.rn.f64 %fd122, %fd120, %fd107, %fd121; + mov.f64 %fd123, 0d3FC5555555555511; + fma.rn.f64 %fd124, %fd122, %fd107, %fd123; + mov.f64 %fd125, 0d3FE000000000000B; + fma.rn.f64 %fd126, %fd124, %fd107, %fd125; + fma.rn.f64 %fd127, %fd126, %fd107, %fd15; + fma.rn.f64 %fd128, %fd127, %fd107, %fd15; + { + .reg .b32 %temp; + mov.b64 {%r14, %temp}, %fd128; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r15}, %fd128; + } + shl.b32 %r33, %r13, 20; + add.s32 %r34, %r15, %r33; + mov.b64 %fd136, {%r14, %r34}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r35}, %fd4; + } + mov.b32 %f2, %r35; + abs.f32 %f1, %f2; + setp.lt.f32 %p4, %f1, 0f4086232B; + @%p4 bra $L__BB2_7; + + setp.lt.f64 %p5, %fd4, 0d0000000000000000; + add.f64 %fd129, %fd4, 0d7FF0000000000000; + selp.f64 %fd136, 0d0000000000000000, %fd129, %p5; + setp.geu.f32 %p6, %f1, 0f40874800; + @%p6 bra $L__BB2_7; + + mov.f64 %fd134, 0d4338000000000000; + mov.f64 %fd133, 0d3FF71547652B82FE; + fma.rn.f64 %fd132, %fd4, %fd133, %fd134; + { + .reg .b32 %temp; + mov.b64 {%r48, %temp}, %fd132; + } + shr.u32 %r36, %r48, 31; + add.s32 %r37, %r48, %r36; + shr.s32 %r38, %r37, 1; + shl.b32 %r39, %r38, 20; + add.s32 %r40, %r15, %r39; + mov.b64 %fd130, {%r14, %r40}; + sub.s32 %r41, %r48, %r38; + shl.b32 %r42, %r41, 20; + add.s32 %r43, %r42, 1072693248; + mov.u32 %r44, 0; + mov.b64 %fd131, {%r44, %r43}; + mul.f64 %fd136, %fd130, %fd131; + +$L__BB2_7: + { + .reg .b32 %temp; + mov.b64 {%temp, %r45}, %fd136; + } + and.b32 %r46, %r45, 2147483647; + setp.eq.s32 %p7, %r46, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r47, %temp}, %fd136; + } + setp.eq.s32 %p8, %r47, 0; + and.pred %p9, %p8, %p7; + @%p9 bra $L__BB2_9; + fma.rn.f64 %fd136, %fd136, %fd5, %fd136; + +$L__BB2_9: + st.param.f64 [func_retval0+0], %fd136; + ret; + +} diff --git a/MATLAB/ptx/smi_cuda_gaussMLEv2.ptx b/MATLAB/ptx/smi_cuda_gaussMLEv2.ptx index 2539fcf8..acf528f7 100644 --- a/MATLAB/ptx/smi_cuda_gaussMLEv2.ptx +++ b/MATLAB/ptx/smi_cuda_gaussMLEv2.ptx @@ -1,16 +1,22 @@ // // Generated by NVIDIA NVVM Compiler // -// Compiler Build ID: CL-26907403 -// Cuda compilation tools, release 10.1, V10.1.243 -// Based on LLVM 3.4svn +// Compiler Build ID: CL-30672275 +// Cuda compilation tools, release 11.5, V11.5.119 +// Based on NVVM 7.0.1 // -.version 6.4 -.target sm_30 +.version 7.5 +.target sm_52 .address_size 64 // .globl _Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i +.func (.param .b64 func_retval0) __internal_accurate_pow +( + .param .b64 __internal_accurate_pow_param_0, + .param .b64 __internal_accurate_pow_param_1 +) +; .global .align 1 .b8 d_assert[1024]; .visible .entry _Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i( @@ -24,2691 +30,3605 @@ .param .u32 _Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_7 ) { - .local .align 16 .b8 __local_depot0[64]; - .reg .b64 %SP; - .reg .b64 %SPL; - .reg .pred %p<229>; - .reg .f32 %f<2167>; - .reg .b32 %r<267>; - .reg .b64 %rd<85>; - - - mov.u64 %SPL, __local_depot0; - ld.param.u64 %rd24, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_0]; - ld.param.f32 %f394, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_1]; - ld.param.u32 %r53, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_2]; - ld.param.u32 %r54, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_3]; - ld.param.u32 %r55, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_7]; - cvta.to.global.u64 %rd1, %rd24; - add.u64 %rd2, %SPL, 0; - mov.u32 %r56, %ntid.x; - mov.u32 %r57, %ctaid.x; - mov.u32 %r58, %tid.x; - mad.lo.s32 %r1, %r56, %r57, %r58; - setp.ge.s32 %p13, %r1, %r55; - @%p13 bra BB0_151; - - mov.u32 %r59, 0; - st.local.v4.u32 [%rd2], {%r59, %r59, %r59, %r59}; - st.local.v4.u32 [%rd2+16], {%r59, %r59, %r59, %r59}; - st.local.v4.u32 [%rd2+32], {%r59, %r59, %r59, %r59}; - st.local.v4.u32 [%rd2+48], {%r59, %r59, %r59, %r59}; - mul.lo.s32 %r60, %r53, %r53; - mul.lo.s32 %r2, %r60, %r1; - mov.f32 %f397, 0f00000000; - setp.lt.s32 %p14, %r53, 1; - mov.f32 %f2056, %f397; - mov.f32 %f2057, %f397; - mov.f32 %f2058, %f397; - @%p14 bra BB0_16; - - and.b32 %r3, %r53, 3; - shl.b32 %r4, %r53, 2; - mov.f32 %f400, 0f00000000; - mov.u32 %r61, 0; - mov.u32 %r246, %r61; - mov.f32 %f2056, %f400; - mov.f32 %f2057, %f400; - mov.f32 %f2058, %f400; - -BB0_3: - cvt.rn.f32.s32 %f4, %r246; - setp.eq.s32 %p15, %r3, 0; - @%p15 bra BB0_4; - - setp.eq.s32 %p16, %r3, 1; - @%p16 bra BB0_6; - bra.uni BB0_7; - -BB0_6: - mov.u32 %r248, %r61; - bra.uni BB0_11; - -BB0_4: - mov.u32 %r251, %r61; - mov.f32 %f2047, %f2056; - mov.f32 %f2048, %f2057; - mov.f32 %f2049, %f2058; - mov.f32 %f2056, %f400; - mov.f32 %f2057, %f400; - mov.f32 %f2058, %f400; - bra.uni BB0_12; - -BB0_7: - setp.eq.s32 %p17, %r3, 2; - @%p17 bra BB0_8; - bra.uni BB0_9; - -BB0_8: - mov.u32 %r247, %r61; - bra.uni BB0_10; - -BB0_9: - add.s32 %r66, %r246, %r2; - mul.wide.s32 %rd31, %r66, 4; - add.s64 %rd32, %rd1, %rd31; - ld.global.f32 %f404, [%rd32]; - fma.rn.f32 %f2058, %f4, %f404, %f2058; - fma.rn.f32 %f2057, %f404, 0f00000000, %f2057; - add.f32 %f2056, %f2056, %f404; - mov.u32 %r247, 1; - -BB0_10: - neg.s32 %r67, %r247; - and.b32 %r68, %r67, %r53; - add.s32 %r69, %r68, %r246; - add.s32 %r70, %r69, %r2; - mul.wide.s32 %rd33, %r70, 4; - add.s64 %rd34, %rd1, %rd33; - ld.global.f32 %f405, [%rd34]; - fma.rn.f32 %f2058, %f4, %f405, %f2058; - cvt.rn.f32.s32 %f406, %r247; - fma.rn.f32 %f2057, %f406, %f405, %f2057; - add.f32 %f2056, %f2056, %f405; - add.s32 %r248, %r247, 1; - -BB0_11: - mad.lo.s32 %r71, %r248, %r53, %r246; - add.s32 %r72, %r71, %r2; - mul.wide.s32 %rd35, %r72, 4; - add.s64 %rd36, %rd1, %rd35; - ld.global.f32 %f407, [%rd36]; - fma.rn.f32 %f2049, %f4, %f407, %f2058; - cvt.rn.f32.s32 %f408, %r248; - fma.rn.f32 %f2048, %f408, %f407, %f2057; - add.f32 %f2047, %f2056, %f407; - add.s32 %r251, %r248, 1; - mov.f32 %f2056, %f2047; - mov.f32 %f2057, %f2048; - mov.f32 %f2058, %f2049; - -BB0_12: - setp.lt.u32 %p18, %r53, 4; - @%p18 bra BB0_15; - - mad.lo.s32 %r250, %r53, %r251, %r246; - mov.f32 %f2056, %f2047; - mov.f32 %f2057, %f2048; - mov.f32 %f2058, %f2049; - -BB0_14: - add.s32 %r73, %r250, %r2; - mul.wide.s32 %rd37, %r73, 4; - add.s64 %rd38, %rd1, %rd37; - ld.global.f32 %f409, [%rd38]; - fma.rn.f32 %f410, %f4, %f409, %f2058; - cvt.rn.f32.s32 %f411, %r251; - fma.rn.f32 %f412, %f411, %f409, %f2057; - add.f32 %f413, %f2056, %f409; - cvt.s64.s32 %rd39, %r4; - add.s64 %rd40, %rd38, %rd39; - ld.global.f32 %f414, [%rd40]; - fma.rn.f32 %f415, %f4, %f414, %f410; - add.s32 %r74, %r251, 1; - cvt.rn.f32.s32 %f416, %r74; - fma.rn.f32 %f417, %f416, %f414, %f412; - add.f32 %f418, %f413, %f414; - add.s64 %rd41, %rd40, %rd39; - ld.global.f32 %f419, [%rd41]; - fma.rn.f32 %f420, %f4, %f419, %f415; - add.s32 %r75, %r251, 2; - cvt.rn.f32.s32 %f421, %r75; - fma.rn.f32 %f422, %f421, %f419, %f417; - add.f32 %f423, %f418, %f419; - add.s64 %rd42, %rd41, %rd39; - ld.global.f32 %f424, [%rd42]; - fma.rn.f32 %f2058, %f4, %f424, %f420; - add.s32 %r76, %r251, 3; - cvt.rn.f32.s32 %f425, %r76; - fma.rn.f32 %f2057, %f425, %f424, %f422; - add.f32 %f2056, %f423, %f424; - add.s32 %r250, %r250, %r4; - add.s32 %r251, %r251, 4; - setp.lt.s32 %p19, %r251, %r53; - @%p19 bra BB0_14; - -BB0_15: - add.s32 %r246, %r246, 1; - setp.lt.s32 %p20, %r246, %r53; - @%p20 bra BB0_3; - -BB0_16: - div.rn.f32 %f2133, %f2058, %f2056; - div.rn.f32 %f2132, %f2057, %f2056; - mov.f32 %f428, 0f3F000000; - div.rn.f32 %f429, %f428, %f394; - div.rn.f32 %f40, %f429, %f394; - mov.f32 %f2064, 0f51BA43B7; - mov.f32 %f2065, %f397; - @%p14 bra BB0_35; - - and.b32 %r18, %r53, 3; - mov.f32 %f2065, 0f00000000; - mov.u32 %r77, 0; - mov.f32 %f2064, 0f51BA43B7; - mov.u32 %r252, %r77; - -BB0_18: - mov.u32 %r253, %r77; - -BB0_19: - cvt.rn.f32.s32 %f434, %r253; - mul.f32 %f435, %f434, %f434; - mul.f32 %f45, %f40, %f435; - neg.f32 %f46, %f45; - mul.f32 %f47, %f45, 0fBFB8AA3B; - mov.f32 %f433, 0f00000000; - mov.f32 %f2082, %f433; - mov.f32 %f2083, %f433; - mov.u32 %r254, %r77; - -BB0_20: - sub.s32 %r81, %r254, %r252; - cvt.rn.f32.s32 %f50, %r81; - mul.lo.s32 %r22, %r254, %r53; - setp.eq.s32 %p22, %r18, 0; - @%p22 bra BB0_21; - - setp.eq.s32 %p23, %r18, 1; - @%p23 bra BB0_25; - bra.uni BB0_23; - -BB0_25: - mul.f32 %f450, %f50, %f50; - mul.f32 %f2073, %f40, %f450; - neg.f32 %f451, %f2073; - mul.f32 %f452, %f2073, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f453, %f452; - mov.f32 %f454, 0fBF317200; - fma.rn.f32 %f455, %f453, %f454, %f451; - mov.f32 %f456, 0fB5BFBE8E; - fma.rn.f32 %f457, %f453, %f456, %f455; - mul.f32 %f458, %f457, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f459, %f458; - add.f32 %f460, %f453, 0f00000000; - ex2.approx.f32 %f461, %f460; - mul.f32 %f2072, %f459, %f461; - mov.u32 %r256, 0; - bra.uni BB0_28; - -BB0_21: - mov.f32 %f2076, %f2082; - mov.f32 %f2077, %f2083; - mov.u32 %r258, %r77; - mov.f32 %f2082, %f433; - mov.f32 %f2083, %f433; - bra.uni BB0_29; - -BB0_23: - setp.ne.s32 %p24, %r18, 2; - @%p24 bra BB0_26; - - mul.f32 %f438, %f50, %f50; - mul.f32 %f2073, %f40, %f438; - neg.f32 %f439, %f2073; - mul.f32 %f440, %f2073, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f441, %f440; - mov.f32 %f442, 0fBF317200; - fma.rn.f32 %f443, %f441, %f442, %f439; - mov.f32 %f444, 0fB5BFBE8E; - fma.rn.f32 %f445, %f441, %f444, %f443; - mul.f32 %f446, %f445, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f447, %f446; - add.f32 %f448, %f441, 0f00000000; - ex2.approx.f32 %f449, %f448; - mul.f32 %f2072, %f447, %f449; - mov.u32 %r255, 0; - bra.uni BB0_27; - -BB0_26: - setp.lt.f32 %p25, %f45, 0fC2D20000; - mul.f32 %f462, %f50, %f50; - mul.f32 %f2073, %f40, %f462; - neg.f32 %f463, %f2073; - mul.f32 %f464, %f2073, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f465, %f464; - mov.f32 %f466, 0fBF317200; - fma.rn.f32 %f467, %f465, %f466, %f463; - mov.f32 %f468, 0fB5BFBE8E; - fma.rn.f32 %f469, %f465, %f468, %f467; - mul.f32 %f470, %f469, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f471, %f470; - add.f32 %f472, %f465, 0f00000000; - ex2.approx.f32 %f473, %f472; - mul.f32 %f2072, %f471, %f473; - setp.gt.f32 %p26, %f2073, 0f42D20000; - selp.f32 %f474, 0f00000000, %f2072, %p26; - setp.lt.f32 %p27, %f2073, 0fC2D20000; - selp.f32 %f475, 0f7F800000, %f474, %p27; - cvt.rzi.f32.f32 %f476, %f47; - fma.rn.f32 %f477, %f476, %f466, %f46; - fma.rn.f32 %f478, %f476, %f468, %f477; - mul.f32 %f479, %f478, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f480, %f479; - add.f32 %f481, %f476, 0f00000000; - ex2.approx.f32 %f482, %f481; - mul.f32 %f483, %f480, %f482; - setp.gt.f32 %p28, %f45, 0f42D20000; - selp.f32 %f484, 0f00000000, %f483, %p28; - selp.f32 %f485, 0f7F800000, %f484, %p25; - mul.f32 %f486, %f475, %f485; - add.s32 %r85, %r22, %r2; - mul.wide.s32 %rd43, %r85, 4; - add.s64 %rd44, %rd1, %rd43; - ld.global.f32 %f487, [%rd44]; - fma.rn.f32 %f2083, %f487, %f486, %f2083; - add.f32 %f2082, %f2082, %f486; - mov.u32 %r255, 1; - -BB0_27: - sub.s32 %r86, %r253, %r255; - cvt.rn.f32.s32 %f488, %r86; - mul.f32 %f489, %f488, %f488; - setp.gt.f32 %p29, %f2073, 0f42D20000; - selp.f32 %f490, 0f00000000, %f2072, %p29; - setp.lt.f32 %p30, %f2073, 0fC2D20000; - selp.f32 %f491, 0f7F800000, %f490, %p30; - mul.f32 %f492, %f40, %f489; - neg.f32 %f493, %f492; - mul.f32 %f494, %f492, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f495, %f494; - mov.f32 %f496, 0fBF317200; - fma.rn.f32 %f497, %f495, %f496, %f493; - mov.f32 %f498, 0fB5BFBE8E; - fma.rn.f32 %f499, %f495, %f498, %f497; - mul.f32 %f500, %f499, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f501, %f500; - add.f32 %f502, %f495, 0f00000000; - ex2.approx.f32 %f503, %f502; - mul.f32 %f504, %f501, %f503; - setp.gt.f32 %p31, %f492, 0f42D20000; - selp.f32 %f505, 0f00000000, %f504, %p31; - setp.lt.f32 %p32, %f492, 0fC2D20000; - selp.f32 %f506, 0f7F800000, %f505, %p32; - mul.f32 %f507, %f491, %f506; - add.s32 %r87, %r255, %r22; - add.s32 %r88, %r87, %r2; - mul.wide.s32 %rd45, %r88, 4; - add.s64 %rd46, %rd1, %rd45; - ld.global.f32 %f508, [%rd46]; - fma.rn.f32 %f2083, %f508, %f507, %f2083; - add.f32 %f2082, %f2082, %f507; - add.s32 %r256, %r255, 1; - -BB0_28: - sub.s32 %r89, %r253, %r256; - cvt.rn.f32.s32 %f509, %r89; - mul.f32 %f510, %f509, %f509; - setp.gt.f32 %p33, %f2073, 0f42D20000; - selp.f32 %f511, 0f00000000, %f2072, %p33; - setp.lt.f32 %p34, %f2073, 0fC2D20000; - selp.f32 %f512, 0f7F800000, %f511, %p34; - mul.f32 %f513, %f40, %f510; - neg.f32 %f514, %f513; - mul.f32 %f515, %f513, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f516, %f515; - mov.f32 %f517, 0fBF317200; - fma.rn.f32 %f518, %f516, %f517, %f514; - mov.f32 %f519, 0fB5BFBE8E; - fma.rn.f32 %f520, %f516, %f519, %f518; - mul.f32 %f521, %f520, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f522, %f521; - add.f32 %f523, %f516, 0f00000000; - ex2.approx.f32 %f524, %f523; - mul.f32 %f525, %f522, %f524; - setp.gt.f32 %p35, %f513, 0f42D20000; - selp.f32 %f526, 0f00000000, %f525, %p35; - setp.lt.f32 %p36, %f513, 0fC2D20000; - selp.f32 %f527, 0f7F800000, %f526, %p36; - mul.f32 %f528, %f512, %f527; - add.s32 %r90, %r256, %r22; - add.s32 %r91, %r90, %r2; - mul.wide.s32 %rd47, %r91, 4; - add.s64 %rd48, %rd1, %rd47; - ld.global.f32 %f529, [%rd48]; - fma.rn.f32 %f2077, %f529, %f528, %f2083; - add.f32 %f2076, %f2082, %f528; - add.s32 %r258, %r256, 1; - mov.f32 %f2082, %f2076; - mov.f32 %f2083, %f2077; - -BB0_29: - setp.lt.u32 %p37, %r53, 4; - @%p37 bra BB0_32; - - mul.f32 %f530, %f50, %f50; - mul.f32 %f531, %f40, %f530; - neg.f32 %f532, %f531; - mul.f32 %f533, %f531, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f534, %f533; - mov.f32 %f535, 0fBF317200; - fma.rn.f32 %f536, %f534, %f535, %f532; - mov.f32 %f537, 0fB5BFBE8E; - fma.rn.f32 %f538, %f534, %f537, %f536; - mul.f32 %f539, %f538, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f540, %f539; - add.f32 %f541, %f534, 0f00000000; - ex2.approx.f32 %f542, %f541; - mul.f32 %f543, %f540, %f542; - setp.gt.f32 %p38, %f531, 0f42D20000; - selp.f32 %f544, 0f00000000, %f543, %p38; - setp.lt.f32 %p39, %f531, 0fC2D20000; - selp.f32 %f75, 0f7F800000, %f544, %p39; - mov.f32 %f2082, %f2076; - mov.f32 %f2083, %f2077; - -BB0_31: - sub.s32 %r92, %r253, %r258; - cvt.rn.f32.s32 %f545, %r92; - mul.f32 %f546, %f545, %f545; - mul.f32 %f547, %f40, %f546; - neg.f32 %f548, %f547; - mul.f32 %f549, %f547, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f550, %f549; - fma.rn.f32 %f552, %f550, %f535, %f548; - fma.rn.f32 %f554, %f550, %f537, %f552; - mul.f32 %f555, %f554, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f556, %f555; - add.f32 %f557, %f550, 0f00000000; - ex2.approx.f32 %f558, %f557; - mul.f32 %f559, %f556, %f558; - setp.gt.f32 %p40, %f547, 0f42D20000; - selp.f32 %f560, 0f00000000, %f559, %p40; - setp.lt.f32 %p41, %f547, 0fC2D20000; - selp.f32 %f561, 0f7F800000, %f560, %p41; - mul.f32 %f562, %f75, %f561; - add.s32 %r93, %r258, %r22; - add.s32 %r94, %r93, %r2; - mul.wide.s32 %rd49, %r94, 4; - add.s64 %rd50, %rd1, %rd49; - ld.global.f32 %f563, [%rd50]; - fma.rn.f32 %f564, %f563, %f562, %f2083; - add.f32 %f565, %f2082, %f562; - add.s32 %r95, %r258, 1; - sub.s32 %r96, %r253, %r95; - cvt.rn.f32.s32 %f566, %r96; - mul.f32 %f567, %f566, %f566; - mul.f32 %f568, %f40, %f567; - neg.f32 %f569, %f568; - mul.f32 %f570, %f568, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f571, %f570; - fma.rn.f32 %f572, %f571, %f535, %f569; - fma.rn.f32 %f573, %f571, %f537, %f572; - mul.f32 %f574, %f573, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f575, %f574; - add.f32 %f576, %f571, 0f00000000; - ex2.approx.f32 %f577, %f576; - mul.f32 %f578, %f575, %f577; - setp.gt.f32 %p42, %f568, 0f42D20000; - selp.f32 %f579, 0f00000000, %f578, %p42; - setp.lt.f32 %p43, %f568, 0fC2D20000; - selp.f32 %f580, 0f7F800000, %f579, %p43; - mul.f32 %f581, %f75, %f580; - ld.global.f32 %f582, [%rd50+4]; - fma.rn.f32 %f583, %f582, %f581, %f564; - add.f32 %f584, %f565, %f581; - add.s32 %r97, %r258, 2; - sub.s32 %r98, %r253, %r97; - cvt.rn.f32.s32 %f585, %r98; - mul.f32 %f586, %f585, %f585; - mul.f32 %f587, %f40, %f586; - neg.f32 %f588, %f587; - mul.f32 %f589, %f587, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f590, %f589; - fma.rn.f32 %f591, %f590, %f535, %f588; - fma.rn.f32 %f592, %f590, %f537, %f591; - mul.f32 %f593, %f592, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f594, %f593; - add.f32 %f595, %f590, 0f00000000; - ex2.approx.f32 %f596, %f595; - mul.f32 %f597, %f594, %f596; - setp.gt.f32 %p44, %f587, 0f42D20000; - selp.f32 %f598, 0f00000000, %f597, %p44; - setp.lt.f32 %p45, %f587, 0fC2D20000; - selp.f32 %f599, 0f7F800000, %f598, %p45; - mul.f32 %f600, %f75, %f599; - ld.global.f32 %f601, [%rd50+8]; - fma.rn.f32 %f602, %f601, %f600, %f583; - add.f32 %f603, %f584, %f600; - add.s32 %r99, %r258, 3; - sub.s32 %r100, %r253, %r99; - cvt.rn.f32.s32 %f604, %r100; - mul.f32 %f605, %f604, %f604; - mul.f32 %f606, %f40, %f605; - neg.f32 %f607, %f606; - mul.f32 %f608, %f606, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f609, %f608; - fma.rn.f32 %f610, %f609, %f535, %f607; - fma.rn.f32 %f611, %f609, %f537, %f610; - mul.f32 %f612, %f611, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f613, %f612; - add.f32 %f614, %f609, 0f00000000; - ex2.approx.f32 %f615, %f614; - mul.f32 %f616, %f613, %f615; - setp.gt.f32 %p46, %f606, 0f42D20000; - selp.f32 %f617, 0f00000000, %f616, %p46; - setp.lt.f32 %p47, %f606, 0fC2D20000; - selp.f32 %f618, 0f7F800000, %f617, %p47; - mul.f32 %f619, %f75, %f618; - ld.global.f32 %f620, [%rd50+12]; - fma.rn.f32 %f2083, %f620, %f619, %f602; - add.f32 %f2082, %f603, %f619; - add.s32 %r258, %r258, 4; - setp.lt.s32 %p48, %r258, %r53; - @%p48 bra BB0_31; - -BB0_32: - add.s32 %r254, %r254, 1; - setp.lt.s32 %p49, %r254, %r53; - @%p49 bra BB0_20; - - div.rn.f32 %f621, %f2083, %f2082; - max.f32 %f2065, %f2065, %f621; - min.f32 %f2064, %f2064, %f621; - add.s32 %r253, %r253, 1; - setp.lt.s32 %p50, %r253, %r53; - @%p50 bra BB0_19; - - add.s32 %r252, %r252, 1; - setp.lt.s32 %p51, %r252, %r53; - @%p51 bra BB0_18; - -BB0_35: - sub.f32 %f622, %f2065, %f2064; - add.f32 %f623, %f622, %f622; - fma.rn.f32 %f624, %f622, 0f40000000, %f623; - mul.f32 %f625, %f624, 0f40490FD8; - mul.f32 %f626, %f625, %f394; - mul.f32 %f627, %f626, %f394; - max.f32 %f2131, %f397, %f627; - setp.lt.s32 %p52, %r54, 1; - @%p52 bra BB0_89; - - mul.f32 %f629, %f394, %f394; - mul.f32 %f87, %f629, %f394; - mov.u32 %r259, 0; - -BB0_37: - mov.f32 %f2098, 0f00000000; - mov.f32 %f2099, %f2098; - mov.f32 %f2100, %f2098; - mov.f32 %f2101, %f2098; - mov.f32 %f2102, %f2098; - mov.f32 %f2103, %f2098; - mov.f32 %f2104, %f2098; - mov.f32 %f2105, %f2098; - @%p14 bra BB0_88; - - div.rn.f32 %f646, %f2131, 0fC0206C98; - div.rn.f32 %f92, %f646, %f394; - div.rn.f32 %f93, %f646, %f87; - mov.u32 %r260, 0; - mov.f32 %f2098, 0f00000000; - mov.f32 %f2099, %f2098; - mov.f32 %f2100, %f2098; - mov.f32 %f2101, %f2098; - mov.f32 %f2102, %f2098; - mov.f32 %f2103, %f2098; - mov.f32 %f2104, %f2098; - mov.f32 %f2105, %f2098; - -BB0_39: - mov.u32 %r261, 0; - cvt.rn.f32.s32 %f102, %r260; - sub.f32 %f647, %f102, %f2133; - add.f32 %f648, %f647, 0f3F800000; - sqrt.rn.f32 %f103, %f40; - mul.f32 %f104, %f648, %f103; - abs.f32 %f105, %f104; - mul.f32 %f106, %f104, %f104; - mul.f32 %f107, %f647, %f103; - abs.f32 %f108, %f107; - add.f32 %f649, %f102, 0f3F800000; - sub.f32 %f650, %f649, %f2133; - div.rn.f32 %f110, %f650, %f394; - mov.f32 %f651, 0f3F800000; - cvt.rzi.f32.f32 %f652, %f651; - add.f32 %f653, %f652, %f652; - mov.f32 %f654, 0f40000000; - sub.f32 %f655, %f654, %f653; - abs.f32 %f111, %f655; - setp.eq.f32 %p54, %f111, 0f3F800000; - abs.f32 %f112, %f110; - setp.lt.f32 %p55, %f112, 0f00800000; - mul.f32 %f656, %f112, 0f4B800000; - selp.f32 %f657, 0fC3170000, 0fC2FE0000, %p55; - selp.f32 %f658, %f656, %f112, %p55; - mov.b32 %r104, %f658; - and.b32 %r105, %r104, 8388607; - or.b32 %r106, %r105, 1065353216; - mov.b32 %f659, %r106; - shr.u32 %r107, %r104, 23; - cvt.rn.f32.u32 %f660, %r107; - add.f32 %f661, %f657, %f660; - setp.gt.f32 %p56, %f659, 0f3FB504F3; - mul.f32 %f662, %f659, 0f3F000000; - add.f32 %f663, %f661, 0f3F800000; - selp.f32 %f664, %f662, %f659, %p56; - selp.f32 %f665, %f663, %f661, %p56; - add.f32 %f113, %f664, 0fBF800000; - add.f32 %f114, %f664, 0f3F800000; - add.f32 %f115, %f113, %f113; - mov.f32 %f666, 0f3F317200; - mul.rn.f32 %f116, %f665, %f666; - mov.f32 %f667, 0f35BFBE8E; - mul.rn.f32 %f117, %f665, %f667; - setp.lt.f32 %p57, %f110, 0f00000000; - and.pred %p1, %p57, %p54; - div.rn.f32 %f118, %f647, %f394; - abs.f32 %f119, %f118; - setp.lt.f32 %p58, %f119, 0f00800000; - mul.f32 %f668, %f119, 0f4B800000; - selp.f32 %f669, 0fC3170000, 0fC2FE0000, %p58; - selp.f32 %f670, %f668, %f119, %p58; - mov.b32 %r108, %f670; - and.b32 %r109, %r108, 8388607; - or.b32 %r110, %r109, 1065353216; - mov.b32 %f671, %r110; - shr.u32 %r111, %r108, 23; - cvt.rn.f32.u32 %f672, %r111; - add.f32 %f673, %f669, %f672; - setp.gt.f32 %p59, %f671, 0f3FB504F3; - mul.f32 %f674, %f671, 0f3F000000; - add.f32 %f675, %f673, 0f3F800000; - selp.f32 %f676, %f674, %f671, %p59; - selp.f32 %f677, %f675, %f673, %p59; - add.f32 %f120, %f676, 0fBF800000; - add.f32 %f121, %f676, 0f3F800000; - add.f32 %f122, %f120, %f120; - mul.rn.f32 %f123, %f677, %f666; - mul.rn.f32 %f124, %f677, %f667; - setp.lt.f32 %p60, %f118, 0f00000000; - and.pred %p2, %p60, %p54; - -BB0_40: - setp.ltu.f32 %p61, %f105, 0f3F800000; - @%p61 bra BB0_42; - bra.uni BB0_41; - -BB0_42: - mov.f32 %f696, 0f3BA0C9F8; - mov.f32 %f697, 0fBA1268FB; - fma.rn.f32 %f698, %f697, %f106, %f696; - mov.f32 %f699, 0fBCDABFD4; - fma.rn.f32 %f700, %f698, %f106, %f699; - mov.f32 %f701, 0f3DE70331; - fma.rn.f32 %f702, %f700, %f106, %f701; - mov.f32 %f703, 0fBEC09330; - fma.rn.f32 %f704, %f702, %f106, %f703; - mov.f32 %f705, 0f3F906EBA; - fma.rn.f32 %f706, %f704, %f106, %f705; - mul.f32 %f2106, %f104, %f706; - bra.uni BB0_43; - -BB0_41: - mov.f32 %f1925, 0f3F800000; - setp.ltu.f32 %p62, %f105, 0f407AD445; - mov.f32 %f678, 0f3A03BB71; - mov.f32 %f679, 0fB7B730FB; - fma.rn.f32 %f680, %f679, %f105, %f678; - mov.f32 %f681, 0fBBACA3B3; - fma.rn.f32 %f682, %f680, %f105, %f681; - mov.f32 %f683, 0f3D0A7445; - fma.rn.f32 %f684, %f682, %f105, %f683; - mov.f32 %f685, 0fBE1B3B75; - fma.rn.f32 %f686, %f684, %f105, %f685; - mov.f32 %f687, 0fBF6B385A; - fma.rn.f32 %f688, %f686, %f105, %f687; - mov.f32 %f689, 0fBFD0316E; - fma.rn.f32 %f690, %f688, %f105, %f689; - mov.f32 %f691, 0fBA031CCE; - fma.rn.f32 %f692, %f690, %f105, %f691; - ex2.approx.ftz.f32 %f693, %f692; - sub.f32 %f695, %f1925, %f693; - mov.b32 %r112, %f695; - selp.b32 %r113, %r112, 1065353216, %p62; - mov.b32 %r114, %f104; - and.b32 %r115, %r114, -2147483648; - or.b32 %r116, %r113, %r115; - mov.b32 %f2106, %r116; - -BB0_43: - setp.ltu.f32 %p63, %f108, 0f3F800000; - @%p63 bra BB0_45; - bra.uni BB0_44; - -BB0_45: - cvt.rn.f32.s32 %f1960, %r260; - sub.f32 %f1959, %f1960, %f2133; - mul.f32 %f1958, %f1959, %f103; - mul.f32 %f1957, %f1958, %f1958; - mov.f32 %f725, 0f3BA0C9F8; - mov.f32 %f726, 0fBA1268FB; - fma.rn.f32 %f727, %f726, %f1957, %f725; - mov.f32 %f728, 0fBCDABFD4; - fma.rn.f32 %f729, %f727, %f1957, %f728; - mov.f32 %f730, 0f3DE70331; - fma.rn.f32 %f731, %f729, %f1957, %f730; - mov.f32 %f732, 0fBEC09330; - fma.rn.f32 %f733, %f731, %f1957, %f732; - mov.f32 %f734, 0f3F906EBA; - fma.rn.f32 %f735, %f733, %f1957, %f734; - mul.f32 %f2107, %f1958, %f735; - bra.uni BB0_46; - -BB0_44: - cvt.rn.f32.s32 %f1968, %r260; - sub.f32 %f1967, %f1968, %f2133; - mul.f32 %f1966, %f1967, %f103; - mov.f32 %f1926, 0f3F800000; - setp.ltu.f32 %p64, %f108, 0f407AD445; - mov.f32 %f707, 0f3A03BB71; - mov.f32 %f708, 0fB7B730FB; - fma.rn.f32 %f709, %f708, %f108, %f707; - mov.f32 %f710, 0fBBACA3B3; - fma.rn.f32 %f711, %f709, %f108, %f710; - mov.f32 %f712, 0f3D0A7445; - fma.rn.f32 %f713, %f711, %f108, %f712; - mov.f32 %f714, 0fBE1B3B75; - fma.rn.f32 %f715, %f713, %f108, %f714; - mov.f32 %f716, 0fBF6B385A; - fma.rn.f32 %f717, %f715, %f108, %f716; - mov.f32 %f718, 0fBFD0316E; - fma.rn.f32 %f719, %f717, %f108, %f718; - mov.f32 %f720, 0fBA031CCE; - fma.rn.f32 %f721, %f719, %f108, %f720; - ex2.approx.ftz.f32 %f722, %f721; - sub.f32 %f724, %f1926, %f722; - mov.b32 %r117, %f724; - selp.b32 %r118, %r117, 1065353216, %p64; - mov.b32 %r119, %f1966; - and.b32 %r120, %r119, -2147483648; - or.b32 %r121, %r118, %r120; - mov.b32 %f2107, %r121; - -BB0_46: - sub.f32 %f736, %f2106, %f2107; - mul.f32 %f139, %f736, 0f3F000000; - cvt.rn.f32.s32 %f140, %r261; - sub.f32 %f141, %f140, %f2132; - add.f32 %f737, %f141, 0f3F800000; - mul.f32 %f142, %f737, %f103; + .reg .pred %p<382>; + .reg .f32 %f<1814>; + .reg .b32 %r<539>; + .reg .f64 %fd<372>; + .reg .b64 %rd<48>; + + + ld.param.u64 %rd9, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_0]; + ld.param.f32 %f317, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_1]; + ld.param.u32 %r86, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_2]; + ld.param.u32 %r88, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_7]; + cvta.to.global.u64 %rd1, %rd9; + mov.u32 %r89, %ntid.x; + mov.u32 %r90, %ctaid.x; + mov.u32 %r91, %tid.x; + mad.lo.s32 %r1, %r90, %r89, %r91; + setp.ge.s32 %p19, %r1, %r88; + @%p19 bra $L__BB0_261; + + mul.lo.s32 %r92, %r86, %r86; + mul.lo.s32 %r2, %r92, %r1; + setp.lt.s32 %p20, %r86, 1; + mov.f32 %f1709, 0f00000000; + mov.f32 %f1700, %f1709; + mov.f32 %f1701, %f1709; + mov.f32 %f1702, %f1709; + @%p20 bra $L__BB0_11; + + add.s32 %r3, %r86, -1; + and.b32 %r4, %r86, 3; + sub.s32 %r5, %r86, %r4; + shl.b32 %r6, %r86, 2; + mov.u32 %r93, 0; + setp.lt.u32 %p21, %r3, 3; + setp.eq.s32 %p23, %r4, 0; + setp.eq.s32 %p24, %r4, 1; + setp.eq.s32 %p25, %r4, 2; + cvt.s64.s32 %rd12, %r6; + mov.u32 %r524, %r93; + +$L__BB0_3: + cvt.rn.f32.s32 %f4, %r524; + mov.u32 %r527, %r93; + @%p21 bra $L__BB0_6; + + mov.u32 %r527, %r93; + mov.u32 %r526, %r5; + +$L__BB0_5: + mad.lo.s32 %r96, %r527, %r86, %r524; + add.s32 %r97, %r96, %r2; + mul.wide.s32 %rd10, %r97, 4; + add.s64 %rd11, %rd1, %rd10; + ld.global.f32 %f325, [%rd11]; + fma.rn.f32 %f326, %f325, %f4, %f1700; + cvt.rn.f32.s32 %f327, %r527; + fma.rn.f32 %f328, %f325, %f327, %f1701; + add.f32 %f329, %f1702, %f325; + add.s64 %rd13, %rd11, %rd12; + ld.global.f32 %f330, [%rd13]; + fma.rn.f32 %f331, %f330, %f4, %f326; + add.s32 %r98, %r527, 1; + cvt.rn.f32.s32 %f332, %r98; + fma.rn.f32 %f333, %f330, %f332, %f328; + add.f32 %f334, %f329, %f330; + add.s64 %rd14, %rd13, %rd12; + ld.global.f32 %f335, [%rd14]; + fma.rn.f32 %f336, %f335, %f4, %f331; + add.s32 %r99, %r527, 2; + cvt.rn.f32.s32 %f337, %r99; + fma.rn.f32 %f338, %f335, %f337, %f333; + add.f32 %f339, %f334, %f335; + add.s64 %rd15, %rd14, %rd12; + ld.global.f32 %f340, [%rd15]; + fma.rn.f32 %f1700, %f340, %f4, %f336; + add.s32 %r100, %r527, 3; + cvt.rn.f32.s32 %f341, %r100; + fma.rn.f32 %f1701, %f340, %f341, %f338; + add.f32 %f1702, %f339, %f340; + add.s32 %r527, %r527, 4; + add.s32 %r526, %r526, -4; + setp.ne.s32 %p22, %r526, 0; + @%p22 bra $L__BB0_5; + +$L__BB0_6: + @%p23 bra $L__BB0_10; + + mad.lo.s32 %r13, %r527, %r86, %r524; + add.s32 %r101, %r13, %r2; + mul.wide.s32 %rd16, %r101, 4; + add.s64 %rd17, %rd1, %rd16; + ld.global.f32 %f342, [%rd17]; + fma.rn.f32 %f1700, %f342, %f4, %f1700; + cvt.rn.f32.s32 %f343, %r527; + fma.rn.f32 %f1701, %f342, %f343, %f1701; + add.f32 %f1702, %f1702, %f342; + @%p24 bra $L__BB0_10; + + add.s32 %r14, %r13, %r86; + add.s32 %r102, %r14, %r2; + mul.wide.s32 %rd18, %r102, 4; + add.s64 %rd19, %rd1, %rd18; + ld.global.f32 %f344, [%rd19]; + fma.rn.f32 %f1700, %f344, %f4, %f1700; + add.s32 %r103, %r527, 1; + cvt.rn.f32.s32 %f345, %r103; + fma.rn.f32 %f1701, %f344, %f345, %f1701; + add.f32 %f1702, %f1702, %f344; + @%p25 bra $L__BB0_10; + + add.s32 %r104, %r527, 2; + add.s32 %r105, %r14, %r86; + add.s32 %r106, %r105, %r2; + mul.wide.s32 %rd20, %r106, 4; + add.s64 %rd21, %rd1, %rd20; + ld.global.f32 %f346, [%rd21]; + fma.rn.f32 %f1700, %f346, %f4, %f1700; + cvt.rn.f32.s32 %f347, %r104; + fma.rn.f32 %f1701, %f346, %f347, %f1701; + add.f32 %f1702, %f1702, %f346; + +$L__BB0_10: + add.s32 %r524, %r524, 1; + setp.lt.s32 %p26, %r524, %r86; + @%p26 bra $L__BB0_3; + +$L__BB0_11: + div.rn.f32 %f1763, %f1700, %f1702; + div.rn.f32 %f1762, %f1701, %f1702; + mov.f32 %f350, 0f3F000000; + div.rn.f32 %f351, %f350, %f317; + div.rn.f32 %f34, %f351, %f317; + mov.f32 %f1760, 0f51BA43B7; + @%p20 bra $L__BB0_51; + + cvt.f64.f32 %fd1, %f34; + mov.f64 %fd129, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r16}, %fd129; + } + and.b32 %r17, %r16, 2146435072; + and.b32 %r18, %r16, 2147483647; + setp.gt.s32 %p28, %r16, -1; + selp.b32 %r19, 2146435072, 0, %p28; + mov.u32 %r107, 0; + or.b32 %r20, %r19, -2147483648; + mul.wide.s32 %rd22, %r2, 4; + add.s64 %rd2, %rd1, %rd22; + setp.eq.s32 %p30, %r17, 1062207488; + setp.lt.s32 %p31, %r16, 0; + setp.ne.s32 %p36, %r18, 1071644672; + setp.eq.s32 %p63, %r18, 2146435072; + mov.u32 %r528, %r107; + +$L__BB0_13: + mov.u32 %r529, %r107; + +$L__BB0_14: + mov.f32 %f1712, 0f00000000; + mov.f32 %f1713, %f1712; + mov.u32 %r530, %r107; + +$L__BB0_15: + mov.u32 %r508, 1; + sub.s32 %r531, %r508, %r529; + add.s32 %r532, %r529, -1; + sub.s32 %r26, %r530, %r528; + cvt.rn.f32.s32 %f356, %r26; + cvt.f64.f32 %fd2, %f356; + { + .reg .b32 %temp; + mov.b64 {%temp, %r27}, %fd2; + } + abs.f64 %fd130, %fd2; + { // callseq 0, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd130; + .param .b64 param1; + st.param.f64 [param1+0], %fd129; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd3, [retval0+0]; + } // callseq 0 + setp.lt.s32 %p29, %r27, 0; + and.pred %p1, %p29, %p30; + add.f64 %fd4, %fd2, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r114}, %fd4; + } + and.b32 %r29, %r114, 2146435072; + setp.ne.s32 %p32, %r29, 2146435072; + setp.gtu.f64 %p33, %fd130, 0d7FF0000000000000; + setp.gt.f64 %p34, %fd130, 0d3FF0000000000000; + selp.b32 %r115, 2146435072, 0, %p34; + xor.b32 %r116, %r115, 2146435072; + selp.b32 %r117, %r116, %r115, %p31; + setp.eq.s32 %p35, %r26, -1; + selp.b32 %r30, 1072693248, %r117, %p35; + and.pred %p37, %p36, %p1; + selp.b32 %r32, %r20, %r19, %p37; + or.pred %p2, %p32, %p33; + mul.lo.s32 %r118, %r86, %r530; + mul.wide.s32 %rd23, %r118, 4; + add.s64 %rd47, %rd2, %rd23; + mov.u32 %r533, %r107; + +$L__BB0_16: + not.pred %p38, %p1; + mov.f64 %fd340, %fd3; + @%p38 bra $L__BB0_18; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r119}, %fd3; + } + xor.b32 %r120, %r119, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r121, %temp}, %fd3; + } + mov.b64 %fd340, {%r121, %r120}; + +$L__BB0_18: + sub.s32 %r509, %r530, %r528; + setp.eq.s32 %p39, %r509, 0; + @%p39 bra $L__BB0_22; + + sub.s32 %r511, %r530, %r528; + cvt.rn.f32.s32 %f1683, %r511; + cvt.f64.f32 %fd329, %f1683; + { + .reg .b32 %temp; + mov.b64 {%temp, %r510}, %fd329; + } + setp.gt.s32 %p40, %r510, -1; + @%p40 bra $L__BB0_23; + + cvt.rzi.f64.f64 %fd133, %fd129; + setp.eq.f64 %p41, %fd133, 0d4000000000000000; + @%p41 bra $L__BB0_23; + + mov.f64 %fd340, 0dFFF8000000000000; + bra.uni $L__BB0_23; + +$L__BB0_22: + sub.s32 %r523, %r530, %r528; + cvt.rn.f32.s32 %f1687, %r523; + cvt.f64.f32 %fd338, %f1687; + { + .reg .b32 %temp; + mov.b64 {%temp, %r522}, %fd338; + } + selp.b32 %r521, %r522, 0, %p30; + or.b32 %r520, %r521, 2146435072; + selp.b32 %r519, %r520, %r521, %p31; + mov.u32 %r122, 0; + mov.b64 %fd340, {%r122, %r519}; + +$L__BB0_23: + sub.s32 %r512, %r530, %r528; + cvt.rn.f32.s32 %f1684, %r512; + cvt.f64.f32 %fd331, %f1684; + add.f64 %fd330, %fd331, 0d4000000000000000; + selp.f64 %fd341, %fd340, %fd330, %p32; + @%p2 bra $L__BB0_28; + + { + .reg .b32 %temp; + mov.b64 {%r123, %temp}, %fd129; + } + setp.eq.s32 %p44, %r123, 0; + and.pred %p45, %p63, %p44; + @%p45 bra $L__BB0_27; + bra.uni $L__BB0_25; + +$L__BB0_27: + mov.u32 %r126, 0; + mov.b64 %fd341, {%r126, %r30}; + bra.uni $L__BB0_28; + +$L__BB0_25: + sub.s32 %r518, %r530, %r528; + cvt.rn.f32.s32 %f1686, %r518; + cvt.f64.f32 %fd337, %f1686; + { + .reg .b32 %temp; + mov.b64 {%temp, %r517}, %fd337; + } + and.b32 %r516, %r517, 2147483647; + sub.s32 %r513, %r530, %r528; + cvt.rn.f32.s32 %f1685, %r513; + cvt.f64.f32 %fd332, %f1685; + setp.ne.s32 %p46, %r516, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r124, %temp}, %fd332; + } + setp.ne.s32 %p47, %r124, 0; + or.pred %p48, %p46, %p47; + mov.f64 %fd341, %fd340; + @%p48 bra $L__BB0_28; + + mov.u32 %r125, 0; + mov.b64 %fd341, {%r125, %r32}; + +$L__BB0_28: + sub.s32 %r514, %r530, %r528; + setp.eq.s32 %p49, %r514, 1; + selp.f64 %fd136, 0d3FF0000000000000, %fd341, %p49; + mov.f64 %fd137, 0d3FF0000000000000; + mul.f64 %fd13, %fd136, %fd1; + neg.f64 %fd138, %fd13; + mov.f64 %fd139, 0d4338000000000000; + mov.f64 %fd140, 0d3FF71547652B82FE; + fma.rn.f64 %fd141, %fd138, %fd140, %fd139; + { + .reg .b32 %temp; + mov.b64 {%r36, %temp}, %fd141; + } + mov.f64 %fd142, 0dC338000000000000; + add.rn.f64 %fd143, %fd141, %fd142; + mov.f64 %fd144, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd145, %fd143, %fd144, %fd138; + mov.f64 %fd146, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd147, %fd143, %fd146, %fd145; + mov.f64 %fd148, 0d3E928AF3FCA213EA; + mov.f64 %fd149, 0d3E5ADE1569CE2BDF; + fma.rn.f64 %fd150, %fd149, %fd147, %fd148; + mov.f64 %fd151, 0d3EC71DEE62401315; + fma.rn.f64 %fd152, %fd150, %fd147, %fd151; + mov.f64 %fd153, 0d3EFA01997C89EB71; + fma.rn.f64 %fd154, %fd152, %fd147, %fd153; + mov.f64 %fd155, 0d3F2A01A014761F65; + fma.rn.f64 %fd156, %fd154, %fd147, %fd155; + mov.f64 %fd157, 0d3F56C16C1852B7AF; + fma.rn.f64 %fd158, %fd156, %fd147, %fd157; + mov.f64 %fd159, 0d3F81111111122322; + fma.rn.f64 %fd160, %fd158, %fd147, %fd159; + mov.f64 %fd161, 0d3FA55555555502A1; + fma.rn.f64 %fd162, %fd160, %fd147, %fd161; + mov.f64 %fd163, 0d3FC5555555555511; + fma.rn.f64 %fd164, %fd162, %fd147, %fd163; + mov.f64 %fd165, 0d3FE000000000000B; + fma.rn.f64 %fd166, %fd164, %fd147, %fd165; + fma.rn.f64 %fd167, %fd166, %fd147, %fd137; + fma.rn.f64 %fd168, %fd167, %fd147, %fd137; + { + .reg .b32 %temp; + mov.b64 {%r37, %temp}, %fd168; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r38}, %fd168; + } + shl.b32 %r127, %r36, 20; + add.s32 %r128, %r38, %r127; + mov.b64 %fd342, {%r37, %r128}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r129}, %fd138; + } + mov.b32 %f357, %r129; + abs.f32 %f43, %f357; + setp.lt.f32 %p50, %f43, 0f4086232B; + @%p50 bra $L__BB0_31; + + setp.gt.f64 %p51, %fd13, 0d8000000000000000; + mov.f64 %fd169, 0d7FF0000000000000; + sub.f64 %fd170, %fd169, %fd13; + selp.f64 %fd342, 0d0000000000000000, %fd170, %p51; + setp.geu.f32 %p52, %f43, 0f40874800; + @%p52 bra $L__BB0_31; + + mov.f64 %fd328, 0d4338000000000000; + mov.f64 %fd327, 0d3FF71547652B82FE; + neg.f64 %fd326, %fd13; + fma.rn.f64 %fd325, %fd326, %fd327, %fd328; + { + .reg .b32 %temp; + mov.b64 {%r505, %temp}, %fd325; + } + shr.u32 %r130, %r505, 31; + add.s32 %r131, %r505, %r130; + shr.s32 %r132, %r131, 1; + shl.b32 %r133, %r132, 20; + add.s32 %r134, %r38, %r133; + mov.b64 %fd171, {%r37, %r134}; + sub.s32 %r135, %r505, %r132; + shl.b32 %r136, %r135, 20; + add.s32 %r137, %r136, 1072693248; + mov.u32 %r138, 0; + mov.b64 %fd172, {%r138, %r137}; + mul.f64 %fd342, %fd171, %fd172; + +$L__BB0_31: + add.s32 %r139, %r532, 1; + cvt.rn.f32.s32 %f358, %r139; + cvt.f64.f32 %fd18, %f358; + { + .reg .b32 %temp; + mov.b64 {%temp, %r39}, %fd18; + } + abs.f64 %fd19, %fd18; + { // callseq 1, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd19; + .param .b64 param1; + st.param.f64 [param1+0], %fd129; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd344, [retval0+0]; + } // callseq 1 + setp.lt.s32 %p53, %r39, 0; + and.pred %p3, %p53, %p30; + not.pred %p55, %p3; + @%p55 bra $L__BB0_33; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r140}, %fd344; + } + xor.b32 %r141, %r140, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r142, %temp}, %fd344; + } + mov.b64 %fd344, {%r142, %r141}; + +$L__BB0_33: + setp.eq.s32 %p56, %r531, 1; + @%p56 bra $L__BB0_37; + bra.uni $L__BB0_34; + +$L__BB0_37: + mov.u32 %r143, 0; + selp.b32 %r144, %r39, 0, %p30; + or.b32 %r145, %r144, 2146435072; + selp.b32 %r146, %r145, %r144, %p31; + mov.b64 %fd344, {%r143, %r146}; + bra.uni $L__BB0_38; + +$L__BB0_34: + setp.gt.s32 %p57, %r39, -1; + @%p57 bra $L__BB0_38; + + cvt.rzi.f64.f64 %fd175, %fd129; + setp.eq.f64 %p58, %fd175, 0d4000000000000000; + @%p58 bra $L__BB0_38; + + mov.f64 %fd344, 0dFFF8000000000000; + +$L__BB0_38: + add.f64 %fd25, %fd18, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r147}, %fd25; + } + and.b32 %r148, %r147, 2146435072; + setp.ne.s32 %p61, %r148, 2146435072; + mov.f64 %fd345, %fd344; + @%p61 bra $L__BB0_44; + + setp.gtu.f64 %p62, %fd19, 0d7FF0000000000000; + mov.f64 %fd345, %fd25; + @%p62 bra $L__BB0_44; + + { + .reg .b32 %temp; + mov.b64 {%r149, %temp}, %fd129; + } + setp.eq.s32 %p64, %r149, 0; + and.pred %p65, %p63, %p64; + @%p65 bra $L__BB0_43; + bra.uni $L__BB0_41; + +$L__BB0_43: + mov.u32 %r154, 0; + setp.gt.f64 %p72, %fd19, 0d3FF0000000000000; + selp.b32 %r155, 2146435072, 0, %p72; + xor.b32 %r156, %r155, 2146435072; + selp.b32 %r157, %r156, %r155, %p31; + setp.eq.s32 %p73, %r532, -2; + selp.b32 %r158, 1072693248, %r157, %p73; + mov.b64 %fd345, {%r154, %r158}; + bra.uni $L__BB0_44; + +$L__BB0_41: + { + .reg .b32 %temp; + mov.b64 {%r150, %temp}, %fd18; + } + and.b32 %r151, %r39, 2147483647; + setp.ne.s32 %p66, %r151, 2146435072; + setp.ne.s32 %p67, %r150, 0; + or.pred %p68, %p66, %p67; + mov.f64 %fd345, %fd344; + @%p68 bra $L__BB0_44; + + and.pred %p70, %p36, %p3; + selp.b32 %r152, %r20, %r19, %p70; + mov.u32 %r153, 0; + mov.b64 %fd345, {%r153, %r152}; + +$L__BB0_44: + mov.f64 %fd324, 0d3FF0000000000000; + mov.f64 %fd323, 0d3FE000000000000B; + mov.f64 %fd322, 0d3FC5555555555511; + mov.f64 %fd321, 0d3FA55555555502A1; + mov.f64 %fd320, 0d3F81111111122322; + mov.f64 %fd319, 0d3F56C16C1852B7AF; + mov.f64 %fd318, 0d3F2A01A014761F65; + mov.f64 %fd317, 0d3EFA01997C89EB71; + mov.f64 %fd316, 0d3EC71DEE62401315; + mov.f64 %fd315, 0d3E928AF3FCA213EA; + mov.f64 %fd314, 0d3E5ADE1569CE2BDF; + mov.f64 %fd313, 0dBC7ABC9E3B39803F; + mov.f64 %fd312, 0dBFE62E42FEFA39EF; + mov.f64 %fd311, 0dC338000000000000; + mov.f64 %fd310, 0d4338000000000000; + mov.f64 %fd309, 0d3FF71547652B82FE; + setp.eq.s32 %p74, %r532, 0; + selp.f64 %fd178, 0d3FF0000000000000, %fd345, %p74; + mul.f64 %fd29, %fd178, %fd1; + neg.f64 %fd180, %fd29; + fma.rn.f64 %fd183, %fd180, %fd309, %fd310; + { + .reg .b32 %temp; + mov.b64 {%r40, %temp}, %fd183; + } + add.rn.f64 %fd185, %fd183, %fd311; + fma.rn.f64 %fd187, %fd185, %fd312, %fd180; + fma.rn.f64 %fd189, %fd185, %fd313, %fd187; + fma.rn.f64 %fd192, %fd314, %fd189, %fd315; + fma.rn.f64 %fd194, %fd192, %fd189, %fd316; + fma.rn.f64 %fd196, %fd194, %fd189, %fd317; + fma.rn.f64 %fd198, %fd196, %fd189, %fd318; + fma.rn.f64 %fd200, %fd198, %fd189, %fd319; + fma.rn.f64 %fd202, %fd200, %fd189, %fd320; + fma.rn.f64 %fd204, %fd202, %fd189, %fd321; + fma.rn.f64 %fd206, %fd204, %fd189, %fd322; + fma.rn.f64 %fd208, %fd206, %fd189, %fd323; + fma.rn.f64 %fd209, %fd208, %fd189, %fd324; + fma.rn.f64 %fd210, %fd209, %fd189, %fd324; + { + .reg .b32 %temp; + mov.b64 {%r41, %temp}, %fd210; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r42}, %fd210; + } + shl.b32 %r159, %r40, 20; + add.s32 %r160, %r42, %r159; + mov.b64 %fd346, {%r41, %r160}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r161}, %fd180; + } + mov.b32 %f359, %r161; + abs.f32 %f44, %f359; + setp.lt.f32 %p75, %f44, 0f4086232B; + @%p75 bra $L__BB0_47; + + setp.gt.f64 %p76, %fd29, 0d8000000000000000; + mov.f64 %fd211, 0d7FF0000000000000; + sub.f64 %fd212, %fd211, %fd29; + selp.f64 %fd346, 0d0000000000000000, %fd212, %p76; + setp.geu.f32 %p77, %f44, 0f40874800; + @%p77 bra $L__BB0_47; + + mov.f64 %fd336, 0d4338000000000000; + mov.f64 %fd335, 0d3FF71547652B82FE; + neg.f64 %fd334, %fd29; + fma.rn.f64 %fd333, %fd334, %fd335, %fd336; + { + .reg .b32 %temp; + mov.b64 {%r515, %temp}, %fd333; + } + shr.u32 %r162, %r515, 31; + add.s32 %r163, %r515, %r162; + shr.s32 %r164, %r163, 1; + shl.b32 %r165, %r164, 20; + add.s32 %r166, %r42, %r165; + mov.b64 %fd213, {%r41, %r166}; + sub.s32 %r167, %r515, %r164; + shl.b32 %r168, %r167, 20; + add.s32 %r169, %r168, 1072693248; + mov.u32 %r170, 0; + mov.b64 %fd214, {%r170, %r169}; + mul.f64 %fd346, %fd213, %fd214; + +$L__BB0_47: + ld.global.f32 %f360, [%rd47]; + cvt.f64.f32 %fd215, %f360; + mul.f64 %fd216, %fd342, %fd346; + cvt.f64.f32 %fd217, %f1713; + fma.rn.f64 %fd218, %fd216, %fd215, %fd217; + cvt.rn.f32.f64 %f1713, %fd218; + cvt.f64.f32 %fd219, %f1712; + add.f64 %fd220, %fd216, %fd219; + cvt.rn.f32.f64 %f1712, %fd220; + add.s32 %r532, %r532, -1; + add.s32 %r531, %r531, 1; + add.s64 %rd47, %rd47, 4; + add.s32 %r533, %r533, 1; + setp.lt.s32 %p78, %r533, %r86; + @%p78 bra $L__BB0_16; + + add.s32 %r530, %r530, 1; + setp.lt.s32 %p79, %r530, %r86; + @%p79 bra $L__BB0_15; + + div.rn.f32 %f361, %f1713, %f1712; + max.f32 %f1709, %f1709, %f361; + min.f32 %f1760, %f1760, %f361; + add.s32 %r529, %r529, 1; + setp.lt.s32 %p80, %r529, %r86; + @%p80 bra $L__BB0_14; + + add.s32 %r528, %r528, 1; + setp.lt.s32 %p81, %r528, %r86; + @%p81 bra $L__BB0_13; + +$L__BB0_51: + ld.param.u32 %r506, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_3]; + mov.f32 %f1682, 0f00000000; + sub.f32 %f362, %f1709, %f1760; + add.f32 %f363, %f362, %f362; + fma.rn.f32 %f364, %f362, 0f40000000, %f363; + mul.f32 %f365, %f364, 0f40490FD8; + mul.f32 %f366, %f365, %f317; + mul.f32 %f367, %f366, %f317; + max.f32 %f1761, %f1682, %f367; + setp.lt.s32 %p82, %r506, 1; + @%p82 bra $L__BB0_205; + + cvt.f64.f32 %fd34, %f317; + add.f64 %fd35, %fd34, 0d4008000000000000; + mov.u32 %r171, 0; + mov.f64 %fd221, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r198}, %fd221; + } + setp.lt.s32 %p99, %r198, 0; + setp.eq.f32 %p103, %f317, 0fBF800000; + setp.gt.s32 %p104, %r198, -1; + mov.u32 %r534, %r171; + +$L__BB0_53: + mov.f32 %f1728, 0f00000000; + mov.f32 %f1729, %f1728; + mov.f32 %f1730, %f1728; + mov.f32 %f1731, %f1728; + mov.f32 %f1732, %f1728; + mov.f32 %f1733, %f1728; + mov.f32 %f1734, %f1728; + mov.f32 %f1735, %f1728; + @%p20 bra $L__BB0_204; + + mov.f32 %f1728, 0f00000000; + div.rn.f32 %f385, %f1761, 0fC0206C98; + div.rn.f32 %f56, %f385, %f317; + cvt.f64.f32 %fd36, %f385; + mov.u32 %r535, %r171; + +$L__BB0_55: + mov.f32 %f1627, 0f00000000; + cvt.rn.f32.s32 %f386, %r535; + sub.f32 %f387, %f386, %f1763; + add.f32 %f388, %f387, 0f3F000000; + sqrt.rn.f32 %f65, %f34; + mul.f32 %f389, %f65, %f388; + abs.f32 %f66, %f389; + setp.ge.f32 %p84, %f66, 0f3F8060FE; + mul.f32 %f390, %f389, %f389; + selp.f32 %f391, %f66, %f390, %p84; + selp.f32 %f392, 0f3789CA3C, 0f38B1E96A, %p84; + selp.f32 %f393, 0fB9F560B9, 0fBA574D20, %p84; + fma.rn.f32 %f394, %f392, %f391, %f393; + selp.f32 %f395, 0f3BAC840B, 0f3BAAD5EA, %p84; + fma.rn.f32 %f396, %f394, %f391, %f395; + selp.f32 %f397, 0fBD0C8162, 0fBCDC1BE7, %p84; + fma.rn.f32 %f398, %f396, %f391, %f397; + selp.f32 %f399, 0f3E1CF906, 0f3DE718AF, %p84; + fma.rn.f32 %f400, %f398, %f391, %f399; + selp.f32 %f401, 0f3F6A937E, 0fBEC093AC, %p84; + fma.rn.f32 %f402, %f400, %f391, %f401; + selp.f32 %f403, 0f3F20D842, 0f3E0375D3, %p84; + fma.rn.f32 %f404, %f402, %f391, %f403; + neg.f32 %f405, %f66; + selp.f32 %f406, %f405, %f389, %p84; + fma.rn.f32 %f67, %f404, %f406, %f406; + mov.b32 %r174, %f389; + and.b32 %r51, %r174, -2147483648; + add.f32 %f68, %f387, 0fBF000000; + mul.f32 %f407, %f65, %f68; + abs.f32 %f69, %f407; + setp.ge.f32 %p85, %f69, 0f3F8060FE; + mul.f32 %f408, %f407, %f407; + selp.f32 %f409, %f69, %f408, %p85; + selp.f32 %f410, 0f3789CA3C, 0f38B1E96A, %p85; + selp.f32 %f411, 0fB9F560B9, 0fBA574D20, %p85; + fma.rn.f32 %f412, %f410, %f409, %f411; + selp.f32 %f413, 0f3BAC840B, 0f3BAAD5EA, %p85; + fma.rn.f32 %f414, %f412, %f409, %f413; + selp.f32 %f415, 0fBD0C8162, 0fBCDC1BE7, %p85; + fma.rn.f32 %f416, %f414, %f409, %f415; + selp.f32 %f417, 0f3E1CF906, 0f3DE718AF, %p85; + fma.rn.f32 %f418, %f416, %f409, %f417; + selp.f32 %f419, 0f3F6A937E, 0fBEC093AC, %p85; + fma.rn.f32 %f420, %f418, %f409, %f419; + selp.f32 %f421, 0f3F20D842, 0f3E0375D3, %p85; + fma.rn.f32 %f422, %f420, %f409, %f421; + neg.f32 %f423, %f69; + selp.f32 %f424, %f423, %f407, %p85; + fma.rn.f32 %f70, %f422, %f424, %f424; + mov.b32 %r175, %f407; + and.b32 %r52, %r175, -2147483648; + add.f32 %f425, %f386, 0f3F000000; + sub.f32 %f71, %f425, %f1763; + div.rn.f32 %f72, %f71, %f317; + mov.f32 %f426, 0f3F800000; + cvt.rzi.f32.f32 %f427, %f426; + add.f32 %f428, %f427, %f427; + mov.f32 %f429, 0f40000000; + sub.f32 %f430, %f429, %f428; + abs.f32 %f73, %f430; + setp.eq.f32 %p86, %f73, 0f3F800000; + abs.f32 %f74, %f72; + setp.lt.f32 %p87, %f74, 0f00800000; + mul.f32 %f431, %f74, 0f4B800000; + selp.f32 %f432, %f431, %f74, %p87; + selp.f32 %f433, 0fC3170000, 0fC2FE0000, %p87; + mov.b32 %r176, %f432; + and.b32 %r177, %r176, 8388607; + or.b32 %r178, %r177, 1065353216; + mov.b32 %f434, %r178; + shr.u32 %r179, %r176, 23; + cvt.rn.f32.u32 %f435, %r179; + add.f32 %f436, %f433, %f435; + setp.gt.f32 %p88, %f434, 0f3FB504F3; + mul.f32 %f437, %f434, 0f3F000000; + add.f32 %f438, %f436, 0f3F800000; + selp.f32 %f439, %f438, %f436, %p88; + selp.f32 %f440, %f437, %f434, %p88; + add.f32 %f441, %f440, 0fBF800000; + add.f32 %f442, %f440, 0f3F800000; + rcp.approx.ftz.f32 %f443, %f442; + add.f32 %f444, %f441, %f441; + mul.f32 %f445, %f444, %f443; + mul.f32 %f446, %f445, %f445; + mov.f32 %f447, 0f3C4CAF63; + mov.f32 %f448, 0f3B18F0FE; + fma.rn.f32 %f449, %f448, %f446, %f447; + mov.f32 %f450, 0f3DAAAABD; + fma.rn.f32 %f451, %f449, %f446, %f450; + mul.rn.f32 %f452, %f451, %f446; + mul.rn.f32 %f453, %f452, %f445; + sub.f32 %f454, %f441, %f445; + add.f32 %f455, %f454, %f454; + neg.f32 %f456, %f445; + fma.rn.f32 %f457, %f456, %f441, %f455; + mul.rn.f32 %f458, %f443, %f457; + add.f32 %f459, %f453, %f445; + sub.f32 %f460, %f445, %f459; + add.f32 %f461, %f453, %f460; + add.f32 %f462, %f458, %f461; + add.f32 %f463, %f459, %f462; + sub.f32 %f464, %f459, %f463; + add.f32 %f465, %f462, %f464; + mov.f32 %f466, 0f3F317200; + mul.rn.f32 %f467, %f439, %f466; + mov.f32 %f468, 0f35BFBE8E; + mul.rn.f32 %f469, %f439, %f468; + add.f32 %f470, %f467, %f463; + sub.f32 %f471, %f467, %f470; + add.f32 %f472, %f463, %f471; + add.f32 %f473, %f465, %f472; + add.f32 %f474, %f469, %f473; + add.f32 %f475, %f470, %f474; + sub.f32 %f476, %f470, %f475; + add.f32 %f477, %f474, %f476; + mul.rn.f32 %f478, %f429, %f475; + neg.f32 %f479, %f478; + fma.rn.f32 %f480, %f429, %f475, %f479; + fma.rn.f32 %f481, %f429, %f477, %f480; + fma.rn.f32 %f483, %f1627, %f475, %f481; + add.rn.f32 %f484, %f478, %f483; + neg.f32 %f485, %f484; + add.rn.f32 %f486, %f478, %f485; + add.rn.f32 %f487, %f486, %f483; + mov.b32 %r180, %f484; + setp.eq.s32 %p89, %r180, 1118925336; + add.s32 %r181, %r180, -1; + mov.b32 %f488, %r181; + add.f32 %f489, %f487, 0f37000000; + selp.f32 %f75, %f489, %f487, %p89; + selp.f32 %f490, %f488, %f484, %p89; + mov.f32 %f491, 0f3FB8AA3B; + mul.rn.f32 %f492, %f490, %f491; + cvt.rzi.f32.f32 %f493, %f492; + abs.f32 %f494, %f493; + setp.gt.f32 %p90, %f494, 0f42FC0000; + mov.b32 %r182, %f493; + and.b32 %r183, %r182, -2147483648; + or.b32 %r184, %r183, 1123811328; + mov.b32 %f495, %r184; + selp.f32 %f496, %f495, %f493, %p90; + mov.f32 %f497, 0fBF317218; + fma.rn.f32 %f498, %f496, %f497, %f490; + mov.f32 %f499, 0f3102E308; + fma.rn.f32 %f500, %f496, %f499, %f498; + mul.f32 %f501, %f500, 0f3FB8AA3B; + add.f32 %f502, %f496, 0f4B40007F; + mov.b32 %r185, %f502; + shl.b32 %r186, %r185, 23; + mov.b32 %f503, %r186; + ex2.approx.ftz.f32 %f504, %f501; + mul.f32 %f76, %f504, %f503; + setp.lt.f32 %p91, %f72, 0f00000000; + and.pred %p4, %p91, %p86; + add.f32 %f505, %f72, %f72; + selp.f32 %f77, %f505, 0f00000000, %p86; + div.rn.f32 %f78, %f68, %f317; + abs.f32 %f79, %f78; + setp.lt.f32 %p92, %f79, 0f00800000; + mul.f32 %f507, %f79, 0f4B800000; + selp.f32 %f508, %f507, %f79, %p92; + selp.f32 %f509, 0fC3170000, 0fC2FE0000, %p92; + mov.b32 %r187, %f508; + and.b32 %r188, %r187, 8388607; + or.b32 %r189, %r188, 1065353216; + mov.b32 %f510, %r189; + shr.u32 %r190, %r187, 23; + cvt.rn.f32.u32 %f511, %r190; + add.f32 %f512, %f509, %f511; + setp.gt.f32 %p93, %f510, 0f3FB504F3; + mul.f32 %f513, %f510, 0f3F000000; + add.f32 %f514, %f512, 0f3F800000; + selp.f32 %f515, %f514, %f512, %p93; + selp.f32 %f516, %f513, %f510, %p93; + add.f32 %f517, %f516, 0fBF800000; + add.f32 %f518, %f516, 0f3F800000; + rcp.approx.ftz.f32 %f519, %f518; + add.f32 %f520, %f517, %f517; + mul.f32 %f521, %f520, %f519; + mul.f32 %f522, %f521, %f521; + fma.rn.f32 %f523, %f448, %f522, %f447; + fma.rn.f32 %f524, %f523, %f522, %f450; + mul.rn.f32 %f525, %f524, %f522; + mul.rn.f32 %f526, %f525, %f521; + sub.f32 %f527, %f517, %f521; + add.f32 %f528, %f527, %f527; + neg.f32 %f529, %f521; + fma.rn.f32 %f530, %f529, %f517, %f528; + mul.rn.f32 %f531, %f519, %f530; + add.f32 %f532, %f526, %f521; + sub.f32 %f533, %f521, %f532; + add.f32 %f534, %f526, %f533; + add.f32 %f535, %f531, %f534; + add.f32 %f536, %f532, %f535; + sub.f32 %f537, %f532, %f536; + add.f32 %f538, %f535, %f537; + mul.rn.f32 %f539, %f515, %f466; + mul.rn.f32 %f540, %f515, %f468; + add.f32 %f541, %f539, %f536; + sub.f32 %f542, %f539, %f541; + add.f32 %f543, %f536, %f542; + add.f32 %f544, %f538, %f543; + add.f32 %f545, %f540, %f544; + add.f32 %f546, %f541, %f545; + sub.f32 %f547, %f541, %f546; + add.f32 %f548, %f545, %f547; + mul.rn.f32 %f549, %f429, %f546; + neg.f32 %f550, %f549; + fma.rn.f32 %f551, %f429, %f546, %f550; + fma.rn.f32 %f552, %f429, %f548, %f551; + fma.rn.f32 %f553, %f1627, %f546, %f552; + add.rn.f32 %f554, %f549, %f553; + neg.f32 %f555, %f554; + add.rn.f32 %f556, %f549, %f555; + add.rn.f32 %f557, %f556, %f553; + mov.b32 %r191, %f554; + setp.eq.s32 %p94, %r191, 1118925336; + add.s32 %r192, %r191, -1; + mov.b32 %f558, %r192; + add.f32 %f559, %f557, 0f37000000; + selp.f32 %f80, %f559, %f557, %p94; + selp.f32 %f560, %f558, %f554, %p94; + mul.rn.f32 %f561, %f560, %f491; + cvt.rzi.f32.f32 %f562, %f561; + abs.f32 %f563, %f562; + setp.gt.f32 %p95, %f563, 0f42FC0000; + mov.b32 %r193, %f562; + and.b32 %r194, %r193, -2147483648; + or.b32 %r195, %r194, 1123811328; + mov.b32 %f564, %r195; + selp.f32 %f565, %f564, %f562, %p95; + fma.rn.f32 %f566, %f565, %f497, %f560; + fma.rn.f32 %f567, %f565, %f499, %f566; + mul.f32 %f568, %f567, 0f3FB8AA3B; + add.f32 %f569, %f565, 0f4B40007F; + mov.b32 %r196, %f569; + shl.b32 %r197, %r196, 23; + mov.b32 %f570, %r197; + ex2.approx.ftz.f32 %f571, %f568; + mul.f32 %f81, %f571, %f570; + setp.lt.f32 %p96, %f78, 0f00000000; + and.pred %p5, %p96, %p86; + add.f32 %f572, %f78, %f78; + selp.f32 %f84, %f572, 0f00000000, %p86; + and.b32 %r199, %r198, 2146435072; + setp.eq.s32 %p97, %r199, 1073741824; + abs.f64 %fd222, %fd34; + { // callseq 2, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd222; + .param .b64 param1; + st.param.f64 [param1+0], %fd221; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd37, [retval0+0]; + } // callseq 2 + { + .reg .b32 %temp; + mov.b64 {%temp, %r55}, %fd34; + } + setp.lt.s32 %p98, %r55, 0; + and.pred %p6, %p98, %p97; + { + .reg .b32 %temp; + mov.b64 {%temp, %r202}, %fd35; + } + and.b32 %r57, %r202, 2146435072; + setp.ne.s32 %p100, %r57, 2146435072; + setp.gtu.f64 %p101, %fd222, 0d7FF0000000000000; + and.b32 %r58, %r198, 2147483647; + selp.b32 %r206, 2146435072, 0, %p104; + setp.ne.s32 %p105, %r58, 1071644672; + and.pred %p106, %p105, %p6; + or.b32 %r207, %r206, -2147483648; + selp.b32 %r61, %r207, %r206, %p106; + mov.f64 %fd223, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r62}, %fd223; + } + and.b32 %r64, %r62, 2147483647; + setp.gt.s32 %p107, %r62, -1; + selp.b32 %r65, 2146435072, 0, %p107; + or.b32 %r66, %r65, -2147483648; + or.pred %p9, %p100, %p101; + mov.u32 %r536, %r171; + +$L__BB0_56: + cvt.rn.f32.s32 %f1632, %r535; + sub.f32 %f1631, %f1632, %f1763; + add.f32 %f1630, %f1631, 0f3F000000; + mul.f32 %f1629, %f65, %f1630; + abs.f32 %f1628, %f1629; + setp.ltu.f32 %p108, %f1628, 0f3F8060FE; + mov.f32 %f1736, %f67; + @%p108 bra $L__BB0_58; + + mov.f32 %f1674, 0f3F800000; + ex2.approx.ftz.f32 %f574, %f67; + sub.f32 %f576, %f1674, %f574; + mov.b32 %r209, %f576; + or.b32 %r210, %r51, %r209; + mov.b32 %f1736, %r210; + +$L__BB0_58: + cvt.rn.f32.s32 %f1637, %r535; + sub.f32 %f1636, %f1637, %f1763; + add.f32 %f1635, %f1636, 0fBF000000; + mul.f32 %f1634, %f65, %f1635; + abs.f32 %f1633, %f1634; + setp.ltu.f32 %p109, %f1633, 0f3F8060FE; + mov.f32 %f1737, %f70; + @%p109 bra $L__BB0_60; + + mov.f32 %f1673, 0f3F800000; + ex2.approx.ftz.f32 %f577, %f70; + sub.f32 %f579, %f1673, %f577; + mov.b32 %r211, %f579; + or.b32 %r212, %r52, %r211; + mov.b32 %f1737, %r212; + +$L__BB0_60: + sub.f32 %f580, %f1736, %f1737; + mul.f32 %f99, %f580, 0f3F000000; + cvt.rn.f32.s32 %f100, %r536; + sub.f32 %f101, %f100, %f1762; + add.f32 %f581, %f101, 0f3F000000; + mul.f32 %f102, %f65, %f581; + abs.f32 %f582, %f102; + setp.ltu.f32 %p110, %f582, 0f3F8060FE; + setp.ge.f32 %p111, %f582, 0f3F8060FE; + mul.f32 %f583, %f102, %f102; + selp.f32 %f584, %f582, %f583, %p111; + selp.f32 %f585, 0f3789CA3C, 0f38B1E96A, %p111; + selp.f32 %f586, 0fB9F560B9, 0fBA574D20, %p111; + fma.rn.f32 %f587, %f585, %f584, %f586; + selp.f32 %f588, 0f3BAC840B, 0f3BAAD5EA, %p111; + fma.rn.f32 %f589, %f587, %f584, %f588; + selp.f32 %f590, 0fBD0C8162, 0fBCDC1BE7, %p111; + fma.rn.f32 %f591, %f589, %f584, %f590; + selp.f32 %f592, 0f3E1CF906, 0f3DE718AF, %p111; + fma.rn.f32 %f593, %f591, %f584, %f592; + selp.f32 %f594, 0f3F6A937E, 0fBEC093AC, %p111; + fma.rn.f32 %f595, %f593, %f584, %f594; + selp.f32 %f596, 0f3F20D842, 0f3E0375D3, %p111; + fma.rn.f32 %f597, %f595, %f584, %f596; + neg.f32 %f598, %f582; + selp.f32 %f599, %f598, %f102, %p111; + fma.rn.f32 %f1738, %f597, %f599, %f599; + @%p110 bra $L__BB0_62; + + mov.f32 %f1672, 0f3F800000; + ex2.approx.ftz.f32 %f600, %f1738; + sub.f32 %f602, %f1672, %f600; + mov.b32 %r213, %f602; + mov.b32 %r214, %f102; + and.b32 %r215, %r214, -2147483648; + or.b32 %r216, %r215, %r213; + mov.b32 %f1738, %r216; + +$L__BB0_62: + cvt.rn.f32.s32 %f1639, %r536; + sub.f32 %f1638, %f1639, %f1762; + add.f32 %f106, %f1638, 0fBF000000; + mul.f32 %f107, %f65, %f106; + abs.f32 %f603, %f107; + setp.ltu.f32 %p112, %f603, 0f3F8060FE; + setp.ge.f32 %p113, %f603, 0f3F8060FE; + mul.f32 %f604, %f107, %f107; + selp.f32 %f605, %f603, %f604, %p113; + selp.f32 %f606, 0f3789CA3C, 0f38B1E96A, %p113; + selp.f32 %f607, 0fB9F560B9, 0fBA574D20, %p113; + fma.rn.f32 %f608, %f606, %f605, %f607; + selp.f32 %f609, 0f3BAC840B, 0f3BAAD5EA, %p113; + fma.rn.f32 %f610, %f608, %f605, %f609; + selp.f32 %f611, 0fBD0C8162, 0fBCDC1BE7, %p113; + fma.rn.f32 %f612, %f610, %f605, %f611; + selp.f32 %f613, 0f3E1CF906, 0f3DE718AF, %p113; + fma.rn.f32 %f614, %f612, %f605, %f613; + selp.f32 %f615, 0f3F6A937E, 0fBEC093AC, %p113; + fma.rn.f32 %f616, %f614, %f605, %f615; + selp.f32 %f617, 0f3F20D842, 0f3E0375D3, %p113; + fma.rn.f32 %f618, %f616, %f605, %f617; + neg.f32 %f619, %f603; + selp.f32 %f620, %f619, %f107, %p113; + fma.rn.f32 %f1739, %f618, %f620, %f620; + @%p112 bra $L__BB0_64; + + mov.f32 %f1671, 0f3F800000; + ex2.approx.ftz.f32 %f621, %f1739; + sub.f32 %f623, %f1671, %f621; + mov.b32 %r217, %f623; + mov.b32 %r218, %f107; + and.b32 %r219, %r218, -2147483648; + or.b32 %r220, %r219, %r217; + mov.b32 %f1739, %r220; + +$L__BB0_64: + sub.f32 %f625, %f1738, %f1739; + mul.f32 %f111, %f625, 0f3F000000; + mul.f32 %f626, %f99, %f1761; + fma.rn.f32 %f112, %f111, %f626, %f1760; + mad.lo.s32 %r221, %r536, %r86, %r535; + add.s32 %r222, %r221, %r2; + mul.wide.s32 %rd24, %r222, 4; + add.s64 %rd25, %rd1, %rd24; + ld.global.f32 %f113, [%rd25]; + setp.eq.f32 %p114, %f76, 0f7F800000; + mov.f32 %f1740, 0f7F800000; + @%p114 bra $L__BB0_66; + + fma.rn.f32 %f1740, %f76, %f75, %f76; + +$L__BB0_66: + setp.geu.f32 %p377, %f72, 0f00000000; + mov.b32 %r223, %f1740; + xor.b32 %r224, %r223, -2147483648; + mov.b32 %f627, %r224; + selp.f32 %f116, %f627, %f1740, %p4; + setp.eq.f32 %p115, %f72, 0f00000000; + selp.f32 %f1741, %f77, %f116, %p115; + @%p377 bra $L__BB0_69; + + mov.f32 %f1640, 0f40000000; + cvt.rzi.f32.f32 %f629, %f1640; + setp.eq.f32 %p116, %f629, 0f40000000; + mov.f32 %f1741, %f116; + @%p116 bra $L__BB0_69; + + mov.f32 %f1741, 0f7FFFFFFF; + +$L__BB0_69: + abs.f32 %f1645, %f72; + mov.f32 %f1644, 0f3FB8AA3B; + add.f32 %f1643, %f1645, 0f40000000; + mov.b32 %r479, %f1643; + selp.f32 %f1642, 0fFF800000, 0f7F800000, %p4; + add.f32 %f1641, %f72, 0f40000000; + setp.gtu.f32 %p117, %f1645, 0f7F800000; + mov.f32 %f1742, 0f7F800000; + selp.f32 %f632, %f1641, %f1741, %p117; + setp.neu.f32 %p118, %f1645, 0f7F800000; + selp.f32 %f633, %f632, %f1642, %p118; + setp.gt.s32 %p119, %r479, 2139095039; + selp.f32 %f634, %f633, %f1741, %p119; + mul.f32 %f635, %f634, 0fBF000000; + setp.eq.f32 %p120, %f72, 0f3F800000; + selp.f32 %f636, 0fBF000000, %f635, %p120; + mov.f32 %f638, 0f3BBB989D; + fma.rn.f32 %f639, %f636, %f638, %f350; + mov.f32 %f641, 0f437C0000; + cvt.sat.f32.f32 %f642, %f639; + mov.f32 %f643, 0f4B400001; + fma.rm.f32 %f644, %f642, %f641, %f643; + add.f32 %f645, %f644, 0fCB40007F; + neg.f32 %f646, %f645; + fma.rn.f32 %f647, %f636, %f1644, %f646; + mov.f32 %f648, 0f32A57060; + fma.rn.f32 %f649, %f636, %f648, %f647; + mov.b32 %r225, %f644; + shl.b32 %r226, %r225, 23; + mov.b32 %f650, %r226; + ex2.approx.ftz.f32 %f651, %f649; + mul.f32 %f119, %f651, %f650; + setp.eq.f32 %p121, %f81, 0f7F800000; + @%p121 bra $L__BB0_71; + + fma.rn.f32 %f1742, %f81, %f80, %f81; + +$L__BB0_71: + setp.geu.f32 %p378, %f78, 0f00000000; + mov.b32 %r227, %f1742; + xor.b32 %r228, %r227, -2147483648; + mov.b32 %f652, %r228; + selp.f32 %f122, %f652, %f1742, %p5; + setp.eq.f32 %p122, %f78, 0f00000000; + selp.f32 %f1743, %f84, %f122, %p122; + @%p378 bra $L__BB0_74; + + mov.f32 %f1646, 0f40000000; + cvt.rzi.f32.f32 %f654, %f1646; + setp.eq.f32 %p123, %f654, 0f40000000; + mov.f32 %f1743, %f122; + @%p123 bra $L__BB0_74; + + mov.f32 %f1743, 0f7FFFFFFF; + +$L__BB0_74: + abs.f32 %f1655, %f78; + mov.f32 %f1654, 0f32A57060; + mov.f32 %f1653, 0f4B400001; + mov.f32 %f1652, 0f437C0000; + mov.f32 %f1651, 0f3BBB989D; + add.f32 %f1650, %f1655, 0f40000000; + mov.b32 %r480, %f1650; + selp.f32 %f1649, 0fFF800000, 0f7F800000, %p5; + add.f32 %f1648, %f78, 0f40000000; + mov.f32 %f1647, 0f3FB8AA3B; + setp.gtu.f32 %p124, %f1655, 0f7F800000; + selp.f32 %f656, %f1648, %f1743, %p124; + setp.neu.f32 %p125, %f1655, 0f7F800000; + selp.f32 %f657, %f656, %f1649, %p125; + setp.gt.s32 %p126, %r480, 2139095039; + selp.f32 %f658, %f657, %f1743, %p126; + mul.f32 %f659, %f658, 0fBF000000; + setp.eq.f32 %p127, %f78, 0f3F800000; + selp.f32 %f660, 0fBF000000, %f659, %p127; + fma.rn.f32 %f663, %f660, %f1651, %f350; + cvt.sat.f32.f32 %f666, %f663; + fma.rm.f32 %f668, %f666, %f1652, %f1653; + add.f32 %f669, %f668, 0fCB40007F; + neg.f32 %f670, %f669; + fma.rn.f32 %f671, %f660, %f1647, %f670; + fma.rn.f32 %f673, %f660, %f1654, %f671; + mov.b32 %r229, %f668; + shl.b32 %r230, %r229, 23; + mov.b32 %f674, %r230; + ex2.approx.ftz.f32 %f675, %f673; + mul.f32 %f125, %f675, %f674; + sub.f32 %f676, %f119, %f125; + mul.f32 %f677, %f56, %f676; + mul.f32 %f126, %f111, %f677; + not.pred %p128, %p6; + mov.f64 %fd348, %fd37; + @%p128 bra $L__BB0_76; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r231}, %fd37; + } + xor.b32 %r232, %r231, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r233, %temp}, %fd37; + } + mov.b64 %fd348, {%r233, %r232}; + +$L__BB0_76: + setp.eq.f32 %p129, %f317, 0f00000000; + @%p129 bra $L__BB0_80; + bra.uni $L__BB0_77; + +$L__BB0_80: + and.b32 %r495, %r198, 2146435072; + setp.eq.s32 %p381, %r495, 1073741824; + { + .reg .b32 %temp; + mov.b64 {%temp, %r494}, %fd34; + } + selp.b32 %r493, %r494, 0, %p381; + or.b32 %r492, %r493, 2146435072; + selp.b32 %r491, %r492, %r493, %p99; + mov.u32 %r234, 0; + mov.b64 %fd348, {%r234, %r491}; + bra.uni $L__BB0_81; + +$L__BB0_77: + { + .reg .b32 %temp; + mov.b64 {%temp, %r481}, %fd34; + } + setp.gt.s32 %p130, %r481, -1; + @%p130 bra $L__BB0_81; + + cvt.rzi.f64.f64 %fd225, %fd221; + setp.eq.f64 %p131, %fd225, 0d4008000000000000; + @%p131 bra $L__BB0_81; + + mov.f64 %fd348, 0dFFF8000000000000; + +$L__BB0_81: + { + .reg .b32 %temp; + mov.b64 {%temp, %r483}, %fd35; + } + and.b32 %r482, %r483, 2146435072; + setp.ne.s32 %p379, %r482, 2146435072; + selp.f64 %fd349, %fd348, %fd35, %p379; + @%p9 bra $L__BB0_86; + + and.b32 %r484, %r198, 2147483647; + setp.eq.s32 %p133, %r484, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r235, %temp}, %fd221; + } + setp.eq.s32 %p134, %r235, 0; + and.pred %p135, %p133, %p134; + @%p135 bra $L__BB0_85; + bra.uni $L__BB0_83; + +$L__BB0_85: + abs.f64 %fd307, %fd34; + setp.gt.f64 %p380, %fd307, 0d3FF0000000000000; + selp.b32 %r490, 2146435072, 0, %p380; + xor.b32 %r489, %r490, 2146435072; + selp.b32 %r488, %r489, %r490, %p99; + selp.b32 %r487, 1072693248, %r488, %p103; + mov.u32 %r238, 0; + mov.b64 %fd349, {%r238, %r487}; + bra.uni $L__BB0_86; + +$L__BB0_83: + { + .reg .b32 %temp; + mov.b64 {%temp, %r486}, %fd34; + } + and.b32 %r485, %r486, 2147483647; + setp.ne.s32 %p136, %r485, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r236, %temp}, %fd34; + } + setp.ne.s32 %p137, %r236, 0; + or.pred %p138, %p136, %p137; + mov.f64 %fd349, %fd348; + @%p138 bra $L__BB0_86; + + mov.u32 %r237, 0; + mov.b64 %fd349, {%r237, %r61}; + +$L__BB0_86: + cvt.rn.f32.s32 %f1670, %r535; + cvt.rn.f32.s32 %f1669, %r536; + mov.f32 %f1668, 0f3102E308; + mov.f32 %f1667, 0fBF317218; + mov.f32 %f1666, 0f35BFBE8E; + mov.f32 %f1665, 0f3F317200; + mov.f32 %f1664, 0f3DAAAABD; + mov.f32 %f1663, 0f3C4CAF63; + mov.f32 %f1662, 0f3B18F0FE; + add.f32 %f1661, %f1670, 0f3F000000; + sub.f32 %f1660, %f1661, %f1763; + sub.f32 %f1659, %f1670, %f1763; + add.f32 %f1658, %f1659, 0fBF000000; + mov.f32 %f1657, 0f3FB8AA3B; + mov.f32 %f1656, 0f40000000; + setp.eq.f32 %p139, %f317, 0f3F800000; + selp.f64 %fd228, 0d3FF0000000000000, %fd349, %p139; + div.rn.f64 %fd46, %fd36, %fd228; + mul.f32 %f679, %f1658, %f125; + mul.f32 %f680, %f1660, %f119; + sub.f32 %f681, %f680, %f679; + cvt.f64.f32 %fd229, %f681; + mul.f64 %fd230, %fd46, %fd229; + cvt.f64.f32 %fd231, %f111; + mul.f64 %fd232, %fd230, %fd231; + cvt.rn.f32.f64 %f127, %fd232; + add.f32 %f682, %f1669, 0f3F000000; + sub.f32 %f128, %f682, %f1762; + div.rn.f32 %f129, %f128, %f317; + abs.f32 %f130, %f129; + setp.lt.f32 %p140, %f130, 0f00800000; + mul.f32 %f683, %f130, 0f4B800000; + selp.f32 %f684, %f683, %f130, %p140; + selp.f32 %f685, 0fC3170000, 0fC2FE0000, %p140; + mov.b32 %r239, %f684; + and.b32 %r240, %r239, 8388607; + or.b32 %r241, %r240, 1065353216; + mov.b32 %f686, %r241; + shr.u32 %r242, %r239, 23; + cvt.rn.f32.u32 %f687, %r242; + add.f32 %f688, %f685, %f687; + setp.gt.f32 %p141, %f686, 0f3FB504F3; + mul.f32 %f689, %f686, 0f3F000000; + add.f32 %f690, %f688, 0f3F800000; + selp.f32 %f691, %f690, %f688, %p141; + selp.f32 %f692, %f689, %f686, %p141; + add.f32 %f693, %f692, 0fBF800000; + add.f32 %f694, %f692, 0f3F800000; + rcp.approx.ftz.f32 %f695, %f694; + add.f32 %f696, %f693, %f693; + mul.f32 %f698, %f696, %f695; + mul.f32 %f699, %f698, %f698; + fma.rn.f32 %f702, %f1662, %f699, %f1663; + fma.rn.f32 %f704, %f702, %f699, %f1664; + mul.rn.f32 %f705, %f704, %f699; + mul.rn.f32 %f706, %f705, %f698; + sub.f32 %f707, %f693, %f698; + add.f32 %f708, %f707, %f707; + neg.f32 %f709, %f698; + fma.rn.f32 %f710, %f709, %f693, %f708; + mul.rn.f32 %f711, %f695, %f710; + add.f32 %f712, %f706, %f698; + sub.f32 %f713, %f698, %f712; + add.f32 %f714, %f706, %f713; + add.f32 %f715, %f711, %f714; + add.f32 %f716, %f712, %f715; + sub.f32 %f717, %f712, %f716; + add.f32 %f718, %f715, %f717; + mul.rn.f32 %f720, %f691, %f1665; + mul.rn.f32 %f722, %f691, %f1666; + add.f32 %f723, %f720, %f716; + sub.f32 %f724, %f720, %f723; + add.f32 %f725, %f716, %f724; + add.f32 %f726, %f718, %f725; + add.f32 %f727, %f722, %f726; + add.f32 %f728, %f723, %f727; + sub.f32 %f729, %f723, %f728; + add.f32 %f730, %f727, %f729; + mul.rn.f32 %f731, %f1656, %f728; + neg.f32 %f732, %f731; + fma.rn.f32 %f733, %f1656, %f728, %f732; + fma.rn.f32 %f734, %f1656, %f730, %f733; + mov.f32 %f735, 0f00000000; + fma.rn.f32 %f736, %f735, %f728, %f734; + add.rn.f32 %f737, %f731, %f736; + neg.f32 %f738, %f737; + add.rn.f32 %f739, %f731, %f738; + add.rn.f32 %f740, %f739, %f736; + mov.b32 %r243, %f737; + setp.eq.s32 %p142, %r243, 1118925336; + add.s32 %r244, %r243, -1; + mov.b32 %f741, %r244; + add.f32 %f742, %f740, 0f37000000; + selp.f32 %f131, %f742, %f740, %p142; + selp.f32 %f743, %f741, %f737, %p142; + mul.rn.f32 %f745, %f743, %f1657; + cvt.rzi.f32.f32 %f746, %f745; + abs.f32 %f747, %f746; + setp.gt.f32 %p143, %f747, 0f42FC0000; + mov.b32 %r245, %f746; + and.b32 %r246, %r245, -2147483648; + or.b32 %r247, %r246, 1123811328; + mov.b32 %f748, %r247; + selp.f32 %f749, %f748, %f746, %p143; + fma.rn.f32 %f751, %f749, %f1667, %f743; + fma.rn.f32 %f753, %f749, %f1668, %f751; + mul.f32 %f754, %f753, 0f3FB8AA3B; + add.f32 %f755, %f749, 0f4B40007F; + mov.b32 %r248, %f755; + shl.b32 %r249, %r248, 23; + mov.b32 %f756, %r249; + ex2.approx.ftz.f32 %f757, %f754; + mul.f32 %f132, %f757, %f756; + setp.eq.f32 %p144, %f132, 0f7F800000; + mov.f32 %f1744, 0f7F800000; + @%p144 bra $L__BB0_88; + + fma.rn.f32 %f1744, %f132, %f131, %f132; + +$L__BB0_88: + setp.lt.f32 %p145, %f129, 0f00000000; + and.pred %p10, %p145, %p86; + setp.eq.f32 %p147, %f129, 0f00000000; + @%p147 bra $L__BB0_92; + bra.uni $L__BB0_89; + +$L__BB0_92: + add.f32 %f762, %f129, %f129; + selp.f32 %f1746, %f762, 0f00000000, %p86; + bra.uni $L__BB0_93; + +$L__BB0_89: + mov.b32 %r250, %f1744; + xor.b32 %r251, %r250, -2147483648; + mov.b32 %f758, %r251; + selp.f32 %f1746, %f758, %f1744, %p10; + setp.geu.f32 %p148, %f129, 0f00000000; + @%p148 bra $L__BB0_93; + + mov.f32 %f1678, 0f40000000; + cvt.rzi.f32.f32 %f760, %f1678; + setp.eq.f32 %p149, %f760, 0f40000000; + @%p149 bra $L__BB0_93; + + mov.f32 %f1746, 0f7FFFFFFF; + +$L__BB0_93: + abs.f32 %f1596, %f129; + add.f32 %f763, %f1596, 0f40000000; + mov.b32 %r252, %f763; + setp.lt.s32 %p151, %r252, 2139095040; + @%p151 bra $L__BB0_98; + + abs.f32 %f1676, %f129; + setp.gtu.f32 %p152, %f1676, 0f7F800000; + @%p152 bra $L__BB0_97; + bra.uni $L__BB0_95; + +$L__BB0_97: + add.f32 %f1746, %f129, 0f40000000; + bra.uni $L__BB0_98; + +$L__BB0_95: + abs.f32 %f1677, %f129; + setp.neu.f32 %p153, %f1677, 0f7F800000; + @%p153 bra $L__BB0_98; + + selp.f32 %f1746, 0fFF800000, 0f7F800000, %p10; + +$L__BB0_98: + mov.f32 %f1613, 0f00000000; + cvt.rn.f32.s32 %f1612, %r536; + sub.f32 %f1611, %f1612, %f1762; + add.f32 %f1610, %f1611, 0fBF000000; + mov.f32 %f1609, 0f3102E308; + mov.f32 %f1608, 0fBF317218; + mov.f32 %f1607, 0f35BFBE8E; + mov.f32 %f1606, 0f3F317200; + mov.f32 %f1605, 0f3DAAAABD; + mov.f32 %f1604, 0f3C4CAF63; + mov.f32 %f1603, 0f3B18F0FE; + mov.f32 %f1602, 0f32A57060; + mov.f32 %f1601, 0f4B400001; + mov.f32 %f1600, 0f437C0000; + mov.f32 %f1599, 0f3BBB989D; + mov.f32 %f1598, 0f3FB8AA3B; + mov.f32 %f1597, 0f40000000; + mul.f32 %f765, %f1746, 0fBF000000; + setp.eq.f32 %p154, %f129, 0f3F800000; + selp.f32 %f766, 0fBF000000, %f765, %p154; + fma.rn.f32 %f769, %f766, %f1599, %f350; + cvt.sat.f32.f32 %f772, %f769; + fma.rm.f32 %f774, %f772, %f1600, %f1601; + add.f32 %f775, %f774, 0fCB40007F; + neg.f32 %f776, %f775; + fma.rn.f32 %f777, %f766, %f1598, %f776; + fma.rn.f32 %f779, %f766, %f1602, %f777; + mov.b32 %r253, %f774; + shl.b32 %r254, %r253, 23; + mov.b32 %f780, %r254; + ex2.approx.ftz.f32 %f781, %f779; + mul.f32 %f141, %f781, %f780; + div.rn.f32 %f142, %f1610, %f317; abs.f32 %f143, %f142; - setp.ltu.f32 %p65, %f143, 0f3F800000; - @%p65 bra BB0_48; - bra.uni BB0_47; - -BB0_48: - mul.f32 %f756, %f142, %f142; - mov.f32 %f757, 0f3BA0C9F8; - mov.f32 %f758, 0fBA1268FB; - fma.rn.f32 %f759, %f758, %f756, %f757; - mov.f32 %f760, 0fBCDABFD4; - fma.rn.f32 %f761, %f759, %f756, %f760; - mov.f32 %f762, 0f3DE70331; - fma.rn.f32 %f763, %f761, %f756, %f762; - mov.f32 %f764, 0fBEC09330; - fma.rn.f32 %f765, %f763, %f756, %f764; - mov.f32 %f766, 0f3F906EBA; - fma.rn.f32 %f767, %f765, %f756, %f766; - mul.f32 %f2108, %f142, %f767; - bra.uni BB0_49; - -BB0_47: - mov.f32 %f1927, 0f3F800000; - mov.f32 %f738, 0f3A03BB71; - mov.f32 %f739, 0fB7B730FB; - fma.rn.f32 %f740, %f739, %f143, %f738; - mov.f32 %f741, 0fBBACA3B3; - fma.rn.f32 %f742, %f740, %f143, %f741; - mov.f32 %f743, 0f3D0A7445; - fma.rn.f32 %f744, %f742, %f143, %f743; - mov.f32 %f745, 0fBE1B3B75; - fma.rn.f32 %f746, %f744, %f143, %f745; - mov.f32 %f747, 0fBF6B385A; - fma.rn.f32 %f748, %f746, %f143, %f747; - mov.f32 %f749, 0fBFD0316E; - fma.rn.f32 %f750, %f748, %f143, %f749; - mov.f32 %f751, 0fBA031CCE; - fma.rn.f32 %f752, %f750, %f143, %f751; - ex2.approx.ftz.f32 %f753, %f752; - sub.f32 %f755, %f1927, %f753; - mov.b32 %r122, %f755; - setp.ltu.f32 %p66, %f143, 0f407AD445; - selp.b32 %r123, %r122, 1065353216, %p66; - mov.b32 %r124, %f142; - and.b32 %r125, %r124, -2147483648; - or.b32 %r126, %r123, %r125; - mov.b32 %f2108, %r126; - -BB0_49: - cvt.rn.f32.s32 %f1929, %r261; - sub.f32 %f1928, %f1929, %f2132; - mul.f32 %f147, %f1928, %f103; - abs.f32 %f148, %f147; - setp.ltu.f32 %p67, %f148, 0f3F800000; - @%p67 bra BB0_51; - bra.uni BB0_50; - -BB0_51: - mul.f32 %f786, %f147, %f147; - mov.f32 %f787, 0f3BA0C9F8; - mov.f32 %f788, 0fBA1268FB; - fma.rn.f32 %f789, %f788, %f786, %f787; - mov.f32 %f790, 0fBCDABFD4; - fma.rn.f32 %f791, %f789, %f786, %f790; - mov.f32 %f792, 0f3DE70331; - fma.rn.f32 %f793, %f791, %f786, %f792; - mov.f32 %f794, 0fBEC09330; - fma.rn.f32 %f795, %f793, %f786, %f794; - mov.f32 %f796, 0f3F906EBA; - fma.rn.f32 %f797, %f795, %f786, %f796; - mul.f32 %f2109, %f147, %f797; - bra.uni BB0_52; - -BB0_50: - mov.f32 %f1930, 0f3F800000; - mov.f32 %f768, 0f3A03BB71; - mov.f32 %f769, 0fB7B730FB; - fma.rn.f32 %f770, %f769, %f148, %f768; - mov.f32 %f771, 0fBBACA3B3; - fma.rn.f32 %f772, %f770, %f148, %f771; - mov.f32 %f773, 0f3D0A7445; - fma.rn.f32 %f774, %f772, %f148, %f773; - mov.f32 %f775, 0fBE1B3B75; - fma.rn.f32 %f776, %f774, %f148, %f775; - mov.f32 %f777, 0fBF6B385A; - fma.rn.f32 %f778, %f776, %f148, %f777; - mov.f32 %f779, 0fBFD0316E; - fma.rn.f32 %f780, %f778, %f148, %f779; - mov.f32 %f781, 0fBA031CCE; - fma.rn.f32 %f782, %f780, %f148, %f781; - ex2.approx.ftz.f32 %f783, %f782; - sub.f32 %f785, %f1930, %f783; - mov.b32 %r127, %f785; - setp.ltu.f32 %p68, %f148, 0f407AD445; - selp.b32 %r128, %r127, 1065353216, %p68; - mov.b32 %r129, %f147; - and.b32 %r130, %r129, -2147483648; - or.b32 %r131, %r128, %r130; - mov.b32 %f2109, %r131; - -BB0_52: - mov.f32 %f1931, 0f40000000; - sub.f32 %f800, %f2108, %f2109; - mul.f32 %f152, %f800, 0f3F000000; - mul.f32 %f801, %f139, %f2131; - fma.rn.f32 %f153, %f152, %f801, %f2064; - mad.lo.s32 %r132, %r261, %r53, %r260; - add.s32 %r133, %r132, %r2; - mul.wide.s32 %rd52, %r133, 4; - add.s64 %rd53, %rd1, %rd52; - ld.global.f32 %f154, [%rd53]; - // inline asm - rcp.approx.ftz.f32 %f798,%f114; - // inline asm - mul.f32 %f802, %f798, %f115; - mul.f32 %f803, %f802, %f802; - mov.f32 %f804, 0f3C4CAF63; - mov.f32 %f805, 0f3B18F0FE; - fma.rn.f32 %f806, %f805, %f803, %f804; - mov.f32 %f807, 0f3DAAAABD; - fma.rn.f32 %f808, %f806, %f803, %f807; - mul.rn.f32 %f809, %f808, %f803; - mul.rn.f32 %f810, %f809, %f802; - sub.f32 %f811, %f113, %f802; - neg.f32 %f812, %f802; - add.f32 %f813, %f811, %f811; - fma.rn.f32 %f814, %f812, %f113, %f813; - mul.rn.f32 %f815, %f798, %f814; - add.f32 %f816, %f810, %f802; - sub.f32 %f817, %f802, %f816; - add.f32 %f818, %f810, %f817; - add.f32 %f819, %f815, %f818; - add.f32 %f820, %f816, %f819; - sub.f32 %f821, %f816, %f820; - add.f32 %f822, %f819, %f821; - add.f32 %f823, %f116, %f820; - sub.f32 %f824, %f116, %f823; - add.f32 %f825, %f820, %f824; - add.f32 %f826, %f822, %f825; - add.f32 %f827, %f117, %f826; - add.f32 %f828, %f823, %f827; - sub.f32 %f829, %f823, %f828; - add.f32 %f830, %f827, %f829; - mul.rn.f32 %f832, %f1931, %f828; - neg.f32 %f833, %f832; - fma.rn.f32 %f834, %f1931, %f828, %f833; - fma.rn.f32 %f835, %f1931, %f830, %f834; - mov.f32 %f836, 0f00000000; - fma.rn.f32 %f837, %f836, %f828, %f835; - add.rn.f32 %f838, %f832, %f837; - neg.f32 %f839, %f838; - add.rn.f32 %f840, %f832, %f839; - add.rn.f32 %f841, %f840, %f837; - mov.b32 %r134, %f838; - setp.eq.s32 %p69, %r134, 1118925336; - add.s32 %r135, %r134, -1; - mov.b32 %f842, %r135; - add.f32 %f843, %f841, 0f37000000; - selp.f32 %f844, %f842, %f838, %p69; - selp.f32 %f155, %f843, %f841, %p69; - mul.f32 %f845, %f844, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f846, %f845; - mov.f32 %f847, 0fBF317200; - fma.rn.f32 %f848, %f846, %f847, %f844; - mov.f32 %f849, 0fB5BFBE8E; - fma.rn.f32 %f850, %f846, %f849, %f848; - mul.f32 %f851, %f850, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f852, %f851; - add.f32 %f853, %f846, 0f00000000; - ex2.approx.f32 %f854, %f853; - mul.f32 %f855, %f852, %f854; - setp.lt.f32 %p70, %f844, 0fC2D20000; - selp.f32 %f856, 0f00000000, %f855, %p70; - setp.gt.f32 %p71, %f844, 0f42D20000; - selp.f32 %f2110, 0f7F800000, %f856, %p71; - setp.eq.f32 %p72, %f2110, 0f7F800000; - @%p72 bra BB0_54; - - fma.rn.f32 %f2110, %f2110, %f155, %f2110; - -BB0_54: - setp.geu.f32 %p225, %f110, 0f00000000; - mov.b32 %r136, %f2110; - xor.b32 %r137, %r136, -2147483648; - mov.b32 %f857, %r137; - selp.f32 %f159, %f857, %f2110, %p1; - add.f32 %f858, %f110, %f110; - selp.f32 %f859, %f858, 0f00000000, %p54; - setp.eq.f32 %p74, %f110, 0f00000000; - selp.f32 %f2111, %f859, %f159, %p74; - @%p225 bra BB0_56; - - mov.f32 %f1932, 0f40000000; - cvt.rzi.f32.f32 %f861, %f1932; - setp.neu.f32 %p75, %f861, 0f40000000; - selp.f32 %f2111, 0f7FFFFFFF, %f159, %p75; - -BB0_56: - mov.f32 %f1940, 0f00000000; - mov.f32 %f1939, 0f3DAAAABD; - mov.f32 %f1938, 0f3C4CAF63; - mov.f32 %f1937, 0f3B18F0FE; - mov.f32 %f1936, 0fB5BFBE8E; - mov.f32 %f1935, 0fBF317200; - abs.f32 %f1934, %f110; - mov.f32 %f1933, 0f40000000; - add.f32 %f864, %f1934, 0f40000000; - mov.b32 %r138, %f864; - setp.gt.s32 %p76, %r138, 2139095039; - add.f32 %f865, %f110, 0f40000000; - setp.gtu.f32 %p77, %f1934, 0f7F800000; - selp.f32 %f866, %f865, %f2111, %p77; - selp.f32 %f867, 0fFF800000, 0f7F800000, %p1; - setp.neu.f32 %p78, %f1934, 0f7F800000; - selp.f32 %f868, %f866, %f867, %p78; - selp.f32 %f869, %f868, %f2111, %p76; - mul.f32 %f870, %f869, 0fBF000000; - setp.eq.f32 %p79, %f110, 0f3F800000; - selp.f32 %f871, 0fBF000000, %f870, %p79; - mul.f32 %f872, %f871, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f873, %f872; - fma.rn.f32 %f875, %f873, %f1935, %f871; - fma.rn.f32 %f877, %f873, %f1936, %f875; - mul.f32 %f878, %f877, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f879, %f878; - add.f32 %f880, %f873, 0f00000000; - ex2.approx.f32 %f881, %f880; - mul.f32 %f882, %f879, %f881; - setp.lt.f32 %p80, %f871, 0fC2D20000; - selp.f32 %f883, 0f00000000, %f882, %p80; - setp.gt.f32 %p81, %f871, 0f42D20000; - selp.f32 %f163, 0f7F800000, %f883, %p81; - // inline asm - rcp.approx.ftz.f32 %f862,%f121; - // inline asm - mul.f32 %f884, %f862, %f122; - mul.f32 %f885, %f884, %f884; - fma.rn.f32 %f888, %f1937, %f885, %f1938; - fma.rn.f32 %f890, %f888, %f885, %f1939; - mul.rn.f32 %f891, %f890, %f885; - mul.rn.f32 %f892, %f891, %f884; - sub.f32 %f893, %f120, %f884; - neg.f32 %f894, %f884; - add.f32 %f895, %f893, %f893; - fma.rn.f32 %f896, %f894, %f120, %f895; - mul.rn.f32 %f897, %f862, %f896; - add.f32 %f898, %f892, %f884; - sub.f32 %f899, %f884, %f898; - add.f32 %f900, %f892, %f899; - add.f32 %f901, %f897, %f900; - add.f32 %f902, %f898, %f901; - sub.f32 %f903, %f898, %f902; - add.f32 %f904, %f901, %f903; - add.f32 %f905, %f123, %f902; - sub.f32 %f906, %f123, %f905; - add.f32 %f907, %f902, %f906; - add.f32 %f908, %f904, %f907; - add.f32 %f909, %f124, %f908; - add.f32 %f910, %f905, %f909; - sub.f32 %f911, %f905, %f910; - add.f32 %f912, %f909, %f911; - mul.rn.f32 %f914, %f1933, %f910; - neg.f32 %f915, %f914; - fma.rn.f32 %f916, %f1933, %f910, %f915; - fma.rn.f32 %f917, %f1933, %f912, %f916; - fma.rn.f32 %f919, %f1940, %f910, %f917; - add.rn.f32 %f920, %f914, %f919; - neg.f32 %f921, %f920; - add.rn.f32 %f922, %f914, %f921; - add.rn.f32 %f923, %f922, %f919; - mov.b32 %r139, %f920; - setp.eq.s32 %p82, %r139, 1118925336; - add.s32 %r140, %r139, -1; - mov.b32 %f924, %r140; - add.f32 %f925, %f923, 0f37000000; - selp.f32 %f926, %f924, %f920, %p82; - selp.f32 %f164, %f925, %f923, %p82; - mul.f32 %f927, %f926, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f928, %f927; - fma.rn.f32 %f929, %f928, %f1935, %f926; - fma.rn.f32 %f930, %f928, %f1936, %f929; - mul.f32 %f931, %f930, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f932, %f931; - add.f32 %f933, %f928, 0f00000000; - ex2.approx.f32 %f934, %f933; - mul.f32 %f935, %f932, %f934; - setp.lt.f32 %p83, %f926, 0fC2D20000; - selp.f32 %f936, 0f00000000, %f935, %p83; - setp.gt.f32 %p84, %f926, 0f42D20000; - selp.f32 %f2112, 0f7F800000, %f936, %p84; - setp.eq.f32 %p85, %f2112, 0f7F800000; - @%p85 bra BB0_58; - - fma.rn.f32 %f2112, %f2112, %f164, %f2112; - -BB0_58: - setp.geu.f32 %p226, %f118, 0f00000000; - mov.b32 %r141, %f2112; - xor.b32 %r142, %r141, -2147483648; - mov.b32 %f937, %r142; - selp.f32 %f168, %f937, %f2112, %p2; - add.f32 %f938, %f118, %f118; - selp.f32 %f939, %f938, 0f00000000, %p54; - setp.eq.f32 %p87, %f118, 0f00000000; - selp.f32 %f2113, %f939, %f168, %p87; - @%p226 bra BB0_60; - - mov.f32 %f1941, 0f40000000; - cvt.rzi.f32.f32 %f941, %f1941; - setp.neu.f32 %p88, %f941, 0f40000000; - selp.f32 %f2113, 0f7FFFFFFF, %f168, %p88; - -BB0_60: - cvt.rn.f32.s32 %f1956, %r260; - cvt.rn.f32.s32 %f1955, %r261; - mov.f32 %f1954, 0f35BFBE8E; - mov.f32 %f1953, 0f3F317200; - sub.f32 %f1952, %f1956, %f2133; - add.f32 %f1951, %f1956, 0f3F800000; - sub.f32 %f1950, %f1951, %f2133; - abs.f32 %f1949, %f118; - mov.f32 %f1948, 0f00000000; - mov.f32 %f1947, 0f3DAAAABD; - mov.f32 %f1946, 0f3C4CAF63; - mov.f32 %f1945, 0f3B18F0FE; - mov.f32 %f1944, 0fB5BFBE8E; - mov.f32 %f1943, 0fBF317200; - mov.f32 %f1942, 0f40000000; - add.f32 %f944, %f1949, 0f40000000; - mov.b32 %r143, %f944; - setp.gt.s32 %p89, %r143, 2139095039; - add.f32 %f945, %f118, 0f40000000; - setp.gtu.f32 %p90, %f1949, 0f7F800000; - selp.f32 %f946, %f945, %f2113, %p90; - selp.f32 %f947, 0fFF800000, 0f7F800000, %p2; - setp.neu.f32 %p91, %f1949, 0f7F800000; - selp.f32 %f948, %f946, %f947, %p91; - selp.f32 %f949, %f948, %f2113, %p89; - mul.f32 %f950, %f949, 0fBF000000; - setp.eq.f32 %p92, %f118, 0f3F800000; - selp.f32 %f951, 0fBF000000, %f950, %p92; - mul.f32 %f952, %f951, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f953, %f952; - fma.rn.f32 %f955, %f953, %f1943, %f951; - fma.rn.f32 %f957, %f953, %f1944, %f955; - mul.f32 %f958, %f957, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f959, %f958; - add.f32 %f960, %f953, 0f00000000; - ex2.approx.f32 %f961, %f960; - mul.f32 %f962, %f959, %f961; - setp.lt.f32 %p93, %f951, 0fC2D20000; - selp.f32 %f963, 0f00000000, %f962, %p93; - setp.gt.f32 %p94, %f951, 0f42D20000; - selp.f32 %f964, 0f7F800000, %f963, %p94; - sub.f32 %f965, %f163, %f964; - mul.f32 %f966, %f92, %f965; - mul.f32 %f172, %f152, %f966; - mul.f32 %f969, %f1950, %f163; - mul.f32 %f971, %f1952, %f964; - sub.f32 %f972, %f969, %f971; - mul.f32 %f973, %f972, %f93; - mul.f32 %f173, %f152, %f973; - add.f32 %f974, %f1955, 0f3F800000; - sub.f32 %f174, %f974, %f2132; - div.rn.f32 %f175, %f174, %f394; - abs.f32 %f176, %f175; - setp.lt.f32 %p95, %f176, 0f00800000; - mul.f32 %f975, %f176, 0f4B800000; - selp.f32 %f976, 0fC3170000, 0fC2FE0000, %p95; - selp.f32 %f977, %f975, %f176, %p95; - mov.b32 %r144, %f977; - and.b32 %r145, %r144, 8388607; - or.b32 %r146, %r145, 1065353216; - mov.b32 %f978, %r146; - shr.u32 %r147, %r144, 23; - cvt.rn.f32.u32 %f979, %r147; - add.f32 %f980, %f976, %f979; - setp.gt.f32 %p96, %f978, 0f3FB504F3; - mul.f32 %f981, %f978, 0f3F000000; - add.f32 %f982, %f980, 0f3F800000; - selp.f32 %f983, %f981, %f978, %p96; - selp.f32 %f984, %f982, %f980, %p96; - add.f32 %f985, %f983, 0fBF800000; - add.f32 %f943, %f983, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f942,%f943; - // inline asm - add.f32 %f986, %f985, %f985; - mul.f32 %f987, %f942, %f986; - mul.f32 %f988, %f987, %f987; - fma.rn.f32 %f991, %f1945, %f988, %f1946; - fma.rn.f32 %f993, %f991, %f988, %f1947; - mul.rn.f32 %f994, %f993, %f988; - mul.rn.f32 %f995, %f994, %f987; - sub.f32 %f996, %f985, %f987; - neg.f32 %f997, %f987; - add.f32 %f998, %f996, %f996; - fma.rn.f32 %f999, %f997, %f985, %f998; - mul.rn.f32 %f1000, %f942, %f999; - add.f32 %f1001, %f995, %f987; - sub.f32 %f1002, %f987, %f1001; - add.f32 %f1003, %f995, %f1002; - add.f32 %f1004, %f1000, %f1003; - add.f32 %f1005, %f1001, %f1004; - sub.f32 %f1006, %f1001, %f1005; - add.f32 %f1007, %f1004, %f1006; - mul.rn.f32 %f1009, %f984, %f1953; - mul.rn.f32 %f1011, %f984, %f1954; - add.f32 %f1012, %f1009, %f1005; - sub.f32 %f1013, %f1009, %f1012; - add.f32 %f1014, %f1005, %f1013; + setp.lt.f32 %p155, %f143, 0f00800000; + mul.f32 %f782, %f143, 0f4B800000; + selp.f32 %f783, %f782, %f143, %p155; + selp.f32 %f784, 0fC3170000, 0fC2FE0000, %p155; + mov.b32 %r255, %f783; + and.b32 %r256, %r255, 8388607; + or.b32 %r257, %r256, 1065353216; + mov.b32 %f785, %r257; + shr.u32 %r258, %r255, 23; + cvt.rn.f32.u32 %f786, %r258; + add.f32 %f787, %f784, %f786; + setp.gt.f32 %p156, %f785, 0f3FB504F3; + mul.f32 %f788, %f785, 0f3F000000; + add.f32 %f789, %f787, 0f3F800000; + selp.f32 %f790, %f789, %f787, %p156; + selp.f32 %f791, %f788, %f785, %p156; + add.f32 %f792, %f791, 0fBF800000; + add.f32 %f793, %f791, 0f3F800000; + rcp.approx.ftz.f32 %f794, %f793; + add.f32 %f795, %f792, %f792; + mul.f32 %f797, %f795, %f794; + mul.f32 %f798, %f797, %f797; + fma.rn.f32 %f801, %f1603, %f798, %f1604; + fma.rn.f32 %f803, %f801, %f798, %f1605; + mul.rn.f32 %f804, %f803, %f798; + mul.rn.f32 %f805, %f804, %f797; + sub.f32 %f806, %f792, %f797; + add.f32 %f807, %f806, %f806; + neg.f32 %f808, %f797; + fma.rn.f32 %f809, %f808, %f792, %f807; + mul.rn.f32 %f810, %f794, %f809; + add.f32 %f811, %f805, %f797; + sub.f32 %f812, %f797, %f811; + add.f32 %f813, %f805, %f812; + add.f32 %f814, %f810, %f813; + add.f32 %f815, %f811, %f814; + sub.f32 %f816, %f811, %f815; + add.f32 %f817, %f814, %f816; + mul.rn.f32 %f819, %f790, %f1606; + mul.rn.f32 %f821, %f790, %f1607; + add.f32 %f822, %f819, %f815; + sub.f32 %f823, %f819, %f822; + add.f32 %f824, %f815, %f823; + add.f32 %f825, %f817, %f824; + add.f32 %f826, %f821, %f825; + add.f32 %f827, %f822, %f826; + sub.f32 %f828, %f822, %f827; + add.f32 %f829, %f826, %f828; + mul.rn.f32 %f830, %f1597, %f827; + neg.f32 %f831, %f830; + fma.rn.f32 %f832, %f1597, %f827, %f831; + fma.rn.f32 %f833, %f1597, %f829, %f832; + fma.rn.f32 %f835, %f1613, %f827, %f833; + add.rn.f32 %f836, %f830, %f835; + neg.f32 %f837, %f836; + add.rn.f32 %f838, %f830, %f837; + add.rn.f32 %f839, %f838, %f835; + mov.b32 %r259, %f836; + setp.eq.s32 %p157, %r259, 1118925336; + add.s32 %r260, %r259, -1; + mov.b32 %f840, %r260; + add.f32 %f841, %f839, 0f37000000; + selp.f32 %f144, %f841, %f839, %p157; + selp.f32 %f842, %f840, %f836, %p157; + mul.rn.f32 %f843, %f842, %f1598; + cvt.rzi.f32.f32 %f844, %f843; + abs.f32 %f845, %f844; + setp.gt.f32 %p158, %f845, 0f42FC0000; + mov.b32 %r261, %f844; + and.b32 %r262, %r261, -2147483648; + or.b32 %r263, %r262, 1123811328; + mov.b32 %f846, %r263; + selp.f32 %f847, %f846, %f844, %p158; + fma.rn.f32 %f849, %f847, %f1608, %f842; + fma.rn.f32 %f851, %f847, %f1609, %f849; + mul.f32 %f852, %f851, 0f3FB8AA3B; + add.f32 %f853, %f847, 0f4B40007F; + mov.b32 %r264, %f853; + shl.b32 %r265, %r264, 23; + mov.b32 %f854, %r265; + ex2.approx.ftz.f32 %f855, %f852; + mul.f32 %f145, %f855, %f854; + setp.eq.f32 %p159, %f145, 0f7F800000; + mov.f32 %f1747, 0f7F800000; + @%p159 bra $L__BB0_100; + + fma.rn.f32 %f1747, %f145, %f144, %f145; + +$L__BB0_100: + setp.lt.f32 %p160, %f142, 0f00000000; + and.pred %p11, %p160, %p86; + setp.eq.f32 %p162, %f142, 0f00000000; + @%p162 bra $L__BB0_104; + bra.uni $L__BB0_101; + +$L__BB0_104: + add.f32 %f860, %f142, %f142; + selp.f32 %f1749, %f860, 0f00000000, %p86; + bra.uni $L__BB0_105; + +$L__BB0_101: + mov.b32 %r266, %f1747; + xor.b32 %r267, %r266, -2147483648; + mov.b32 %f856, %r267; + selp.f32 %f1749, %f856, %f1747, %p11; + setp.geu.f32 %p163, %f142, 0f00000000; + @%p163 bra $L__BB0_105; + + mov.f32 %f1675, 0f40000000; + cvt.rzi.f32.f32 %f858, %f1675; + setp.eq.f32 %p164, %f858, 0f40000000; + @%p164 bra $L__BB0_105; + + mov.f32 %f1749, 0f7FFFFFFF; + +$L__BB0_105: + abs.f32 %f1679, %f142; + add.f32 %f861, %f1679, 0f40000000; + mov.b32 %r268, %f861; + setp.lt.s32 %p166, %r268, 2139095040; + @%p166 bra $L__BB0_110; + + abs.f32 %f1680, %f142; + setp.gtu.f32 %p167, %f1680, 0f7F800000; + @%p167 bra $L__BB0_109; + bra.uni $L__BB0_107; + +$L__BB0_109: + add.f32 %f1749, %f142, 0f40000000; + bra.uni $L__BB0_110; + +$L__BB0_107: + abs.f32 %f1681, %f142; + setp.neu.f32 %p168, %f1681, 0f7F800000; + @%p168 bra $L__BB0_110; + + selp.f32 %f1749, 0fFF800000, 0f7F800000, %p11; + +$L__BB0_110: + cvt.rn.f32.s32 %f1624, %r536; + add.f32 %f1623, %f1624, 0f3F000000; + sub.f32 %f1622, %f1623, %f1762; + mov.f32 %f1750, 0f00000000; + sub.f32 %f1620, %f1624, %f1762; + add.f32 %f1619, %f1620, 0fBF000000; + mov.f32 %f1618, 0f32A57060; + mov.f32 %f1617, 0f4B400001; + mov.f32 %f1616, 0f437C0000; + mov.f32 %f1615, 0f3BBB989D; + mov.f32 %f1614, 0f3FB8AA3B; + mul.f32 %f863, %f1749, 0fBF000000; + setp.eq.f32 %p169, %f142, 0f3F800000; + selp.f32 %f864, 0fBF000000, %f863, %p169; + fma.rn.f32 %f867, %f864, %f1615, %f350; + cvt.sat.f32.f32 %f870, %f867; + fma.rm.f32 %f872, %f870, %f1616, %f1617; + add.f32 %f873, %f872, 0fCB40007F; + neg.f32 %f874, %f873; + fma.rn.f32 %f875, %f864, %f1614, %f874; + fma.rn.f32 %f877, %f864, %f1618, %f875; + mov.b32 %r269, %f872; + shl.b32 %r270, %r269, 23; + mov.b32 %f878, %r270; + ex2.approx.ftz.f32 %f879, %f877; + mul.f32 %f880, %f879, %f878; + sub.f32 %f881, %f141, %f880; + mul.f32 %f882, %f56, %f881; + mul.f32 %f154, %f99, %f882; + mul.f32 %f883, %f1619, %f880; + mul.f32 %f884, %f1622, %f141; + sub.f32 %f885, %f884, %f883; + cvt.f64.f32 %fd233, %f885; + mul.f64 %fd234, %fd46, %fd233; + cvt.f64.f32 %fd235, %f99; + mul.f64 %fd236, %fd234, %fd235; + cvt.rn.f32.f64 %f155, %fd236; + mul.f32 %f156, %f99, %f111; + setp.leu.f32 %p170, %f112, 0f3C23D70A; + @%p170 bra $L__BB0_112; + + div.rn.f32 %f886, %f113, %f112; + add.f32 %f1750, %f886, 0fBF800000; + +$L__BB0_112: + mov.f32 %f1751, 0f00000000; + @%p170 bra $L__BB0_127; + + mov.f64 %fd308, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r503}, %fd308; + } + and.b32 %r502, %r503, 2146435072; + setp.eq.s32 %p172, %r502, 1062207488; + cvt.f64.f32 %fd47, %f112; + { + .reg .b32 %temp; + mov.b64 {%temp, %r69}, %fd47; + } + abs.f64 %fd48, %fd47; + { // callseq 3, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd48; + .param .b64 param1; + st.param.f64 [param1+0], %fd308; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd351, [retval0+0]; + } // callseq 3 + setp.lt.s32 %p173, %r69, 0; + and.pred %p12, %p173, %p172; + not.pred %p174, %p12; + @%p174 bra $L__BB0_115; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r271}, %fd351; + } + xor.b32 %r272, %r271, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r273, %temp}, %fd351; + } + mov.b64 %fd351, {%r273, %r272}; + +$L__BB0_115: + setp.eq.f32 %p175, %f112, 0f00000000; + @%p175 bra $L__BB0_119; + bra.uni $L__BB0_116; + +$L__BB0_119: + setp.lt.s32 %p178, %r62, 0; + mov.u32 %r274, 0; + selp.b32 %r275, %r69, 0, %p172; + or.b32 %r276, %r275, 2146435072; + selp.b32 %r277, %r276, %r275, %p178; + mov.b64 %fd351, {%r274, %r277}; + bra.uni $L__BB0_120; + +$L__BB0_116: + setp.gt.s32 %p176, %r69, -1; + @%p176 bra $L__BB0_120; + + cvt.rzi.f64.f64 %fd239, %fd223; + setp.eq.f64 %p177, %fd239, 0d4000000000000000; + @%p177 bra $L__BB0_120; + + mov.f64 %fd351, 0dFFF8000000000000; + +$L__BB0_120: + add.f64 %fd54, %fd47, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r278}, %fd54; + } + and.b32 %r279, %r278, 2146435072; + setp.ne.s32 %p180, %r279, 2146435072; + mov.f64 %fd352, %fd351; + @%p180 bra $L__BB0_126; + + setp.gtu.f64 %p181, %fd48, 0d7FF0000000000000; + mov.f64 %fd352, %fd54; + @%p181 bra $L__BB0_126; + + setp.eq.s32 %p182, %r64, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r280, %temp}, %fd223; + } + setp.eq.s32 %p183, %r280, 0; + and.pred %p184, %p182, %p183; + @%p184 bra $L__BB0_125; + bra.uni $L__BB0_123; + +$L__BB0_125: + setp.lt.s32 %p190, %r62, 0; + mov.u32 %r285, 0; + setp.gt.f64 %p191, %fd48, 0d3FF0000000000000; + selp.b32 %r286, 2146435072, 0, %p191; + xor.b32 %r287, %r286, 2146435072; + selp.b32 %r288, %r287, %r286, %p190; + setp.eq.f32 %p192, %f112, 0fBF800000; + selp.b32 %r289, 1072693248, %r288, %p192; + mov.b64 %fd352, {%r285, %r289}; + bra.uni $L__BB0_126; + +$L__BB0_123: + { + .reg .b32 %temp; + mov.b64 {%r281, %temp}, %fd47; + } + and.b32 %r282, %r69, 2147483647; + setp.ne.s32 %p185, %r282, 2146435072; + setp.ne.s32 %p186, %r281, 0; + or.pred %p187, %p185, %p186; + mov.f64 %fd352, %fd351; + @%p187 bra $L__BB0_126; + + setp.ne.s32 %p188, %r64, 1071644672; + and.pred %p189, %p188, %p12; + selp.b32 %r283, %r66, %r65, %p189; + mov.u32 %r284, 0; + mov.b64 %fd352, {%r284, %r283}; + +$L__BB0_126: + setp.eq.f32 %p193, %f112, 0f3F800000; + selp.f64 %fd242, 0d3FF0000000000000, %fd352, %p193; + cvt.f64.f32 %fd243, %f113; + div.rn.f64 %fd244, %fd243, %fd242; + cvt.rn.f32.f64 %f1751, %fd244; + +$L__BB0_127: + mov.f64 %fd306, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r472}, %fd306; + } + and.b32 %r471, %r472, 2146435072; + mov.f32 %f888, 0f47C35000; + min.f32 %f889, %f1751, %f888; + cvt.f64.f32 %fd58, %f889; + min.f32 %f161, %f1750, %f888; + fma.rn.f32 %f1731, %f161, %f126, %f1731; + mul.f32 %f890, %f161, %f127; + cvt.f64.f32 %fd59, %f890; + cvt.f64.f32 %fd60, %f126; + { + .reg .b32 %temp; + mov.b64 {%temp, %r70}, %fd60; + } + abs.f64 %fd61, %fd60; + { // callseq 4, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd61; + .param .b64 param1; + st.param.f64 [param1+0], %fd306; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd353, [retval0+0]; + } // callseq 4 + setp.eq.s32 %p194, %r471, 1062207488; + @%p194 bra $L__BB0_162; + bra.uni $L__BB0_128; + +$L__BB0_162: + setp.gt.s32 %p240, %r70, -1; + @%p240 bra $L__BB0_164; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r326}, %fd353; + } + xor.b32 %r327, %r326, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r328, %temp}, %fd353; + } + mov.b64 %fd353, {%r328, %r327}; + +$L__BB0_164: + setp.eq.f32 %p241, %f126, 0f00000000; + @%p241 bra $L__BB0_168; + bra.uni $L__BB0_165; + +$L__BB0_168: + setp.lt.s32 %p244, %r62, 0; + mov.u32 %r329, 0; + or.b32 %r330, %r70, 2146435072; + selp.b32 %r331, %r330, %r70, %p244; + mov.b64 %fd353, {%r329, %r331}; + bra.uni $L__BB0_169; + +$L__BB0_128: + setp.eq.f32 %p195, %f126, 0f00000000; + @%p195 bra $L__BB0_132; + bra.uni $L__BB0_129; + +$L__BB0_132: + shr.s32 %r501, %r62, 31; + and.b32 %r500, %r501, 2146435072; + mov.u32 %r290, 0; + mov.b64 %fd353, {%r290, %r500}; + bra.uni $L__BB0_133; + +$L__BB0_165: + @%p240 bra $L__BB0_169; + + cvt.rzi.f64.f64 %fd277, %fd223; + setp.eq.f64 %p243, %fd277, 0d4000000000000000; + @%p243 bra $L__BB0_169; + + mov.f64 %fd353, 0dFFF8000000000000; + +$L__BB0_169: + add.f64 %fd95, %fd60, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r332}, %fd95; + } + and.b32 %r333, %r332, 2146435072; + setp.ne.s32 %p245, %r333, 2146435072; + mov.f64 %fd361, %fd353; + @%p245 bra $L__BB0_175; + + setp.gtu.f64 %p246, %fd61, 0d7FF0000000000000; + mov.f64 %fd361, %fd95; + @%p246 bra $L__BB0_175; + + setp.eq.s32 %p247, %r64, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r334, %temp}, %fd223; + } + setp.eq.s32 %p248, %r334, 0; + and.pred %p249, %p247, %p248; + @%p249 bra $L__BB0_174; + bra.uni $L__BB0_172; + +$L__BB0_174: + setp.lt.s32 %p256, %r62, 0; + mov.u32 %r339, 0; + setp.gt.f64 %p257, %fd61, 0d3FF0000000000000; + selp.b32 %r340, 2146435072, 0, %p257; + xor.b32 %r341, %r340, 2146435072; + selp.b32 %r342, %r341, %r340, %p256; + setp.eq.f32 %p258, %f126, 0fBF800000; + selp.b32 %r343, 1072693248, %r342, %p258; + mov.b64 %fd361, {%r339, %r343}; + bra.uni $L__BB0_175; + +$L__BB0_129: + setp.gt.s32 %p196, %r70, -1; + @%p196 bra $L__BB0_133; + + cvt.rzi.f64.f64 %fd247, %fd223; + setp.eq.f64 %p197, %fd247, 0d4000000000000000; + @%p197 bra $L__BB0_133; + + mov.f64 %fd353, 0dFFF8000000000000; + +$L__BB0_133: + add.f64 %fd65, %fd60, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r291}, %fd65; + } + and.b32 %r292, %r291, 2146435072; + setp.ne.s32 %p198, %r292, 2146435072; + mov.f64 %fd354, %fd353; + @%p198 bra $L__BB0_139; + + setp.gtu.f64 %p199, %fd61, 0d7FF0000000000000; + mov.f64 %fd354, %fd65; + @%p199 bra $L__BB0_139; + + setp.eq.s32 %p200, %r64, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r293, %temp}, %fd223; + } + setp.eq.s32 %p201, %r293, 0; + and.pred %p202, %p200, %p201; + @%p202 bra $L__BB0_138; + bra.uni $L__BB0_136; + +$L__BB0_138: + setp.lt.s32 %p206, %r62, 0; + mov.u32 %r297, 0; + setp.gt.f64 %p207, %fd61, 0d3FF0000000000000; + selp.b32 %r298, 2146435072, 0, %p207; + xor.b32 %r299, %r298, 2146435072; + selp.b32 %r300, %r299, %r298, %p206; + setp.eq.f32 %p208, %f126, 0fBF800000; + selp.b32 %r301, 1072693248, %r300, %p208; + mov.b64 %fd354, {%r297, %r301}; + bra.uni $L__BB0_139; + +$L__BB0_172: + { + .reg .b32 %temp; + mov.b64 {%r335, %temp}, %fd60; + } + and.b32 %r336, %r70, 2147483647; + setp.ne.s32 %p250, %r336, 2146435072; + setp.ne.s32 %p251, %r335, 0; + or.pred %p252, %p250, %p251; + mov.f64 %fd361, %fd353; + @%p252 bra $L__BB0_175; + + setp.lt.s32 %p253, %r70, 0; + mov.u32 %r337, 0; + setp.ne.s32 %p254, %r64, 1071644672; + and.pred %p255, %p254, %p253; + selp.b32 %r338, %r66, %r65, %p255; + mov.b64 %fd361, {%r337, %r338}; + +$L__BB0_175: + setp.eq.f32 %p259, %f126, 0f3F800000; + selp.f64 %fd280, 0d3FF0000000000000, %fd361, %p259; + mul.f64 %fd281, %fd280, %fd58; + sub.f64 %fd282, %fd59, %fd281; + cvt.f64.f32 %fd283, %f1735; + add.f64 %fd371, %fd282, %fd283; + cvt.f64.f32 %fd100, %f154; + { + .reg .b32 %temp; + mov.b64 {%temp, %r73}, %fd100; + } + abs.f64 %fd101, %fd100; + { // callseq 7, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd101; + .param .b64 param1; + st.param.f64 [param1+0], %fd223; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd363, [retval0+0]; + } // callseq 7 + setp.gt.s32 %p260, %r73, -1; + @%p260 bra $L__BB0_177; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r344}, %fd363; + } + xor.b32 %r345, %r344, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r346, %temp}, %fd363; + } + mov.b64 %fd363, {%r346, %r345}; + +$L__BB0_177: + setp.eq.f32 %p261, %f154, 0f00000000; + @%p261 bra $L__BB0_181; + bra.uni $L__BB0_178; + +$L__BB0_181: + setp.lt.s32 %p264, %r62, 0; + mov.u32 %r347, 0; + or.b32 %r348, %r73, 2146435072; + selp.b32 %r349, %r348, %r73, %p264; + mov.b64 %fd363, {%r347, %r349}; + bra.uni $L__BB0_182; + +$L__BB0_178: + @%p260 bra $L__BB0_182; + + cvt.rzi.f64.f64 %fd286, %fd223; + setp.eq.f64 %p263, %fd286, 0d4000000000000000; + @%p263 bra $L__BB0_182; + + mov.f64 %fd363, 0dFFF8000000000000; + +$L__BB0_182: + add.f64 %fd107, %fd100, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r350}, %fd107; + } + and.b32 %r351, %r350, 2146435072; + setp.ne.s32 %p265, %r351, 2146435072; + mov.f64 %fd364, %fd363; + @%p265 bra $L__BB0_188; + + setp.gtu.f64 %p266, %fd101, 0d7FF0000000000000; + mov.f64 %fd364, %fd107; + @%p266 bra $L__BB0_188; + + setp.eq.s32 %p267, %r64, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r352, %temp}, %fd223; + } + setp.eq.s32 %p268, %r352, 0; + and.pred %p269, %p267, %p268; + @%p269 bra $L__BB0_187; + bra.uni $L__BB0_185; + +$L__BB0_187: + setp.lt.s32 %p276, %r62, 0; + mov.u32 %r357, 0; + setp.gt.f64 %p277, %fd101, 0d3FF0000000000000; + selp.b32 %r358, 2146435072, 0, %p277; + xor.b32 %r359, %r358, 2146435072; + selp.b32 %r360, %r359, %r358, %p276; + setp.eq.f32 %p278, %f154, 0fBF800000; + selp.b32 %r361, 1072693248, %r360, %p278; + mov.b64 %fd364, {%r357, %r361}; + bra.uni $L__BB0_188; + +$L__BB0_136: + { + .reg .b32 %temp; + mov.b64 {%r294, %temp}, %fd60; + } + and.b32 %r295, %r70, 2147483647; + setp.ne.s32 %p203, %r295, 2146435072; + setp.ne.s32 %p204, %r294, 0; + or.pred %p205, %p203, %p204; + mov.f64 %fd354, %fd353; + @%p205 bra $L__BB0_139; + + mov.u32 %r296, 0; + mov.b64 %fd354, {%r296, %r65}; + +$L__BB0_139: + setp.eq.f32 %p209, %f126, 0f3F800000; + selp.f64 %fd250, 0d3FF0000000000000, %fd354, %p209; + mul.f64 %fd251, %fd250, %fd58; + sub.f64 %fd252, %fd59, %fd251; + cvt.f64.f32 %fd253, %f1735; + add.f64 %fd371, %fd252, %fd253; + cvt.f64.f32 %fd70, %f154; + { + .reg .b32 %temp; + mov.b64 {%temp, %r71}, %fd70; + } + abs.f64 %fd71, %fd70; + setp.eq.f32 %p210, %f154, 0f00000000; + @%p210 bra $L__BB0_143; + bra.uni $L__BB0_140; + +$L__BB0_143: + shr.s32 %r499, %r62, 31; + and.b32 %r498, %r499, 2146435072; + mov.u32 %r302, 0; + mov.b64 %fd355, {%r302, %r498}; + bra.uni $L__BB0_144; + +$L__BB0_140: + { // callseq 5, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd71; + .param .b64 param1; + st.param.f64 [param1+0], %fd223; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd355, [retval0+0]; + } // callseq 5 + setp.gt.s32 %p211, %r71, -1; + @%p211 bra $L__BB0_144; + + cvt.rzi.f64.f64 %fd256, %fd223; + setp.eq.f64 %p212, %fd256, 0d4000000000000000; + @%p212 bra $L__BB0_144; + + mov.f64 %fd355, 0dFFF8000000000000; + +$L__BB0_144: + add.f64 %fd75, %fd70, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r303}, %fd75; + } + and.b32 %r304, %r303, 2146435072; + setp.ne.s32 %p213, %r304, 2146435072; + mov.f64 %fd356, %fd355; + @%p213 bra $L__BB0_150; + + setp.gtu.f64 %p214, %fd71, 0d7FF0000000000000; + mov.f64 %fd356, %fd75; + @%p214 bra $L__BB0_150; + + setp.eq.s32 %p215, %r64, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r305, %temp}, %fd223; + } + setp.eq.s32 %p216, %r305, 0; + and.pred %p217, %p215, %p216; + @%p217 bra $L__BB0_149; + bra.uni $L__BB0_147; + +$L__BB0_149: + setp.lt.s32 %p221, %r62, 0; + mov.u32 %r309, 0; + setp.gt.f64 %p222, %fd71, 0d3FF0000000000000; + selp.b32 %r310, 2146435072, 0, %p222; + xor.b32 %r311, %r310, 2146435072; + selp.b32 %r312, %r311, %r310, %p221; + setp.eq.f32 %p223, %f154, 0fBF800000; + selp.b32 %r313, 1072693248, %r312, %p223; + mov.b64 %fd356, {%r309, %r313}; + bra.uni $L__BB0_150; + +$L__BB0_185: + { + .reg .b32 %temp; + mov.b64 {%r353, %temp}, %fd100; + } + and.b32 %r354, %r73, 2147483647; + setp.ne.s32 %p270, %r354, 2146435072; + setp.ne.s32 %p271, %r353, 0; + or.pred %p272, %p270, %p271; + mov.f64 %fd364, %fd363; + @%p272 bra $L__BB0_188; + + setp.lt.s32 %p273, %r73, 0; + mov.u32 %r355, 0; + setp.ne.s32 %p274, %r64, 1071644672; + and.pred %p275, %p274, %p273; + selp.b32 %r356, %r66, %r65, %p275; + mov.b64 %fd364, {%r355, %r356}; + +$L__BB0_188: + setp.eq.f32 %p279, %f154, 0f3F800000; + selp.f64 %fd289, 0d3FF0000000000000, %fd364, %p279; + mul.f64 %fd290, %fd289, %fd58; + mul.f32 %f893, %f161, %f155; + cvt.f64.f32 %fd291, %f893; + sub.f64 %fd292, %fd291, %fd290; + cvt.f64.f32 %fd293, %f1734; + add.f64 %fd370, %fd292, %fd293; + cvt.f64.f32 %fd112, %f156; + { + .reg .b32 %temp; + mov.b64 {%temp, %r74}, %fd112; + } + abs.f64 %fd113, %fd112; + { // callseq 8, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd113; + .param .b64 param1; + st.param.f64 [param1+0], %fd223; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd366, [retval0+0]; + } // callseq 8 + setp.gt.s32 %p280, %r74, -1; + @%p280 bra $L__BB0_190; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r362}, %fd366; + } + xor.b32 %r363, %r362, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r364, %temp}, %fd366; + } + mov.b64 %fd366, {%r364, %r363}; + +$L__BB0_190: + setp.eq.f32 %p281, %f156, 0f00000000; + @%p281 bra $L__BB0_194; + bra.uni $L__BB0_191; + +$L__BB0_194: + setp.lt.s32 %p284, %r62, 0; + mov.u32 %r365, 0; + or.b32 %r366, %r74, 2146435072; + selp.b32 %r367, %r366, %r74, %p284; + mov.b64 %fd366, {%r365, %r367}; + bra.uni $L__BB0_195; + +$L__BB0_191: + @%p280 bra $L__BB0_195; + + cvt.rzi.f64.f64 %fd296, %fd223; + setp.eq.f64 %p283, %fd296, 0d4000000000000000; + @%p283 bra $L__BB0_195; + + mov.f64 %fd366, 0dFFF8000000000000; + +$L__BB0_195: + add.f64 %fd119, %fd112, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r368}, %fd119; + } + and.b32 %r369, %r368, 2146435072; + setp.ne.s32 %p285, %r369, 2146435072; + mov.f64 %fd367, %fd366; + @%p285 bra $L__BB0_201; + + setp.gtu.f64 %p286, %fd113, 0d7FF0000000000000; + mov.f64 %fd367, %fd119; + @%p286 bra $L__BB0_201; + + setp.eq.s32 %p287, %r64, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r370, %temp}, %fd223; + } + setp.eq.s32 %p288, %r370, 0; + and.pred %p289, %p287, %p288; + @%p289 bra $L__BB0_200; + bra.uni $L__BB0_198; + +$L__BB0_200: + setp.lt.s32 %p296, %r62, 0; + mov.u32 %r375, 0; + setp.gt.f64 %p297, %fd113, 0d3FF0000000000000; + selp.b32 %r376, 2146435072, 0, %p297; + xor.b32 %r377, %r376, 2146435072; + selp.b32 %r378, %r377, %r376, %p296; + setp.eq.f32 %p298, %f156, 0fBF800000; + selp.b32 %r379, 1072693248, %r378, %p298; + mov.b64 %fd367, {%r375, %r379}; + bra.uni $L__BB0_201; + +$L__BB0_147: + { + .reg .b32 %temp; + mov.b64 {%r306, %temp}, %fd70; + } + and.b32 %r307, %r71, 2147483647; + setp.ne.s32 %p218, %r307, 2146435072; + setp.ne.s32 %p219, %r306, 0; + or.pred %p220, %p218, %p219; + mov.f64 %fd356, %fd355; + @%p220 bra $L__BB0_150; + + mov.u32 %r308, 0; + mov.b64 %fd356, {%r308, %r65}; + +$L__BB0_150: + setp.eq.f32 %p224, %f154, 0f3F800000; + selp.f64 %fd259, 0d3FF0000000000000, %fd356, %p224; + mul.f64 %fd260, %fd259, %fd58; + mul.f32 %f891, %f161, %f155; + cvt.f64.f32 %fd261, %f891; + sub.f64 %fd262, %fd261, %fd260; + cvt.f64.f32 %fd263, %f1734; + add.f64 %fd370, %fd262, %fd263; + cvt.f64.f32 %fd80, %f156; + { + .reg .b32 %temp; + mov.b64 {%temp, %r72}, %fd80; + } + abs.f64 %fd81, %fd80; + setp.eq.f32 %p225, %f156, 0f00000000; + @%p225 bra $L__BB0_154; + bra.uni $L__BB0_151; + +$L__BB0_154: + shr.s32 %r497, %r62, 31; + and.b32 %r496, %r497, 2146435072; + mov.u32 %r314, 0; + mov.b64 %fd357, {%r314, %r496}; + bra.uni $L__BB0_155; + +$L__BB0_151: + { // callseq 6, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd81; + .param .b64 param1; + st.param.f64 [param1+0], %fd223; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd357, [retval0+0]; + } // callseq 6 + setp.gt.s32 %p226, %r72, -1; + @%p226 bra $L__BB0_155; + + cvt.rzi.f64.f64 %fd266, %fd223; + setp.eq.f64 %p227, %fd266, 0d4000000000000000; + @%p227 bra $L__BB0_155; + + mov.f64 %fd357, 0dFFF8000000000000; + +$L__BB0_155: + add.f64 %fd85, %fd80, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r315}, %fd85; + } + and.b32 %r316, %r315, 2146435072; + setp.ne.s32 %p228, %r316, 2146435072; + mov.f64 %fd358, %fd357; + @%p228 bra $L__BB0_161; + + setp.gtu.f64 %p229, %fd81, 0d7FF0000000000000; + mov.f64 %fd358, %fd85; + @%p229 bra $L__BB0_161; + + setp.eq.s32 %p230, %r64, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r317, %temp}, %fd223; + } + setp.eq.s32 %p231, %r317, 0; + and.pred %p232, %p230, %p231; + @%p232 bra $L__BB0_160; + bra.uni $L__BB0_158; + +$L__BB0_160: + setp.lt.s32 %p236, %r62, 0; + mov.u32 %r321, 0; + setp.gt.f64 %p237, %fd81, 0d3FF0000000000000; + selp.b32 %r322, 2146435072, 0, %p237; + xor.b32 %r323, %r322, 2146435072; + selp.b32 %r324, %r323, %r322, %p236; + setp.eq.f32 %p238, %f156, 0fBF800000; + selp.b32 %r325, 1072693248, %r324, %p238; + mov.b64 %fd358, {%r321, %r325}; + bra.uni $L__BB0_161; + +$L__BB0_198: + { + .reg .b32 %temp; + mov.b64 {%r371, %temp}, %fd112; + } + and.b32 %r372, %r74, 2147483647; + setp.ne.s32 %p290, %r372, 2146435072; + setp.ne.s32 %p291, %r371, 0; + or.pred %p292, %p290, %p291; + mov.f64 %fd367, %fd366; + @%p292 bra $L__BB0_201; + + setp.lt.s32 %p293, %r74, 0; + mov.u32 %r373, 0; + setp.ne.s32 %p294, %r64, 1071644672; + and.pred %p295, %p294, %p293; + selp.b32 %r374, %r66, %r65, %p295; + mov.b64 %fd367, {%r373, %r374}; + +$L__BB0_201: + mul.f32 %f894, %f161, 0f00000000; + cvt.f64.f32 %fd299, %f894; + setp.eq.f32 %p299, %f156, 0f3F800000; + selp.f64 %fd300, 0d3FF0000000000000, %fd367, %p299; + mul.f64 %fd301, %fd300, %fd58; + sub.f64 %fd302, %fd299, %fd301; + cvt.f64.f32 %fd303, %f1733; + add.f64 %fd369, %fd302, %fd303; + cvt.f64.f32 %fd304, %f1732; + sub.f64 %fd305, %fd299, %fd58; + add.f64 %fd368, %fd305, %fd304; + bra.uni $L__BB0_202; + +$L__BB0_158: + { + .reg .b32 %temp; + mov.b64 {%r318, %temp}, %fd80; + } + and.b32 %r319, %r72, 2147483647; + setp.ne.s32 %p233, %r319, 2146435072; + setp.ne.s32 %p234, %r318, 0; + or.pred %p235, %p233, %p234; + mov.f64 %fd358, %fd357; + @%p235 bra $L__BB0_161; + + mov.u32 %r320, 0; + mov.b64 %fd358, {%r320, %r65}; + +$L__BB0_161: + mul.f32 %f892, %f161, 0f00000000; + cvt.f64.f32 %fd269, %f892; + setp.eq.f32 %p239, %f156, 0f3F800000; + selp.f64 %fd270, 0d3FF0000000000000, %fd358, %p239; + mul.f64 %fd271, %fd270, %fd58; + sub.f64 %fd272, %fd269, %fd271; + cvt.f64.f32 %fd273, %f1733; + add.f64 %fd369, %fd272, %fd273; + cvt.f64.f32 %fd274, %f1732; + sub.f64 %fd275, %fd269, %fd58; + add.f64 %fd368, %fd275, %fd274; + +$L__BB0_202: + cvt.rn.f32.f64 %f1735, %fd371; + cvt.rn.f32.f64 %f1734, %fd370; + cvt.rn.f32.f64 %f1733, %fd369; + cvt.rn.f32.f64 %f1732, %fd368; + fma.rn.f32 %f1730, %f161, %f154, %f1730; + fma.rn.f32 %f1729, %f161, %f156, %f1729; + add.f32 %f1728, %f1728, %f161; + add.s32 %r536, %r536, 1; + setp.lt.s32 %p300, %r536, %r86; + @%p300 bra $L__BB0_56; + + add.s32 %r535, %r535, 1; + setp.lt.s32 %p301, %r535, %r86; + @%p301 bra $L__BB0_55; + +$L__BB0_204: + ld.param.u32 %r473, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_3]; + div.rn.f32 %f895, %f1731, %f1735; + mov.f32 %f896, 0fBF800000; + max.f32 %f897, %f895, %f896; + mov.f32 %f898, 0f3F800000; + min.f32 %f899, %f897, %f898; + sub.f32 %f1763, %f1763, %f899; + div.rn.f32 %f900, %f1730, %f1734; + max.f32 %f901, %f900, %f896; + min.f32 %f902, %f901, %f898; + sub.f32 %f1762, %f1762, %f902; + neg.f32 %f903, %f1761; + div.rn.f32 %f904, %f1729, %f1733; + max.f32 %f905, %f904, %f903; + min.f32 %f906, %f905, %f1761; + sub.f32 %f907, %f1761, %f906; + neg.f32 %f908, %f1760; + div.rn.f32 %f909, %f1728, %f1732; + max.f32 %f910, %f909, %f908; + min.f32 %f911, %f910, %f1760; + sub.f32 %f912, %f1760, %f911; + max.f32 %f1761, %f907, %f898; + mov.f32 %f913, 0f3C23D70A; + max.f32 %f1760, %f912, %f913; + add.s32 %r534, %r534, 1; + setp.lt.s32 %p302, %r534, %r473; + @%p302 bra $L__BB0_53; + +$L__BB0_205: + mov.f32 %f924, 0f00000000; + mov.f32 %f1775, %f924; + mov.f32 %f1776, %f924; + mov.f32 %f1777, %f924; + mov.f32 %f1780, %f924; + mov.f32 %f1778, %f924; + mov.f32 %f1779, %f924; + mov.f32 %f1781, %f924; + mov.f32 %f1782, %f924; + mov.f32 %f1783, %f924; + mov.f32 %f1784, %f924; + mov.f32 %f1802, %f924; + @%p20 bra $L__BB0_260; + + div.rn.f32 %f936, %f1761, 0fC0206C98; + div.rn.f32 %f186, %f936, %f317; + sqrt.rn.f32 %f187, %f34; + mov.f32 %f937, 0f3F800000; + cvt.rzi.f32.f32 %f938, %f937; + add.f32 %f939, %f938, %f938; + mov.f32 %f940, 0f40000000; + sub.f32 %f941, %f940, %f939; + abs.f32 %f188, %f941; + mov.u32 %r380, 0; + setp.eq.f32 %p311, %f188, 0f3F800000; + mov.u32 %r537, %r380; + +$L__BB0_207: + cvt.rn.f32.s32 %f942, %r537; + sub.f32 %f943, %f942, %f1763; + add.f32 %f944, %f943, 0f3F000000; + mul.f32 %f945, %f187, %f944; + abs.f32 %f200, %f945; + setp.ge.f32 %p304, %f200, 0f3F8060FE; + mul.f32 %f946, %f945, %f945; + selp.f32 %f947, %f200, %f946, %p304; + selp.f32 %f948, 0f3789CA3C, 0f38B1E96A, %p304; + selp.f32 %f949, 0fB9F560B9, 0fBA574D20, %p304; + fma.rn.f32 %f950, %f948, %f947, %f949; + selp.f32 %f951, 0f3BAC840B, 0f3BAAD5EA, %p304; + fma.rn.f32 %f952, %f950, %f947, %f951; + selp.f32 %f953, 0fBD0C8162, 0fBCDC1BE7, %p304; + fma.rn.f32 %f954, %f952, %f947, %f953; + selp.f32 %f955, 0f3E1CF906, 0f3DE718AF, %p304; + fma.rn.f32 %f956, %f954, %f947, %f955; + selp.f32 %f957, 0f3F6A937E, 0fBEC093AC, %p304; + fma.rn.f32 %f958, %f956, %f947, %f957; + selp.f32 %f959, 0f3F20D842, 0f3E0375D3, %p304; + fma.rn.f32 %f960, %f958, %f947, %f959; + neg.f32 %f961, %f200; + selp.f32 %f962, %f961, %f945, %p304; + fma.rn.f32 %f201, %f960, %f962, %f962; + mov.b32 %r382, %f945; + and.b32 %r79, %r382, -2147483648; + add.f32 %f963, %f943, 0fBF000000; + mul.f32 %f964, %f187, %f963; + abs.f32 %f202, %f964; + setp.ge.f32 %p305, %f202, 0f3F8060FE; + mul.f32 %f965, %f964, %f964; + selp.f32 %f966, %f202, %f965, %p305; + selp.f32 %f967, 0f3789CA3C, 0f38B1E96A, %p305; + selp.f32 %f968, 0fB9F560B9, 0fBA574D20, %p305; + fma.rn.f32 %f969, %f967, %f966, %f968; + selp.f32 %f970, 0f3BAC840B, 0f3BAAD5EA, %p305; + fma.rn.f32 %f971, %f969, %f966, %f970; + selp.f32 %f972, 0fBD0C8162, 0fBCDC1BE7, %p305; + fma.rn.f32 %f973, %f971, %f966, %f972; + selp.f32 %f974, 0f3E1CF906, 0f3DE718AF, %p305; + fma.rn.f32 %f975, %f973, %f966, %f974; + selp.f32 %f976, 0f3F6A937E, 0fBEC093AC, %p305; + fma.rn.f32 %f977, %f975, %f966, %f976; + selp.f32 %f978, 0f3F20D842, 0f3E0375D3, %p305; + fma.rn.f32 %f979, %f977, %f966, %f978; + neg.f32 %f980, %f202; + selp.f32 %f981, %f980, %f964, %p305; + fma.rn.f32 %f203, %f979, %f981, %f981; + mov.b32 %r383, %f964; + and.b32 %r80, %r383, -2147483648; + add.f32 %f982, %f942, 0f3F000000; + sub.f32 %f983, %f982, %f1763; + div.rn.f32 %f204, %f983, %f317; + abs.f32 %f205, %f204; + setp.lt.f32 %p306, %f205, 0f00800000; + mul.f32 %f984, %f205, 0f4B800000; + selp.f32 %f985, %f984, %f205, %p306; + selp.f32 %f986, 0fC3170000, 0fC2FE0000, %p306; + mov.b32 %r384, %f985; + and.b32 %r385, %r384, 8388607; + or.b32 %r386, %r385, 1065353216; + mov.b32 %f987, %r386; + shr.u32 %r387, %r384, 23; + cvt.rn.f32.u32 %f988, %r387; + add.f32 %f989, %f986, %f988; + setp.gt.f32 %p307, %f987, 0f3FB504F3; + mul.f32 %f990, %f987, 0f3F000000; + add.f32 %f991, %f989, 0f3F800000; + selp.f32 %f992, %f991, %f989, %p307; + selp.f32 %f993, %f990, %f987, %p307; + add.f32 %f994, %f993, 0fBF800000; + add.f32 %f995, %f993, 0f3F800000; + rcp.approx.ftz.f32 %f996, %f995; + add.f32 %f997, %f994, %f994; + mul.f32 %f999, %f997, %f996; + mul.f32 %f1000, %f999, %f999; + mov.f32 %f1001, 0f3C4CAF63; + mov.f32 %f1002, 0f3B18F0FE; + fma.rn.f32 %f1003, %f1002, %f1000, %f1001; + mov.f32 %f1004, 0f3DAAAABD; + fma.rn.f32 %f1005, %f1003, %f1000, %f1004; + mul.rn.f32 %f1006, %f1005, %f1000; + mul.rn.f32 %f1007, %f1006, %f999; + sub.f32 %f1008, %f994, %f999; + add.f32 %f1009, %f1008, %f1008; + neg.f32 %f1010, %f999; + fma.rn.f32 %f1011, %f1010, %f994, %f1009; + mul.rn.f32 %f1012, %f996, %f1011; + add.f32 %f1013, %f1007, %f999; + sub.f32 %f1014, %f999, %f1013; add.f32 %f1015, %f1007, %f1014; - add.f32 %f1016, %f1011, %f1015; - add.f32 %f1017, %f1012, %f1016; - sub.f32 %f1018, %f1012, %f1017; + add.f32 %f1016, %f1012, %f1015; + add.f32 %f1017, %f1013, %f1016; + sub.f32 %f1018, %f1013, %f1017; add.f32 %f1019, %f1016, %f1018; - mul.rn.f32 %f1021, %f1942, %f1017; - neg.f32 %f1022, %f1021; - fma.rn.f32 %f1023, %f1942, %f1017, %f1022; - fma.rn.f32 %f1024, %f1942, %f1019, %f1023; - fma.rn.f32 %f1026, %f1948, %f1017, %f1024; - add.rn.f32 %f1027, %f1021, %f1026; - neg.f32 %f1028, %f1027; - add.rn.f32 %f1029, %f1021, %f1028; - add.rn.f32 %f1030, %f1029, %f1026; - mov.b32 %r148, %f1027; - setp.eq.s32 %p97, %r148, 1118925336; - add.s32 %r149, %r148, -1; - mov.b32 %f1031, %r149; - add.f32 %f1032, %f1030, 0f37000000; - selp.f32 %f1033, %f1031, %f1027, %p97; - selp.f32 %f177, %f1032, %f1030, %p97; - mul.f32 %f1034, %f1033, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1035, %f1034; - fma.rn.f32 %f1036, %f1035, %f1943, %f1033; - fma.rn.f32 %f1037, %f1035, %f1944, %f1036; - mul.f32 %f1038, %f1037, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1039, %f1038; - add.f32 %f1040, %f1035, 0f00000000; - ex2.approx.f32 %f1041, %f1040; - mul.f32 %f1042, %f1039, %f1041; - setp.lt.f32 %p98, %f1033, 0fC2D20000; - selp.f32 %f1043, 0f00000000, %f1042, %p98; - setp.gt.f32 %p99, %f1033, 0f42D20000; - selp.f32 %f2114, 0f7F800000, %f1043, %p99; - setp.eq.f32 %p100, %f2114, 0f7F800000; - @%p100 bra BB0_62; - - fma.rn.f32 %f2114, %f2114, %f177, %f2114; - -BB0_62: - setp.lt.f32 %p101, %f175, 0f00000000; - and.pred %p5, %p101, %p54; - mov.b32 %r150, %f2114; - xor.b32 %r151, %r150, -2147483648; - mov.b32 %f1044, %r151; - selp.f32 %f2116, %f1044, %f2114, %p5; - setp.eq.f32 %p103, %f175, 0f00000000; - @%p103 bra BB0_65; - bra.uni BB0_63; - -BB0_65: - add.f32 %f1047, %f175, %f175; - selp.f32 %f2116, %f1047, 0f00000000, %p54; - bra.uni BB0_66; - -BB0_63: - setp.geu.f32 %p104, %f175, 0f00000000; - @%p104 bra BB0_66; - - mov.f32 %f1964, 0f40000000; - cvt.rzi.f32.f32 %f1046, %f1964; - setp.neu.f32 %p105, %f1046, 0f40000000; - selp.f32 %f2116, 0f7FFFFFFF, %f2116, %p105; - -BB0_66: - abs.f32 %f1905, %f175; - add.f32 %f1048, %f1905, 0f40000000; - mov.b32 %r152, %f1048; - setp.lt.s32 %p107, %r152, 2139095040; - @%p107 bra BB0_71; - - abs.f32 %f1962, %f175; - setp.gtu.f32 %p108, %f1962, 0f7F800000; - @%p108 bra BB0_70; - bra.uni BB0_68; - -BB0_70: - add.f32 %f2116, %f175, 0f40000000; - bra.uni BB0_71; - -BB0_68: - abs.f32 %f1963, %f175; - setp.neu.f32 %p109, %f1963, 0f7F800000; - @%p109 bra BB0_71; - - selp.f32 %f2116, 0fFF800000, 0f7F800000, %p5; - -BB0_71: - mov.f32 %f1916, 0f35BFBE8E; - mov.f32 %f1915, 0f3F317200; - mov.f32 %f1914, 0f00000000; - mov.f32 %f1913, 0f3DAAAABD; - mov.f32 %f1912, 0f3C4CAF63; - mov.f32 %f1911, 0f3B18F0FE; - mov.f32 %f1910, 0fB5BFBE8E; - mov.f32 %f1909, 0fBF317200; - mov.f32 %f1908, 0f40000000; - cvt.rn.f32.s32 %f1907, %r261; - sub.f32 %f1906, %f1907, %f2132; - mul.f32 %f1051, %f2116, 0fBF000000; - setp.eq.f32 %p110, %f175, 0f3F800000; - selp.f32 %f1052, 0fBF000000, %f1051, %p110; - mul.f32 %f1053, %f1052, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1054, %f1053; - fma.rn.f32 %f1056, %f1054, %f1909, %f1052; - fma.rn.f32 %f1058, %f1054, %f1910, %f1056; - mul.f32 %f1059, %f1058, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1060, %f1059; - add.f32 %f1061, %f1054, 0f00000000; - ex2.approx.f32 %f1062, %f1061; - mul.f32 %f1063, %f1060, %f1062; - setp.lt.f32 %p111, %f1052, 0fC2D20000; - selp.f32 %f1064, 0f00000000, %f1063, %p111; - setp.gt.f32 %p112, %f1052, 0f42D20000; - selp.f32 %f188, 0f7F800000, %f1064, %p112; - div.rn.f32 %f189, %f1906, %f394; - abs.f32 %f190, %f189; - setp.lt.f32 %p113, %f190, 0f00800000; - mul.f32 %f1065, %f190, 0f4B800000; - selp.f32 %f1066, 0fC3170000, 0fC2FE0000, %p113; - selp.f32 %f1067, %f1065, %f190, %p113; - mov.b32 %r153, %f1067; - and.b32 %r154, %r153, 8388607; - or.b32 %r155, %r154, 1065353216; - mov.b32 %f1068, %r155; - shr.u32 %r156, %r153, 23; - cvt.rn.f32.u32 %f1069, %r156; - add.f32 %f1070, %f1066, %f1069; - setp.gt.f32 %p114, %f1068, 0f3FB504F3; - mul.f32 %f1071, %f1068, 0f3F000000; + mov.f32 %f1020, 0f3F317200; + mul.rn.f32 %f1021, %f992, %f1020; + mov.f32 %f1022, 0f35BFBE8E; + mul.rn.f32 %f1023, %f992, %f1022; + add.f32 %f1024, %f1021, %f1017; + sub.f32 %f1025, %f1021, %f1024; + add.f32 %f1026, %f1017, %f1025; + add.f32 %f1027, %f1019, %f1026; + add.f32 %f1028, %f1023, %f1027; + add.f32 %f1029, %f1024, %f1028; + sub.f32 %f1030, %f1024, %f1029; + add.f32 %f1031, %f1028, %f1030; + mul.rn.f32 %f1032, %f940, %f1029; + neg.f32 %f1033, %f1032; + fma.rn.f32 %f1034, %f940, %f1029, %f1033; + fma.rn.f32 %f1035, %f940, %f1031, %f1034; + fma.rn.f32 %f1037, %f924, %f1029, %f1035; + add.rn.f32 %f1038, %f1032, %f1037; + neg.f32 %f1039, %f1038; + add.rn.f32 %f1040, %f1032, %f1039; + add.rn.f32 %f1041, %f1040, %f1037; + mov.b32 %r388, %f1038; + setp.eq.s32 %p308, %r388, 1118925336; + add.s32 %r389, %r388, -1; + mov.b32 %f1042, %r389; + add.f32 %f1043, %f1041, 0f37000000; + selp.f32 %f206, %f1043, %f1041, %p308; + selp.f32 %f1044, %f1042, %f1038, %p308; + mov.f32 %f1045, 0f3FB8AA3B; + mul.rn.f32 %f1046, %f1044, %f1045; + cvt.rzi.f32.f32 %f1047, %f1046; + abs.f32 %f1048, %f1047; + setp.gt.f32 %p309, %f1048, 0f42FC0000; + mov.b32 %r390, %f1047; + and.b32 %r391, %r390, -2147483648; + or.b32 %r392, %r391, 1123811328; + mov.b32 %f1049, %r392; + selp.f32 %f1050, %f1049, %f1047, %p309; + mov.f32 %f1051, 0fBF317218; + fma.rn.f32 %f1052, %f1050, %f1051, %f1044; + mov.f32 %f1053, 0f3102E308; + fma.rn.f32 %f1054, %f1050, %f1053, %f1052; + mul.f32 %f1055, %f1054, 0f3FB8AA3B; + add.f32 %f1056, %f1050, 0f4B40007F; + mov.b32 %r393, %f1056; + shl.b32 %r394, %r393, 23; + mov.b32 %f1057, %r394; + ex2.approx.ftz.f32 %f1058, %f1055; + mul.f32 %f207, %f1058, %f1057; + setp.lt.f32 %p310, %f204, 0f00000000; + and.pred %p13, %p310, %p311; + add.f32 %f1059, %f204, %f204; + selp.f32 %f208, %f1059, 0f00000000, %p311; + add.f32 %f1060, %f205, 0f40000000; + mov.b32 %r81, %f1060; + div.rn.f32 %f209, %f963, %f317; + abs.f32 %f210, %f209; + setp.lt.f32 %p312, %f210, 0f00800000; + mul.f32 %f1061, %f210, 0f4B800000; + selp.f32 %f1062, %f1061, %f210, %p312; + selp.f32 %f1063, 0fC3170000, 0fC2FE0000, %p312; + mov.b32 %r395, %f1062; + and.b32 %r396, %r395, 8388607; + or.b32 %r397, %r396, 1065353216; + mov.b32 %f1064, %r397; + shr.u32 %r398, %r395, 23; + cvt.rn.f32.u32 %f1065, %r398; + add.f32 %f1066, %f1063, %f1065; + setp.gt.f32 %p313, %f1064, 0f3FB504F3; + mul.f32 %f1067, %f1064, 0f3F000000; + add.f32 %f1068, %f1066, 0f3F800000; + selp.f32 %f1069, %f1068, %f1066, %p313; + selp.f32 %f1070, %f1067, %f1064, %p313; + add.f32 %f1071, %f1070, 0fBF800000; add.f32 %f1072, %f1070, 0f3F800000; - selp.f32 %f1073, %f1071, %f1068, %p114; - selp.f32 %f1074, %f1072, %f1070, %p114; - add.f32 %f1075, %f1073, 0fBF800000; - add.f32 %f1050, %f1073, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1049,%f1050; - // inline asm - add.f32 %f1076, %f1075, %f1075; - mul.f32 %f1077, %f1049, %f1076; - mul.f32 %f1078, %f1077, %f1077; - fma.rn.f32 %f1081, %f1911, %f1078, %f1912; - fma.rn.f32 %f1083, %f1081, %f1078, %f1913; - mul.rn.f32 %f1084, %f1083, %f1078; - mul.rn.f32 %f1085, %f1084, %f1077; - sub.f32 %f1086, %f1075, %f1077; - neg.f32 %f1087, %f1077; - add.f32 %f1088, %f1086, %f1086; - fma.rn.f32 %f1089, %f1087, %f1075, %f1088; - mul.rn.f32 %f1090, %f1049, %f1089; - add.f32 %f1091, %f1085, %f1077; - sub.f32 %f1092, %f1077, %f1091; - add.f32 %f1093, %f1085, %f1092; - add.f32 %f1094, %f1090, %f1093; - add.f32 %f1095, %f1091, %f1094; - sub.f32 %f1096, %f1091, %f1095; - add.f32 %f1097, %f1094, %f1096; - mul.rn.f32 %f1099, %f1074, %f1915; - mul.rn.f32 %f1101, %f1074, %f1916; - add.f32 %f1102, %f1099, %f1095; - sub.f32 %f1103, %f1099, %f1102; - add.f32 %f1104, %f1095, %f1103; - add.f32 %f1105, %f1097, %f1104; - add.f32 %f1106, %f1101, %f1105; - add.f32 %f1107, %f1102, %f1106; - sub.f32 %f1108, %f1102, %f1107; - add.f32 %f1109, %f1106, %f1108; - mul.rn.f32 %f1111, %f1908, %f1107; - neg.f32 %f1112, %f1111; - fma.rn.f32 %f1113, %f1908, %f1107, %f1112; - fma.rn.f32 %f1114, %f1908, %f1109, %f1113; - fma.rn.f32 %f1116, %f1914, %f1107, %f1114; - add.rn.f32 %f1117, %f1111, %f1116; - neg.f32 %f1118, %f1117; - add.rn.f32 %f1119, %f1111, %f1118; - add.rn.f32 %f1120, %f1119, %f1116; - mov.b32 %r157, %f1117; - setp.eq.s32 %p115, %r157, 1118925336; - add.s32 %r158, %r157, -1; - mov.b32 %f1121, %r158; - add.f32 %f1122, %f1120, 0f37000000; - selp.f32 %f1123, %f1121, %f1117, %p115; - selp.f32 %f191, %f1122, %f1120, %p115; - mul.f32 %f1124, %f1123, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1125, %f1124; - fma.rn.f32 %f1126, %f1125, %f1909, %f1123; - fma.rn.f32 %f1127, %f1125, %f1910, %f1126; - mul.f32 %f1128, %f1127, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1129, %f1128; - add.f32 %f1130, %f1125, 0f00000000; - ex2.approx.f32 %f1131, %f1130; - mul.f32 %f1132, %f1129, %f1131; - setp.lt.f32 %p116, %f1123, 0fC2D20000; - selp.f32 %f1133, 0f00000000, %f1132, %p116; - setp.gt.f32 %p117, %f1123, 0f42D20000; - selp.f32 %f2117, 0f7F800000, %f1133, %p117; - setp.eq.f32 %p118, %f2117, 0f7F800000; - @%p118 bra BB0_73; - - fma.rn.f32 %f2117, %f2117, %f191, %f2117; - -BB0_73: - setp.lt.f32 %p119, %f189, 0f00000000; - and.pred %p6, %p119, %p54; - mov.b32 %r159, %f2117; - xor.b32 %r160, %r159, -2147483648; - mov.b32 %f1134, %r160; - selp.f32 %f2119, %f1134, %f2117, %p6; - setp.eq.f32 %p121, %f189, 0f00000000; - @%p121 bra BB0_76; - bra.uni BB0_74; - -BB0_76: - add.f32 %f1137, %f189, %f189; - selp.f32 %f2119, %f1137, 0f00000000, %p54; - bra.uni BB0_77; - -BB0_74: - setp.geu.f32 %p122, %f189, 0f00000000; - @%p122 bra BB0_77; - - mov.f32 %f1961, 0f40000000; - cvt.rzi.f32.f32 %f1136, %f1961; - setp.neu.f32 %p123, %f1136, 0f40000000; - selp.f32 %f2119, 0f7FFFFFFF, %f2119, %p123; - -BB0_77: - abs.f32 %f1965, %f189; - add.f32 %f1138, %f1965, 0f40000000; - mov.b32 %r161, %f1138; - setp.lt.s32 %p125, %r161, 2139095040; - @%p125 bra BB0_82; - - abs.f32 %f1969, %f189; - setp.gtu.f32 %p126, %f1969, 0f7F800000; - @%p126 bra BB0_81; - bra.uni BB0_79; - -BB0_81: - add.f32 %f2119, %f189, 0f40000000; - bra.uni BB0_82; - -BB0_79: - abs.f32 %f1970, %f189; - setp.neu.f32 %p127, %f1970, 0f7F800000; - @%p127 bra BB0_82; - - selp.f32 %f2119, 0fFF800000, 0f7F800000, %p6; - -BB0_82: - cvt.rn.f32.s32 %f1923, %r261; - add.f32 %f1922, %f1923, 0f3F800000; - sub.f32 %f1921, %f1922, %f2132; - mov.f32 %f2120, 0f00000000; - mov.f32 %f1919, 0fB5BFBE8E; - mov.f32 %f1918, 0fBF317200; - sub.f32 %f1917, %f1923, %f2132; - mul.f32 %f1140, %f2119, 0fBF000000; - setp.eq.f32 %p128, %f189, 0f3F800000; - selp.f32 %f1141, 0fBF000000, %f1140, %p128; - mul.f32 %f1142, %f1141, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1143, %f1142; - fma.rn.f32 %f1145, %f1143, %f1918, %f1141; - fma.rn.f32 %f1147, %f1143, %f1919, %f1145; - mul.f32 %f1148, %f1147, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1149, %f1148; - add.f32 %f1150, %f1143, 0f00000000; - ex2.approx.f32 %f1151, %f1150; - mul.f32 %f1152, %f1149, %f1151; - setp.lt.f32 %p129, %f1141, 0fC2D20000; - selp.f32 %f1153, 0f00000000, %f1152, %p129; - setp.gt.f32 %p130, %f1141, 0f42D20000; - selp.f32 %f1154, 0f7F800000, %f1153, %p130; - sub.f32 %f1155, %f188, %f1154; - mul.f32 %f1156, %f92, %f1155; - mul.f32 %f202, %f139, %f1156; - mul.f32 %f1157, %f1917, %f1154; - mul.f32 %f1158, %f1921, %f188; - sub.f32 %f1159, %f1158, %f1157; - mul.f32 %f1160, %f93, %f1159; - mul.f32 %f203, %f139, %f1160; - mul.f32 %f204, %f139, %f152; - setp.leu.f32 %p131, %f153, 0f3C23D70A; - @%p131 bra BB0_84; - - div.rn.f32 %f1161, %f154, %f153; - add.f32 %f2120, %f1161, 0fBF800000; - -BB0_84: - mov.f32 %f2121, 0f00000000; - @%p131 bra BB0_86; - - mul.f32 %f1163, %f153, %f153; - div.rn.f32 %f2121, %f154, %f1163; - -BB0_86: - mov.f32 %f1164, 0f47C35000; - min.f32 %f1165, %f2120, %f1164; - fma.rn.f32 %f2101, %f1165, %f172, %f2101; - mul.f32 %f1166, %f1165, %f173; - mul.f32 %f1167, %f172, %f172; - min.f32 %f1168, %f2121, %f1164; - mul.f32 %f1169, %f1168, %f1167; - sub.f32 %f1170, %f1166, %f1169; - add.f32 %f2105, %f1170, %f2105; - fma.rn.f32 %f2100, %f1165, %f202, %f2100; - mul.f32 %f1171, %f1165, %f203; - mul.f32 %f1172, %f202, %f202; - mul.f32 %f1173, %f1168, %f1172; - sub.f32 %f1174, %f1171, %f1173; - add.f32 %f2104, %f1174, %f2104; - fma.rn.f32 %f2099, %f1165, %f204, %f2099; - mul.f32 %f1175, %f1165, 0f00000000; - mul.f32 %f1176, %f204, %f204; - mul.f32 %f1177, %f1168, %f1176; - sub.f32 %f1178, %f1175, %f1177; - add.f32 %f2103, %f1178, %f2103; - add.f32 %f2098, %f2098, %f1165; - sub.f32 %f1179, %f1175, %f1168; - add.f32 %f2102, %f1179, %f2102; - add.s32 %r261, %r261, 1; - setp.lt.s32 %p133, %r261, %r53; - @%p133 bra BB0_40; - - add.s32 %r260, %r260, 1; - setp.lt.s32 %p134, %r260, %r53; - @%p134 bra BB0_39; - -BB0_88: - div.rn.f32 %f1180, %f2101, %f2105; - mov.f32 %f1181, 0fBF800000; - max.f32 %f1182, %f1180, %f1181; - mov.f32 %f1183, 0f3F800000; - min.f32 %f1184, %f1182, %f1183; - sub.f32 %f2133, %f2133, %f1184; - div.rn.f32 %f1185, %f2100, %f2104; - max.f32 %f1186, %f1185, %f1181; - min.f32 %f1187, %f1186, %f1183; - sub.f32 %f2132, %f2132, %f1187; - neg.f32 %f1188, %f2131; - div.rn.f32 %f1189, %f2099, %f2103; - max.f32 %f1190, %f1189, %f1188; - min.f32 %f1191, %f1190, %f2131; - sub.f32 %f1192, %f2131, %f1191; - neg.f32 %f1193, %f2064; - div.rn.f32 %f1194, %f2098, %f2102; - max.f32 %f1195, %f1194, %f1193; - min.f32 %f1196, %f1195, %f2064; - sub.f32 %f1197, %f2064, %f1196; - max.f32 %f2131, %f1192, %f1183; - mov.f32 %f1198, 0f3C23D70A; - max.f32 %f2064, %f1197, %f1198; - add.s32 %r259, %r259, 1; - setp.lt.s32 %p135, %r259, %r54; - @%p135 bra BB0_37; - -BB0_89: - add.s64 %rd3, %rd2, 4; - mov.f32 %f2162, 0f00000000; - @%p14 bra BB0_144; - - mov.u32 %r262, 0; - div.rn.f32 %f1201, %f2131, 0fC0206C98; - div.rn.f32 %f233, %f1201, %f394; - mov.f32 %f2162, 0f00000000; - -BB0_91: - mov.u32 %r263, 0; - cvt.rn.f32.s32 %f1202, %r262; - sub.f32 %f1203, %f1202, %f2133; - add.f32 %f1204, %f1203, 0f3F800000; - sqrt.rn.f32 %f235, %f40; - mul.f32 %f236, %f1204, %f235; - abs.f32 %f237, %f236; - mul.f32 %f238, %f236, %f236; - mul.f32 %f239, %f1203, %f235; - abs.f32 %f240, %f239; - add.f32 %f1205, %f1202, 0f3F800000; - sub.f32 %f1206, %f1205, %f2133; - div.rn.f32 %f242, %f1206, %f394; - mov.f32 %f1207, 0f3F800000; - cvt.rzi.f32.f32 %f1208, %f1207; - add.f32 %f1209, %f1208, %f1208; - mov.f32 %f1210, 0f40000000; - sub.f32 %f1211, %f1210, %f1209; - abs.f32 %f243, %f1211; - setp.eq.f32 %p137, %f243, 0f3F800000; - abs.f32 %f244, %f242; - setp.lt.f32 %p138, %f244, 0f00800000; - mul.f32 %f1212, %f244, 0f4B800000; - selp.f32 %f1213, 0fC3170000, 0fC2FE0000, %p138; - selp.f32 %f1214, %f1212, %f244, %p138; - mov.b32 %r164, %f1214; - and.b32 %r165, %r164, 8388607; - or.b32 %r166, %r165, 1065353216; - mov.b32 %f1215, %r166; - shr.u32 %r167, %r164, 23; - cvt.rn.f32.u32 %f1216, %r167; - add.f32 %f1217, %f1213, %f1216; - setp.gt.f32 %p139, %f1215, 0f3FB504F3; - mul.f32 %f1218, %f1215, 0f3F000000; - add.f32 %f1219, %f1217, 0f3F800000; - selp.f32 %f1220, %f1218, %f1215, %p139; - selp.f32 %f1221, %f1219, %f1217, %p139; - add.f32 %f245, %f1220, 0fBF800000; - add.f32 %f246, %f1220, 0f3F800000; - add.f32 %f247, %f245, %f245; - mov.f32 %f1222, 0f3F317200; - mul.rn.f32 %f248, %f1221, %f1222; - mov.f32 %f1223, 0f35BFBE8E; - mul.rn.f32 %f249, %f1221, %f1223; - setp.lt.f32 %p140, %f242, 0f00000000; - and.pred %p7, %p140, %p137; - add.f32 %f1224, %f242, %f242; - selp.f32 %f250, %f1224, 0f00000000, %p137; - div.rn.f32 %f253, %f1203, %f394; - abs.f32 %f254, %f253; - setp.lt.f32 %p141, %f254, 0f00800000; - mul.f32 %f1226, %f254, 0f4B800000; - selp.f32 %f1227, 0fC3170000, 0fC2FE0000, %p141; - selp.f32 %f1228, %f1226, %f254, %p141; - mov.b32 %r168, %f1228; - and.b32 %r169, %r168, 8388607; - or.b32 %r170, %r169, 1065353216; - mov.b32 %f1229, %r170; - shr.u32 %r171, %r168, 23; - cvt.rn.f32.u32 %f1230, %r171; - add.f32 %f1231, %f1227, %f1230; - setp.gt.f32 %p142, %f1229, 0f3FB504F3; - mul.f32 %f1232, %f1229, 0f3F000000; - add.f32 %f1233, %f1231, 0f3F800000; - selp.f32 %f1234, %f1232, %f1229, %p142; - selp.f32 %f1235, %f1233, %f1231, %p142; - add.f32 %f255, %f1234, 0fBF800000; - add.f32 %f256, %f1234, 0f3F800000; - add.f32 %f257, %f255, %f255; - mul.rn.f32 %f258, %f1235, %f1222; - mul.rn.f32 %f259, %f1235, %f1223; - setp.lt.f32 %p143, %f253, 0f00000000; - and.pred %p8, %p143, %p137; - add.f32 %f1236, %f253, %f253; - selp.f32 %f260, %f1236, 0f00000000, %p137; - mov.b32 %r173, %f236; - and.b32 %r43, %r173, -2147483648; - ld.local.v4.f32 {%f2144, %f2143, %f2142, %f2141}, [%rd2]; - ld.local.f32 %f2140, [%rd3+16]; - ld.local.v2.f32 {%f2139, %f2138}, [%rd3+20]; - ld.local.v2.f32 {%f2137, %f2136}, [%rd3+36]; - ld.local.f32 %f2135, [%rd3+56]; - -BB0_92: - setp.ltu.f32 %p144, %f237, 0f3F800000; - @%p144 bra BB0_94; - bra.uni BB0_93; - -BB0_94: - cvt.rn.f32.s32 %f2030, %r262; - sub.f32 %f2029, %f2030, %f2133; - add.f32 %f2028, %f2029, 0f3F800000; - mul.f32 %f2027, %f2028, %f235; - mov.f32 %f1264, 0f3BA0C9F8; - mov.f32 %f1265, 0fBA1268FB; - fma.rn.f32 %f1266, %f1265, %f238, %f1264; - mov.f32 %f1267, 0fBCDABFD4; - fma.rn.f32 %f1268, %f1266, %f238, %f1267; - mov.f32 %f1269, 0f3DE70331; - fma.rn.f32 %f1270, %f1268, %f238, %f1269; - mov.f32 %f1271, 0fBEC09330; - fma.rn.f32 %f1272, %f1270, %f238, %f1271; - mov.f32 %f1273, 0f3F906EBA; - fma.rn.f32 %f1274, %f1272, %f238, %f1273; - mul.f32 %f2146, %f2027, %f1274; - bra.uni BB0_95; - -BB0_93: - mov.f32 %f1986, 0f3F800000; - setp.ltu.f32 %p145, %f237, 0f407AD445; - mov.f32 %f1246, 0f3A03BB71; - mov.f32 %f1247, 0fB7B730FB; - fma.rn.f32 %f1248, %f1247, %f237, %f1246; - mov.f32 %f1249, 0fBBACA3B3; - fma.rn.f32 %f1250, %f1248, %f237, %f1249; - mov.f32 %f1251, 0f3D0A7445; - fma.rn.f32 %f1252, %f1250, %f237, %f1251; - mov.f32 %f1253, 0fBE1B3B75; - fma.rn.f32 %f1254, %f1252, %f237, %f1253; - mov.f32 %f1255, 0fBF6B385A; - fma.rn.f32 %f1256, %f1254, %f237, %f1255; - mov.f32 %f1257, 0fBFD0316E; - fma.rn.f32 %f1258, %f1256, %f237, %f1257; - mov.f32 %f1259, 0fBA031CCE; - fma.rn.f32 %f1260, %f1258, %f237, %f1259; - ex2.approx.ftz.f32 %f1261, %f1260; - sub.f32 %f1263, %f1986, %f1261; - mov.b32 %r174, %f1263; - selp.b32 %r175, %r174, 1065353216, %p145; - or.b32 %r176, %r175, %r43; - mov.b32 %f2146, %r176; - -BB0_95: - setp.ltu.f32 %p146, %f240, 0f3F800000; - @%p146 bra BB0_97; - bra.uni BB0_96; - -BB0_97: - cvt.rn.f32.s32 %f2025, %r262; - sub.f32 %f2024, %f2025, %f2133; - mul.f32 %f2023, %f2024, %f235; - mul.f32 %f2022, %f2023, %f2023; - mov.f32 %f1293, 0f3BA0C9F8; - mov.f32 %f1294, 0fBA1268FB; - fma.rn.f32 %f1295, %f1294, %f2022, %f1293; - mov.f32 %f1296, 0fBCDABFD4; - fma.rn.f32 %f1297, %f1295, %f2022, %f1296; - mov.f32 %f1298, 0f3DE70331; - fma.rn.f32 %f1299, %f1297, %f2022, %f1298; - mov.f32 %f1300, 0fBEC09330; - fma.rn.f32 %f1301, %f1299, %f2022, %f1300; - mov.f32 %f1302, 0f3F906EBA; - fma.rn.f32 %f1303, %f1301, %f2022, %f1302; - mul.f32 %f2147, %f2023, %f1303; - bra.uni BB0_98; - -BB0_96: - cvt.rn.f32.s32 %f1990, %r262; - sub.f32 %f1989, %f1990, %f2133; - mul.f32 %f1988, %f1989, %f235; - mov.b32 %r242, %f1988; - and.b32 %r241, %r242, -2147483648; - mov.f32 %f1987, 0f3F800000; - setp.ltu.f32 %p147, %f240, 0f407AD445; - mov.f32 %f1275, 0f3A03BB71; - mov.f32 %f1276, 0fB7B730FB; - fma.rn.f32 %f1277, %f1276, %f240, %f1275; - mov.f32 %f1278, 0fBBACA3B3; - fma.rn.f32 %f1279, %f1277, %f240, %f1278; - mov.f32 %f1280, 0f3D0A7445; - fma.rn.f32 %f1281, %f1279, %f240, %f1280; - mov.f32 %f1282, 0fBE1B3B75; - fma.rn.f32 %f1283, %f1281, %f240, %f1282; - mov.f32 %f1284, 0fBF6B385A; - fma.rn.f32 %f1285, %f1283, %f240, %f1284; - mov.f32 %f1286, 0fBFD0316E; - fma.rn.f32 %f1287, %f1285, %f240, %f1286; - mov.f32 %f1288, 0fBA031CCE; - fma.rn.f32 %f1289, %f1287, %f240, %f1288; - ex2.approx.ftz.f32 %f1290, %f1289; - sub.f32 %f1292, %f1987, %f1290; - mov.b32 %r177, %f1292; - selp.b32 %r178, %r177, 1065353216, %p147; - or.b32 %r179, %r178, %r241; - mov.b32 %f2147, %r179; - -BB0_98: - sub.f32 %f1304, %f2146, %f2147; - mul.f32 %f290, %f1304, 0f3F000000; - cvt.rn.f32.s32 %f291, %r263; - sub.f32 %f292, %f291, %f2132; - add.f32 %f1305, %f292, 0f3F800000; - mul.f32 %f293, %f1305, %f235; - abs.f32 %f294, %f293; - setp.ltu.f32 %p148, %f294, 0f3F800000; - @%p148 bra BB0_100; - bra.uni BB0_99; - -BB0_100: - mul.f32 %f1324, %f293, %f293; - mov.f32 %f1325, 0f3BA0C9F8; - mov.f32 %f1326, 0fBA1268FB; - fma.rn.f32 %f1327, %f1326, %f1324, %f1325; - mov.f32 %f1328, 0fBCDABFD4; - fma.rn.f32 %f1329, %f1327, %f1324, %f1328; - mov.f32 %f1330, 0f3DE70331; - fma.rn.f32 %f1331, %f1329, %f1324, %f1330; - mov.f32 %f1332, 0fBEC09330; - fma.rn.f32 %f1333, %f1331, %f1324, %f1332; - mov.f32 %f1334, 0f3F906EBA; - fma.rn.f32 %f1335, %f1333, %f1324, %f1334; - mul.f32 %f2148, %f293, %f1335; - bra.uni BB0_101; - -BB0_99: - mov.f32 %f1991, 0f3F800000; - mov.f32 %f1306, 0f3A03BB71; - mov.f32 %f1307, 0fB7B730FB; - fma.rn.f32 %f1308, %f1307, %f294, %f1306; - mov.f32 %f1309, 0fBBACA3B3; - fma.rn.f32 %f1310, %f1308, %f294, %f1309; - mov.f32 %f1311, 0f3D0A7445; - fma.rn.f32 %f1312, %f1310, %f294, %f1311; - mov.f32 %f1313, 0fBE1B3B75; - fma.rn.f32 %f1314, %f1312, %f294, %f1313; - mov.f32 %f1315, 0fBF6B385A; - fma.rn.f32 %f1316, %f1314, %f294, %f1315; - mov.f32 %f1317, 0fBFD0316E; - fma.rn.f32 %f1318, %f1316, %f294, %f1317; - mov.f32 %f1319, 0fBA031CCE; - fma.rn.f32 %f1320, %f1318, %f294, %f1319; - ex2.approx.ftz.f32 %f1321, %f1320; - sub.f32 %f1323, %f1991, %f1321; - mov.b32 %r180, %f1323; - setp.ltu.f32 %p149, %f294, 0f407AD445; - selp.b32 %r181, %r180, 1065353216, %p149; - mov.b32 %r182, %f293; - and.b32 %r183, %r182, -2147483648; - or.b32 %r184, %r181, %r183; - mov.b32 %f2148, %r184; - -BB0_101: - cvt.rn.f32.s32 %f1993, %r263; - sub.f32 %f1992, %f1993, %f2132; - mul.f32 %f298, %f1992, %f235; - abs.f32 %f299, %f298; - setp.ltu.f32 %p150, %f299, 0f3F800000; - @%p150 bra BB0_103; - bra.uni BB0_102; - -BB0_103: - mul.f32 %f1354, %f298, %f298; - mov.f32 %f1355, 0f3BA0C9F8; - mov.f32 %f1356, 0fBA1268FB; - fma.rn.f32 %f1357, %f1356, %f1354, %f1355; - mov.f32 %f1358, 0fBCDABFD4; - fma.rn.f32 %f1359, %f1357, %f1354, %f1358; - mov.f32 %f1360, 0f3DE70331; - fma.rn.f32 %f1361, %f1359, %f1354, %f1360; - mov.f32 %f1362, 0fBEC09330; - fma.rn.f32 %f1363, %f1361, %f1354, %f1362; - mov.f32 %f1364, 0f3F906EBA; - fma.rn.f32 %f1365, %f1363, %f1354, %f1364; - mul.f32 %f2149, %f298, %f1365; - bra.uni BB0_104; - -BB0_102: - mov.f32 %f1994, 0f3F800000; - mov.f32 %f1336, 0f3A03BB71; - mov.f32 %f1337, 0fB7B730FB; - fma.rn.f32 %f1338, %f1337, %f299, %f1336; - mov.f32 %f1339, 0fBBACA3B3; - fma.rn.f32 %f1340, %f1338, %f299, %f1339; - mov.f32 %f1341, 0f3D0A7445; - fma.rn.f32 %f1342, %f1340, %f299, %f1341; - mov.f32 %f1343, 0fBE1B3B75; - fma.rn.f32 %f1344, %f1342, %f299, %f1343; - mov.f32 %f1345, 0fBF6B385A; - fma.rn.f32 %f1346, %f1344, %f299, %f1345; - mov.f32 %f1347, 0fBFD0316E; - fma.rn.f32 %f1348, %f1346, %f299, %f1347; - mov.f32 %f1349, 0fBA031CCE; - fma.rn.f32 %f1350, %f1348, %f299, %f1349; - ex2.approx.ftz.f32 %f1351, %f1350; - sub.f32 %f1353, %f1994, %f1351; - mov.b32 %r185, %f1353; - setp.ltu.f32 %p151, %f299, 0f407AD445; - selp.b32 %r186, %r185, 1065353216, %p151; - mov.b32 %r187, %f298; - and.b32 %r188, %r187, -2147483648; - or.b32 %r189, %r186, %r188; - mov.b32 %f2149, %r189; - -BB0_104: - mov.f32 %f1995, 0f40000000; - sub.f32 %f1368, %f2148, %f2149; - mul.f32 %f303, %f1368, 0f3F000000; - mul.f32 %f1369, %f290, %f2131; - fma.rn.f32 %f304, %f303, %f1369, %f2064; - mad.lo.s32 %r190, %r263, %r53, %r262; - add.s32 %r191, %r190, %r2; - mul.wide.s32 %rd56, %r191, 4; - add.s64 %rd57, %rd1, %rd56; - ld.global.f32 %f305, [%rd57]; - // inline asm - rcp.approx.ftz.f32 %f1366,%f246; - // inline asm - mul.f32 %f1370, %f1366, %f247; - mul.f32 %f1371, %f1370, %f1370; - mov.f32 %f1372, 0f3C4CAF63; - mov.f32 %f1373, 0f3B18F0FE; - fma.rn.f32 %f1374, %f1373, %f1371, %f1372; - mov.f32 %f1375, 0f3DAAAABD; - fma.rn.f32 %f1376, %f1374, %f1371, %f1375; - mul.rn.f32 %f1377, %f1376, %f1371; - mul.rn.f32 %f1378, %f1377, %f1370; - sub.f32 %f1379, %f245, %f1370; - neg.f32 %f1380, %f1370; - add.f32 %f1381, %f1379, %f1379; - fma.rn.f32 %f1382, %f1380, %f245, %f1381; - mul.rn.f32 %f1383, %f1366, %f1382; - add.f32 %f1384, %f1378, %f1370; - sub.f32 %f1385, %f1370, %f1384; - add.f32 %f1386, %f1378, %f1385; - add.f32 %f1387, %f1383, %f1386; - add.f32 %f1388, %f1384, %f1387; - sub.f32 %f1389, %f1384, %f1388; - add.f32 %f1390, %f1387, %f1389; - add.f32 %f1391, %f248, %f1388; - sub.f32 %f1392, %f248, %f1391; - add.f32 %f1393, %f1388, %f1392; - add.f32 %f1394, %f1390, %f1393; - add.f32 %f1395, %f249, %f1394; - add.f32 %f1396, %f1391, %f1395; - sub.f32 %f1397, %f1391, %f1396; - add.f32 %f1398, %f1395, %f1397; - mul.rn.f32 %f1400, %f1995, %f1396; - neg.f32 %f1401, %f1400; - fma.rn.f32 %f1402, %f1995, %f1396, %f1401; - fma.rn.f32 %f1403, %f1995, %f1398, %f1402; - mov.f32 %f1404, 0f00000000; - fma.rn.f32 %f1405, %f1404, %f1396, %f1403; - add.rn.f32 %f1406, %f1400, %f1405; - neg.f32 %f1407, %f1406; - add.rn.f32 %f1408, %f1400, %f1407; - add.rn.f32 %f1409, %f1408, %f1405; - mov.b32 %r192, %f1406; - setp.eq.s32 %p152, %r192, 1118925336; - add.s32 %r193, %r192, -1; - mov.b32 %f1410, %r193; - add.f32 %f1411, %f1409, 0f37000000; - selp.f32 %f1412, %f1410, %f1406, %p152; - selp.f32 %f306, %f1411, %f1409, %p152; - mul.f32 %f1413, %f1412, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1414, %f1413; - mov.f32 %f1415, 0fBF317200; - fma.rn.f32 %f1416, %f1414, %f1415, %f1412; - mov.f32 %f1417, 0fB5BFBE8E; - fma.rn.f32 %f1418, %f1414, %f1417, %f1416; - mul.f32 %f1419, %f1418, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1420, %f1419; - add.f32 %f1421, %f1414, 0f00000000; - ex2.approx.f32 %f1422, %f1421; - mul.f32 %f1423, %f1420, %f1422; - setp.lt.f32 %p153, %f1412, 0fC2D20000; - selp.f32 %f1424, 0f00000000, %f1423, %p153; - setp.gt.f32 %p154, %f1412, 0f42D20000; - selp.f32 %f2150, 0f7F800000, %f1424, %p154; - setp.eq.f32 %p155, %f2150, 0f7F800000; - @%p155 bra BB0_106; - - fma.rn.f32 %f2150, %f2150, %f306, %f2150; - -BB0_106: - setp.geu.f32 %p227, %f242, 0f00000000; - mov.b32 %r194, %f2150; - xor.b32 %r195, %r194, -2147483648; - mov.b32 %f1425, %r195; - selp.f32 %f310, %f1425, %f2150, %p7; - setp.eq.f32 %p156, %f242, 0f00000000; - selp.f32 %f2151, %f250, %f310, %p156; - @%p227 bra BB0_108; - - mov.f32 %f1996, 0f40000000; - cvt.rzi.f32.f32 %f1427, %f1996; - setp.neu.f32 %p157, %f1427, 0f40000000; - selp.f32 %f2151, 0f7FFFFFFF, %f310, %p157; - -BB0_108: - abs.f32 %f2007, %f242; - mov.f32 %f2006, 0f00000000; - mov.f32 %f2005, 0f3DAAAABD; - mov.f32 %f2004, 0f3C4CAF63; - mov.f32 %f2003, 0f3B18F0FE; - mov.f32 %f2002, 0fB5BFBE8E; - mov.f32 %f2001, 0fBF317200; - add.f32 %f2000, %f2007, 0f40000000; - mov.b32 %r243, %f2000; - selp.f32 %f1999, 0fFF800000, 0f7F800000, %p7; - add.f32 %f1998, %f242, 0f40000000; - mov.f32 %f1997, 0f40000000; - setp.gtu.f32 %p158, %f2007, 0f7F800000; - selp.f32 %f1430, %f1998, %f2151, %p158; - setp.neu.f32 %p159, %f2007, 0f7F800000; - selp.f32 %f1431, %f1430, %f1999, %p159; - setp.gt.s32 %p160, %r243, 2139095039; - selp.f32 %f1432, %f1431, %f2151, %p160; - mul.f32 %f1433, %f1432, 0fBF000000; - setp.eq.f32 %p161, %f242, 0f3F800000; - selp.f32 %f1434, 0fBF000000, %f1433, %p161; - mul.f32 %f1435, %f1434, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1436, %f1435; - fma.rn.f32 %f1438, %f1436, %f2001, %f1434; - fma.rn.f32 %f1440, %f1436, %f2002, %f1438; - mul.f32 %f1441, %f1440, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1442, %f1441; - add.f32 %f1443, %f1436, 0f00000000; - ex2.approx.f32 %f1444, %f1443; - mul.f32 %f1445, %f1442, %f1444; - setp.lt.f32 %p162, %f1434, 0fC2D20000; - selp.f32 %f1446, 0f00000000, %f1445, %p162; - setp.gt.f32 %p163, %f1434, 0f42D20000; - selp.f32 %f314, 0f7F800000, %f1446, %p163; - // inline asm - rcp.approx.ftz.f32 %f1428,%f256; - // inline asm - mul.f32 %f1447, %f1428, %f257; - mul.f32 %f1448, %f1447, %f1447; - fma.rn.f32 %f1451, %f2003, %f1448, %f2004; - fma.rn.f32 %f1453, %f1451, %f1448, %f2005; - mul.rn.f32 %f1454, %f1453, %f1448; - mul.rn.f32 %f1455, %f1454, %f1447; - sub.f32 %f1456, %f255, %f1447; - neg.f32 %f1457, %f1447; - add.f32 %f1458, %f1456, %f1456; - fma.rn.f32 %f1459, %f1457, %f255, %f1458; - mul.rn.f32 %f1460, %f1428, %f1459; - add.f32 %f1461, %f1455, %f1447; - sub.f32 %f1462, %f1447, %f1461; - add.f32 %f1463, %f1455, %f1462; - add.f32 %f1464, %f1460, %f1463; - add.f32 %f1465, %f1461, %f1464; - sub.f32 %f1466, %f1461, %f1465; - add.f32 %f1467, %f1464, %f1466; - add.f32 %f1468, %f258, %f1465; - sub.f32 %f1469, %f258, %f1468; - add.f32 %f1470, %f1465, %f1469; - add.f32 %f1471, %f1467, %f1470; - add.f32 %f1472, %f259, %f1471; - add.f32 %f1473, %f1468, %f1472; - sub.f32 %f1474, %f1468, %f1473; - add.f32 %f1475, %f1472, %f1474; - mul.rn.f32 %f1477, %f1997, %f1473; - neg.f32 %f1478, %f1477; - fma.rn.f32 %f1479, %f1997, %f1473, %f1478; - fma.rn.f32 %f1480, %f1997, %f1475, %f1479; - fma.rn.f32 %f1482, %f2006, %f1473, %f1480; - add.rn.f32 %f1483, %f1477, %f1482; - neg.f32 %f1484, %f1483; - add.rn.f32 %f1485, %f1477, %f1484; - add.rn.f32 %f1486, %f1485, %f1482; - mov.b32 %r196, %f1483; - setp.eq.s32 %p164, %r196, 1118925336; - add.s32 %r197, %r196, -1; - mov.b32 %f1487, %r197; - add.f32 %f1488, %f1486, 0f37000000; - selp.f32 %f1489, %f1487, %f1483, %p164; - selp.f32 %f315, %f1488, %f1486, %p164; - mul.f32 %f1490, %f1489, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1491, %f1490; - fma.rn.f32 %f1492, %f1491, %f2001, %f1489; - fma.rn.f32 %f1493, %f1491, %f2002, %f1492; - mul.f32 %f1494, %f1493, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1495, %f1494; - add.f32 %f1496, %f1491, 0f00000000; - ex2.approx.f32 %f1497, %f1496; - mul.f32 %f1498, %f1495, %f1497; - setp.lt.f32 %p165, %f1489, 0fC2D20000; - selp.f32 %f1499, 0f00000000, %f1498, %p165; - setp.gt.f32 %p166, %f1489, 0f42D20000; - selp.f32 %f2152, 0f7F800000, %f1499, %p166; - setp.eq.f32 %p167, %f2152, 0f7F800000; - @%p167 bra BB0_110; - - fma.rn.f32 %f2152, %f2152, %f315, %f2152; - -BB0_110: - setp.geu.f32 %p228, %f253, 0f00000000; - mov.b32 %r198, %f2152; - xor.b32 %r199, %r198, -2147483648; - mov.b32 %f1500, %r199; - selp.f32 %f319, %f1500, %f2152, %p8; - setp.eq.f32 %p168, %f253, 0f00000000; - selp.f32 %f2153, %f260, %f319, %p168; - @%p228 bra BB0_112; - - mov.f32 %f2008, 0f40000000; - cvt.rzi.f32.f32 %f1502, %f2008; - setp.neu.f32 %p169, %f1502, 0f40000000; - selp.f32 %f2153, 0f7FFFFFFF, %f319, %p169; - -BB0_112: - abs.f32 %f2021, %f253; - mov.f32 %f2020, 0f35BFBE8E; - mov.f32 %f2019, 0f3F317200; - add.f32 %f2018, %f2021, 0f40000000; - mov.b32 %r244, %f2018; - selp.f32 %f2017, 0fFF800000, 0f7F800000, %p8; - add.f32 %f2016, %f253, 0f40000000; - mov.f32 %f2015, 0f00000000; - mov.f32 %f2014, 0f3DAAAABD; - mov.f32 %f2013, 0f3C4CAF63; - mov.f32 %f2012, 0f3B18F0FE; - mov.f32 %f2011, 0fB5BFBE8E; - mov.f32 %f2010, 0fBF317200; - mov.f32 %f2009, 0f40000000; - setp.gtu.f32 %p170, %f2021, 0f7F800000; - selp.f32 %f1505, %f2016, %f2153, %p170; - setp.neu.f32 %p171, %f2021, 0f7F800000; - selp.f32 %f1506, %f1505, %f2017, %p171; - setp.gt.s32 %p172, %r244, 2139095039; - selp.f32 %f1507, %f1506, %f2153, %p172; - mul.f32 %f1508, %f1507, 0fBF000000; - setp.eq.f32 %p173, %f253, 0f3F800000; - selp.f32 %f1509, 0fBF000000, %f1508, %p173; - mul.f32 %f1510, %f1509, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1511, %f1510; - fma.rn.f32 %f1513, %f1511, %f2010, %f1509; - fma.rn.f32 %f1515, %f1511, %f2011, %f1513; - mul.f32 %f1516, %f1515, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1517, %f1516; - add.f32 %f1518, %f1511, 0f00000000; - ex2.approx.f32 %f1519, %f1518; - mul.f32 %f1520, %f1517, %f1519; - setp.lt.f32 %p174, %f1509, 0fC2D20000; - selp.f32 %f1521, 0f00000000, %f1520, %p174; - setp.gt.f32 %p175, %f1509, 0f42D20000; - selp.f32 %f1522, 0f7F800000, %f1521, %p175; - sub.f32 %f1523, %f314, %f1522; - mul.f32 %f1524, %f233, %f1523; - mul.f32 %f323, %f303, %f1524; - add.f32 %f1525, %f291, 0f3F800000; - sub.f32 %f1526, %f1525, %f2132; - div.rn.f32 %f324, %f1526, %f394; - abs.f32 %f325, %f324; - setp.lt.f32 %p176, %f325, 0f00800000; - mul.f32 %f1527, %f325, 0f4B800000; - selp.f32 %f1528, 0fC3170000, 0fC2FE0000, %p176; - selp.f32 %f1529, %f1527, %f325, %p176; - mov.b32 %r200, %f1529; - and.b32 %r201, %r200, 8388607; - or.b32 %r202, %r201, 1065353216; - mov.b32 %f1530, %r202; - shr.u32 %r203, %r200, 23; - cvt.rn.f32.u32 %f1531, %r203; - add.f32 %f1532, %f1528, %f1531; - setp.gt.f32 %p177, %f1530, 0f3FB504F3; - mul.f32 %f1533, %f1530, 0f3F000000; - add.f32 %f1534, %f1532, 0f3F800000; - selp.f32 %f1535, %f1533, %f1530, %p177; - selp.f32 %f1536, %f1534, %f1532, %p177; - add.f32 %f1537, %f1535, 0fBF800000; - add.f32 %f1504, %f1535, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1503,%f1504; - // inline asm - add.f32 %f1538, %f1537, %f1537; - mul.f32 %f1539, %f1503, %f1538; - mul.f32 %f1540, %f1539, %f1539; - fma.rn.f32 %f1543, %f2012, %f1540, %f2013; - fma.rn.f32 %f1545, %f1543, %f1540, %f2014; - mul.rn.f32 %f1546, %f1545, %f1540; - mul.rn.f32 %f1547, %f1546, %f1539; - sub.f32 %f1548, %f1537, %f1539; - neg.f32 %f1549, %f1539; - add.f32 %f1550, %f1548, %f1548; - fma.rn.f32 %f1551, %f1549, %f1537, %f1550; - mul.rn.f32 %f1552, %f1503, %f1551; - add.f32 %f1553, %f1547, %f1539; - sub.f32 %f1554, %f1539, %f1553; - add.f32 %f1555, %f1547, %f1554; - add.f32 %f1556, %f1552, %f1555; - add.f32 %f1557, %f1553, %f1556; - sub.f32 %f1558, %f1553, %f1557; - add.f32 %f1559, %f1556, %f1558; - mul.rn.f32 %f1561, %f1536, %f2019; - mul.rn.f32 %f1563, %f1536, %f2020; - add.f32 %f1564, %f1561, %f1557; - sub.f32 %f1565, %f1561, %f1564; - add.f32 %f1566, %f1557, %f1565; - add.f32 %f1567, %f1559, %f1566; - add.f32 %f1568, %f1563, %f1567; - add.f32 %f1569, %f1564, %f1568; - sub.f32 %f1570, %f1564, %f1569; - add.f32 %f1571, %f1568, %f1570; - mul.rn.f32 %f1573, %f2009, %f1569; - neg.f32 %f1574, %f1573; - fma.rn.f32 %f1575, %f2009, %f1569, %f1574; - fma.rn.f32 %f1576, %f2009, %f1571, %f1575; - fma.rn.f32 %f1578, %f2015, %f1569, %f1576; - add.rn.f32 %f1579, %f1573, %f1578; - neg.f32 %f1580, %f1579; - add.rn.f32 %f1581, %f1573, %f1580; - add.rn.f32 %f1582, %f1581, %f1578; - mov.b32 %r204, %f1579; - setp.eq.s32 %p178, %r204, 1118925336; - add.s32 %r205, %r204, -1; - mov.b32 %f1583, %r205; - add.f32 %f1584, %f1582, 0f37000000; - selp.f32 %f1585, %f1583, %f1579, %p178; - selp.f32 %f326, %f1584, %f1582, %p178; - mul.f32 %f1586, %f1585, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1587, %f1586; - fma.rn.f32 %f1588, %f1587, %f2010, %f1585; - fma.rn.f32 %f1589, %f1587, %f2011, %f1588; - mul.f32 %f1590, %f1589, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1591, %f1590; - add.f32 %f1592, %f1587, 0f00000000; - ex2.approx.f32 %f1593, %f1592; - mul.f32 %f1594, %f1591, %f1593; - setp.lt.f32 %p179, %f1585, 0fC2D20000; - selp.f32 %f1595, 0f00000000, %f1594, %p179; - setp.gt.f32 %p180, %f1585, 0f42D20000; - selp.f32 %f2154, 0f7F800000, %f1595, %p180; - setp.eq.f32 %p181, %f2154, 0f7F800000; - @%p181 bra BB0_114; - - fma.rn.f32 %f2154, %f2154, %f326, %f2154; - -BB0_114: - setp.lt.f32 %p182, %f324, 0f00000000; - and.pred %p11, %p182, %p137; - mov.b32 %r206, %f2154; - xor.b32 %r207, %r206, -2147483648; - mov.b32 %f1596, %r207; - selp.f32 %f2156, %f1596, %f2154, %p11; - setp.eq.f32 %p184, %f324, 0f00000000; - @%p184 bra BB0_117; - bra.uni BB0_115; - -BB0_117: - add.f32 %f1599, %f324, %f324; - selp.f32 %f2156, %f1599, 0f00000000, %p137; - bra.uni BB0_118; - -BB0_115: - setp.geu.f32 %p185, %f324, 0f00000000; - @%p185 bra BB0_118; - - mov.f32 %f2034, 0f40000000; - cvt.rzi.f32.f32 %f1598, %f2034; - setp.neu.f32 %p186, %f1598, 0f40000000; - selp.f32 %f2156, 0f7FFFFFFF, %f2156, %p186; - -BB0_118: - abs.f32 %f1971, %f324; - add.f32 %f1600, %f1971, 0f40000000; - mov.b32 %r208, %f1600; - setp.lt.s32 %p188, %r208, 2139095040; - @%p188 bra BB0_123; - - abs.f32 %f2032, %f324; - setp.gtu.f32 %p189, %f2032, 0f7F800000; - @%p189 bra BB0_122; - bra.uni BB0_120; - -BB0_122: - add.f32 %f2156, %f324, 0f40000000; - bra.uni BB0_123; - -BB0_120: - abs.f32 %f2033, %f324; - setp.neu.f32 %p190, %f2033, 0f7F800000; - @%p190 bra BB0_123; - - selp.f32 %f2156, 0fFF800000, 0f7F800000, %p11; - -BB0_123: - mov.f32 %f1982, 0f35BFBE8E; - mov.f32 %f1981, 0f3F317200; - mov.f32 %f1980, 0f00000000; - mov.f32 %f1979, 0f3DAAAABD; - mov.f32 %f1978, 0f3C4CAF63; - mov.f32 %f1977, 0f3B18F0FE; - mov.f32 %f1976, 0fB5BFBE8E; - mov.f32 %f1975, 0fBF317200; - mov.f32 %f1974, 0f40000000; - cvt.rn.f32.s32 %f1973, %r263; - sub.f32 %f1972, %f1973, %f2132; - mul.f32 %f1603, %f2156, 0fBF000000; - setp.eq.f32 %p191, %f324, 0f3F800000; - selp.f32 %f1604, 0fBF000000, %f1603, %p191; - mul.f32 %f1605, %f1604, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1606, %f1605; - fma.rn.f32 %f1608, %f1606, %f1975, %f1604; - fma.rn.f32 %f1610, %f1606, %f1976, %f1608; - mul.f32 %f1611, %f1610, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1612, %f1611; - add.f32 %f1613, %f1606, 0f00000000; - ex2.approx.f32 %f1614, %f1613; - mul.f32 %f1615, %f1612, %f1614; - setp.lt.f32 %p192, %f1604, 0fC2D20000; - selp.f32 %f1616, 0f00000000, %f1615, %p192; - setp.gt.f32 %p193, %f1604, 0f42D20000; - selp.f32 %f337, 0f7F800000, %f1616, %p193; - div.rn.f32 %f338, %f1972, %f394; - abs.f32 %f339, %f338; - setp.lt.f32 %p194, %f339, 0f00800000; - mul.f32 %f1617, %f339, 0f4B800000; - selp.f32 %f1618, 0fC3170000, 0fC2FE0000, %p194; - selp.f32 %f1619, %f1617, %f339, %p194; - mov.b32 %r209, %f1619; - and.b32 %r210, %r209, 8388607; - or.b32 %r211, %r210, 1065353216; - mov.b32 %f1620, %r211; - shr.u32 %r212, %r209, 23; - cvt.rn.f32.u32 %f1621, %r212; - add.f32 %f1622, %f1618, %f1621; - setp.gt.f32 %p195, %f1620, 0f3FB504F3; - mul.f32 %f1623, %f1620, 0f3F000000; - add.f32 %f1624, %f1622, 0f3F800000; - selp.f32 %f1625, %f1623, %f1620, %p195; - selp.f32 %f1626, %f1624, %f1622, %p195; - add.f32 %f1627, %f1625, 0fBF800000; - add.f32 %f1602, %f1625, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1601,%f1602; - // inline asm - add.f32 %f1628, %f1627, %f1627; - mul.f32 %f1629, %f1601, %f1628; - mul.f32 %f1630, %f1629, %f1629; - fma.rn.f32 %f1633, %f1977, %f1630, %f1978; - fma.rn.f32 %f1635, %f1633, %f1630, %f1979; - mul.rn.f32 %f1636, %f1635, %f1630; - mul.rn.f32 %f1637, %f1636, %f1629; - sub.f32 %f1638, %f1627, %f1629; - neg.f32 %f1639, %f1629; - add.f32 %f1640, %f1638, %f1638; - fma.rn.f32 %f1641, %f1639, %f1627, %f1640; - mul.rn.f32 %f1642, %f1601, %f1641; - add.f32 %f1643, %f1637, %f1629; - sub.f32 %f1644, %f1629, %f1643; - add.f32 %f1645, %f1637, %f1644; - add.f32 %f1646, %f1642, %f1645; - add.f32 %f1647, %f1643, %f1646; - sub.f32 %f1648, %f1643, %f1647; - add.f32 %f1649, %f1646, %f1648; - mul.rn.f32 %f1651, %f1626, %f1981; - mul.rn.f32 %f1653, %f1626, %f1982; - add.f32 %f1654, %f1651, %f1647; - sub.f32 %f1655, %f1651, %f1654; - add.f32 %f1656, %f1647, %f1655; - add.f32 %f1657, %f1649, %f1656; - add.f32 %f1658, %f1653, %f1657; - add.f32 %f1659, %f1654, %f1658; - sub.f32 %f1660, %f1654, %f1659; - add.f32 %f1661, %f1658, %f1660; - mul.rn.f32 %f1663, %f1974, %f1659; - neg.f32 %f1664, %f1663; - fma.rn.f32 %f1665, %f1974, %f1659, %f1664; - fma.rn.f32 %f1666, %f1974, %f1661, %f1665; - fma.rn.f32 %f1668, %f1980, %f1659, %f1666; - add.rn.f32 %f1669, %f1663, %f1668; - neg.f32 %f1670, %f1669; - add.rn.f32 %f1671, %f1663, %f1670; - add.rn.f32 %f1672, %f1671, %f1668; - mov.b32 %r213, %f1669; - setp.eq.s32 %p196, %r213, 1118925336; - add.s32 %r214, %r213, -1; - mov.b32 %f1673, %r214; - add.f32 %f1674, %f1672, 0f37000000; - selp.f32 %f1675, %f1673, %f1669, %p196; - selp.f32 %f340, %f1674, %f1672, %p196; - mul.f32 %f1676, %f1675, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1677, %f1676; - fma.rn.f32 %f1678, %f1677, %f1975, %f1675; - fma.rn.f32 %f1679, %f1677, %f1976, %f1678; - mul.f32 %f1680, %f1679, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1681, %f1680; - add.f32 %f1682, %f1677, 0f00000000; - ex2.approx.f32 %f1683, %f1682; - mul.f32 %f1684, %f1681, %f1683; - setp.lt.f32 %p197, %f1675, 0fC2D20000; - selp.f32 %f1685, 0f00000000, %f1684, %p197; - setp.gt.f32 %p198, %f1675, 0f42D20000; - selp.f32 %f2157, 0f7F800000, %f1685, %p198; - setp.eq.f32 %p199, %f2157, 0f7F800000; - @%p199 bra BB0_125; - - fma.rn.f32 %f2157, %f2157, %f340, %f2157; - -BB0_125: - setp.lt.f32 %p200, %f338, 0f00000000; - and.pred %p12, %p200, %p137; - mov.b32 %r215, %f2157; - xor.b32 %r216, %r215, -2147483648; - mov.b32 %f1686, %r216; - selp.f32 %f2159, %f1686, %f2157, %p12; - setp.eq.f32 %p202, %f338, 0f00000000; - @%p202 bra BB0_128; - bra.uni BB0_126; - -BB0_128: - add.f32 %f1689, %f338, %f338; - selp.f32 %f2159, %f1689, 0f00000000, %p137; - bra.uni BB0_129; - -BB0_126: - setp.geu.f32 %p203, %f338, 0f00000000; - @%p203 bra BB0_129; - - mov.f32 %f2031, 0f40000000; - cvt.rzi.f32.f32 %f1688, %f2031; - setp.neu.f32 %p204, %f1688, 0f40000000; - selp.f32 %f2159, 0f7FFFFFFF, %f2159, %p204; - -BB0_129: - abs.f32 %f2035, %f338; - add.f32 %f1690, %f2035, 0f40000000; - mov.b32 %r217, %f1690; - setp.lt.s32 %p206, %r217, 2139095040; - @%p206 bra BB0_134; - - abs.f32 %f2036, %f338; - setp.gtu.f32 %p207, %f2036, 0f7F800000; - @%p207 bra BB0_133; - bra.uni BB0_131; - -BB0_133: - add.f32 %f2159, %f338, 0f40000000; - bra.uni BB0_134; - -BB0_131: - abs.f32 %f2037, %f338; - setp.neu.f32 %p208, %f2037, 0f7F800000; - @%p208 bra BB0_134; - - selp.f32 %f2159, 0fFF800000, 0f7F800000, %p12; - -BB0_134: - mov.f32 %f1984, 0fB5BFBE8E; - mov.f32 %f1983, 0fBF317200; - mul.f32 %f1691, %f2159, 0fBF000000; - setp.eq.f32 %p209, %f338, 0f3F800000; - selp.f32 %f1692, 0fBF000000, %f1691, %p209; - mul.f32 %f1693, %f1692, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1694, %f1693; - fma.rn.f32 %f1696, %f1694, %f1983, %f1692; - fma.rn.f32 %f1698, %f1694, %f1984, %f1696; - mul.f32 %f1699, %f1698, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1700, %f1699; - add.f32 %f1701, %f1694, 0f00000000; - ex2.approx.f32 %f1702, %f1701; - mul.f32 %f1703, %f1700, %f1702; - setp.lt.f32 %p210, %f1692, 0fC2D20000; - selp.f32 %f1704, 0f00000000, %f1703, %p210; - setp.gt.f32 %p211, %f1692, 0f42D20000; - selp.f32 %f1705, 0f7F800000, %f1704, %p211; - sub.f32 %f1706, %f337, %f1705; - mul.f32 %f1707, %f233, %f1706; - mul.f32 %f1708, %f290, %f1707; - mul.f32 %f1709, %f323, %f323; - div.rn.f32 %f1710, %f1709, %f304; - add.f32 %f2144, %f1710, %f2144; - mul.f32 %f1711, %f1708, %f323; - div.rn.f32 %f1712, %f1711, %f304; - add.f32 %f2143, %f1712, %f2143; - mul.f32 %f1713, %f290, %f303; - mul.f32 %f1714, %f1713, %f323; - div.rn.f32 %f1715, %f1714, %f304; - add.f32 %f2142, %f1715, %f2142; - div.rn.f32 %f1716, %f323, %f304; - add.f32 %f2141, %f1716, %f2141; - mul.f32 %f1717, %f1708, %f1708; - div.rn.f32 %f1718, %f1717, %f304; - add.f32 %f2140, %f1718, %f2140; - mul.f32 %f1719, %f1713, %f1708; - div.rn.f32 %f1720, %f1719, %f304; - add.f32 %f2139, %f1720, %f2139; - div.rn.f32 %f1721, %f1708, %f304; - add.f32 %f2138, %f1721, %f2138; - mul.f32 %f1722, %f1713, %f1713; - div.rn.f32 %f1723, %f1722, %f304; - add.f32 %f2137, %f1723, %f2137; - div.rn.f32 %f1724, %f1713, %f304; - add.f32 %f2136, %f1724, %f2136; - rcp.rn.f32 %f1725, %f304; - add.f32 %f2135, %f1725, %f2135; - setp.leu.f32 %p212, %f304, 0f00000000; - @%p212 bra BB0_142; - - setp.gt.f32 %p213, %f305, 0f00000000; - @%p213 bra BB0_137; - bra.uni BB0_136; - -BB0_137: - setp.lt.f32 %p214, %f304, 0f00800000; - mul.f32 %f1726, %f304, 0f4B000000; - selp.f32 %f362, %f1726, %f304, %p214; - selp.f32 %f1727, 0fC1B80000, 0f00000000, %p214; - mov.b32 %r218, %f362; - add.s32 %r219, %r218, -1059760811; - and.b32 %r220, %r219, -8388608; - sub.s32 %r221, %r218, %r220; - mov.b32 %f1728, %r221; - cvt.rn.f32.s32 %f1729, %r220; - mov.f32 %f1730, 0f34000000; - fma.rn.f32 %f1731, %f1729, %f1730, %f1727; - add.f32 %f1732, %f1728, 0fBF800000; - mov.f32 %f1733, 0f3E1039F6; - mov.f32 %f1734, 0fBE055027; - fma.rn.f32 %f1735, %f1734, %f1732, %f1733; - mov.f32 %f1736, 0fBDF8CDCC; - fma.rn.f32 %f1737, %f1735, %f1732, %f1736; - mov.f32 %f1738, 0f3E0F2955; - fma.rn.f32 %f1739, %f1737, %f1732, %f1738; - mov.f32 %f1740, 0fBE2AD8B9; - fma.rn.f32 %f1741, %f1739, %f1732, %f1740; - mov.f32 %f1742, 0f3E4CED0B; - fma.rn.f32 %f1743, %f1741, %f1732, %f1742; - mov.f32 %f1744, 0fBE7FFF22; - fma.rn.f32 %f1745, %f1743, %f1732, %f1744; - mov.f32 %f1746, 0f3EAAAA78; - fma.rn.f32 %f1747, %f1745, %f1732, %f1746; - mov.f32 %f1748, 0fBF000000; - fma.rn.f32 %f1749, %f1747, %f1732, %f1748; - mul.f32 %f1750, %f1732, %f1749; - fma.rn.f32 %f1751, %f1750, %f1732, %f1732; - mov.f32 %f1752, 0f3F317218; - fma.rn.f32 %f2160, %f1731, %f1752, %f1751; - setp.lt.u32 %p215, %r218, 2139095040; - @%p215 bra BB0_139; - - mov.f32 %f1753, 0f7F800000; - fma.rn.f32 %f2160, %f362, %f1753, %f1753; - -BB0_139: - setp.eq.f32 %p216, %f362, 0f00000000; - selp.f32 %f1754, 0fFF800000, %f2160, %p216; - mul.f32 %f1755, %f305, %f1754; - sub.f32 %f366, %f1755, %f304; - mul.f32 %f1756, %f305, 0f4B000000; - setp.lt.f32 %p217, %f305, 0f00800000; - selp.f32 %f367, %f1756, %f305, %p217; - selp.f32 %f1757, 0fC1B80000, 0f00000000, %p217; - mov.b32 %r222, %f367; - add.s32 %r223, %r222, -1059760811; - and.b32 %r224, %r223, -8388608; - sub.s32 %r225, %r222, %r224; - mov.b32 %f1758, %r225; - cvt.rn.f32.s32 %f1759, %r224; - fma.rn.f32 %f1761, %f1759, %f1730, %f1757; - add.f32 %f1762, %f1758, 0fBF800000; - fma.rn.f32 %f1765, %f1734, %f1762, %f1733; - fma.rn.f32 %f1767, %f1765, %f1762, %f1736; - fma.rn.f32 %f1769, %f1767, %f1762, %f1738; - fma.rn.f32 %f1771, %f1769, %f1762, %f1740; - fma.rn.f32 %f1773, %f1771, %f1762, %f1742; - fma.rn.f32 %f1775, %f1773, %f1762, %f1744; - fma.rn.f32 %f1777, %f1775, %f1762, %f1746; - fma.rn.f32 %f1779, %f1777, %f1762, %f1748; - mul.f32 %f1780, %f1762, %f1779; - fma.rn.f32 %f1781, %f1780, %f1762, %f1762; - fma.rn.f32 %f2161, %f1761, %f1752, %f1781; - setp.lt.u32 %p218, %r222, 2139095040; - @%p218 bra BB0_141; - - mov.f32 %f1783, 0f7F800000; - fma.rn.f32 %f2161, %f367, %f1783, %f1783; - -BB0_141: - setp.eq.f32 %p219, %f367, 0f00000000; - selp.f32 %f1784, 0fFF800000, %f2161, %p219; - mul.f32 %f1785, %f305, %f1784; - sub.f32 %f1786, %f366, %f1785; - add.f32 %f1787, %f305, %f1786; - add.f32 %f2162, %f2162, %f1787; - bra.uni BB0_142; - -BB0_136: - sub.f32 %f2162, %f2162, %f304; - -BB0_142: - add.s32 %r263, %r263, 1; - setp.lt.s32 %p220, %r263, %r53; - @%p220 bra BB0_92; - - st.local.v4.f32 [%rd2], {%f2144, %f2143, %f2142, %f2141}; - st.local.v4.f32 [%rd3+12], {%f2143, %f2140, %f2139, %f2138}; - st.local.v4.f32 [%rd3+28], {%f2142, %f2139, %f2137, %f2136}; - st.local.v4.f32 [%rd3+44], {%f2141, %f2138, %f2136, %f2135}; - add.s32 %r262, %r262, 1; - setp.lt.s32 %p221, %r262, %r53; - @%p221 bra BB0_91; - -BB0_144: - mov.f32 %f2164, 0f00000000; - ld.local.v4.f32 {%f1789, %f1790, %f1791, %f1792}, [%rd2]; - rcp.rn.f32 %f374, %f1789; - mul.f32 %f375, %f374, %f1790; - st.local.f32 [%rd3], %f375; - mul.f32 %f376, %f374, %f1791; - mul.f32 %f377, %f374, %f1792; - st.local.v2.f32 [%rd3+4], {%f376, %f377}; - ld.local.v4.f32 {%f1797, %f1798, %f1799, %f1800}, [%rd3+12]; - ld.local.f32 %f1805, [%rd3]; - fma.rn.f32 %f1806, %f1805, %f1797, 0f00000000; - sub.f32 %f1807, %f1798, %f1806; - ld.local.f32 %f378, [%rd3+12]; - st.local.f32 [%rd3+16], %f1807; - fma.rn.f32 %f1808, %f376, %f378, 0f00000000; - rcp.rn.f32 %f379, %f1807; - sub.f32 %f1809, %f1799, %f1808; - mul.f32 %f380, %f379, %f1809; - fma.rn.f32 %f1810, %f377, %f378, 0f00000000; - sub.f32 %f1811, %f1800, %f1810; - mul.f32 %f381, %f379, %f1811; - st.local.v2.f32 [%rd3+20], {%f380, %f381}; - ld.local.v2.f32 {%f1812, %f1813}, [%rd3+28]; - ld.local.f32 %f1816, [%rd3]; - fma.rn.f32 %f1817, %f1816, %f1812, 0f00000000; - sub.f32 %f382, %f1813, %f1817; - st.local.f32 [%rd3+32], %f382; - add.s64 %rd80, %rd2, 32; - add.s64 %rd79, %rd2, 8; - mov.u32 %r264, -1; - -BB0_145: - ld.local.f32 %f1818, [%rd80]; - ld.local.f32 %f1819, [%rd79]; - fma.rn.f32 %f2164, %f1819, %f1818, %f2164; - add.s64 %rd80, %rd80, 4; - add.s64 %rd79, %rd79, 16; - add.s32 %r264, %r264, 1; - setp.lt.s32 %p222, %r264, 1; - @%p222 bra BB0_145; - - ld.local.v4.f32 {%f1821, %f1822, %f1823, %f1824}, [%rd3+28]; - fma.rn.f32 %f1825, %f377, %f1821, 0f00000000; - fma.rn.f32 %f1826, %f381, %f382, %f1825; - sub.f32 %f1828, %f1823, %f2164; - rcp.rn.f32 %f386, %f1828; - sub.f32 %f1830, %f1824, %f1826; - mul.f32 %f387, %f386, %f1830; - ld.local.f32 %f1831, [%rd3]; - st.local.v2.f32 [%rd3+36], {%f1828, %f387}; - ld.local.v2.f32 {%f1832, %f1833}, [%rd3+44]; - fma.rn.f32 %f1836, %f1831, %f1832, 0f00000000; - sub.f32 %f388, %f1833, %f1836; - st.local.f32 [%rd3+48], %f388; - add.s64 %rd82, %rd2, 48; - add.s64 %rd81, %rd2, 8; - mov.f32 %f2165, 0f00000000; - mov.u32 %r265, -1; - -BB0_147: - ld.local.f32 %f1837, [%rd82]; - ld.local.f32 %f1838, [%rd81]; - fma.rn.f32 %f2165, %f1838, %f1837, %f2165; - add.s64 %rd82, %rd82, 4; - add.s64 %rd81, %rd81, 16; - add.s32 %r265, %r265, 1; - setp.lt.s32 %p223, %r265, 1; - @%p223 bra BB0_147; - - ld.local.f32 %f1840, [%rd3+52]; - sub.f32 %f391, %f1840, %f2165; - st.local.f32 [%rd3+52], %f391; - add.s64 %rd84, %rd2, 48; - add.s64 %rd83, %rd2, 12; - mov.f32 %f2166, 0f00000000; - mov.u32 %r266, -1; - -BB0_149: - ld.local.f32 %f1841, [%rd84]; - ld.local.f32 %f1842, [%rd83]; - fma.rn.f32 %f2166, %f1842, %f1841, %f2166; - add.s64 %rd84, %rd84, 4; - add.s64 %rd83, %rd83, 16; - add.s32 %r266, %r266, 1; - setp.lt.s32 %p224, %r266, 2; - @%p224 bra BB0_149; - - ld.param.u64 %rd78, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_6]; - ld.param.u64 %rd77, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_5]; - ld.param.u32 %r238, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_7]; - ld.param.u64 %rd76, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_4]; - mov.u32 %r237, %tid.x; - mov.u32 %r236, %ctaid.x; - mov.u32 %r235, %ntid.x; - mad.lo.s32 %r234, %r235, %r236, %r237; - ld.local.v4.f32 {%f1843, %f1844, %f1845, %f1846}, [%rd2+48]; - sub.f32 %f1849, %f1846, %f2166; - st.local.f32 [%rd2+60], %f1849; - add.f32 %f1850, %f375, 0f00000000; - mov.f32 %f1851, 0f00000000; - sub.f32 %f1852, %f1851, %f1850; - add.f32 %f1853, %f376, 0f00000000; - fma.rn.f32 %f1854, %f380, %f1852, %f1853; - sub.f32 %f1855, %f1851, %f1854; - add.f32 %f1856, %f377, 0f00000000; - fma.rn.f32 %f1857, %f381, %f1852, %f1856; - fma.rn.f32 %f1858, %f387, %f1855, %f1857; - sub.f32 %f1859, %f1851, %f1858; - div.rn.f32 %f1860, %f1859, %f1849; - fma.rn.f32 %f1861, %f391, %f1860, 0f00000000; - sub.f32 %f1862, %f1855, %f1861; - mul.f32 %f1863, %f386, %f1862; - fma.rn.f32 %f1864, %f382, %f1863, 0f00000000; - fma.rn.f32 %f1865, %f388, %f1860, %f1864; - sub.f32 %f1866, %f1852, %f1865; - mul.f32 %f1867, %f379, %f1866; - fma.rn.f32 %f1868, %f378, %f1867, 0f00000000; - fma.rn.f32 %f1869, %f1821, %f1863, %f1868; - fma.rn.f32 %f1870, %f1843, %f1860, %f1869; - mov.f32 %f1871, 0f3F800000; - sub.f32 %f1872, %f1871, %f1870; - mul.f32 %f1873, %f374, %f1872; - fma.rn.f32 %f1874, %f375, 0f00000000, 0f00000000; - sub.f32 %f1875, %f1871, %f1874; - fma.rn.f32 %f1876, %f376, 0f00000000, 0f00000000; - fma.rn.f32 %f1877, %f380, %f1875, %f1876; - sub.f32 %f1878, %f1851, %f1877; - fma.rn.f32 %f1879, %f377, 0f00000000, 0f00000000; - fma.rn.f32 %f1880, %f381, %f1875, %f1879; - fma.rn.f32 %f1881, %f387, %f1878, %f1880; - sub.f32 %f1882, %f1851, %f1881; - div.rn.f32 %f1883, %f1882, %f1849; - fma.rn.f32 %f1884, %f391, %f1883, 0f00000000; - sub.f32 %f1885, %f1878, %f1884; - mul.f32 %f1886, %f386, %f1885; - fma.rn.f32 %f1887, %f382, %f1886, 0f00000000; - fma.rn.f32 %f1888, %f388, %f1883, %f1887; - sub.f32 %f1889, %f1875, %f1888; - mul.f32 %f1890, %f379, %f1889; - sub.f32 %f1891, %f1851, %f1874; - fma.rn.f32 %f1892, %f380, %f1891, %f1876; - sub.f32 %f1893, %f1871, %f1892; - fma.rn.f32 %f1894, %f381, %f1891, %f1879; - fma.rn.f32 %f1895, %f387, %f1893, %f1894; - sub.f32 %f1896, %f1851, %f1895; - div.rn.f32 %f1897, %f1896, %f1849; - fma.rn.f32 %f1898, %f391, %f1897, 0f00000000; - sub.f32 %f1899, %f1893, %f1898; - mul.f32 %f1900, %f386, %f1899; - sub.f32 %f1901, %f1851, %f1892; - fma.rn.f32 %f1902, %f387, %f1901, %f1894; - sub.f32 %f1903, %f1871, %f1902; - div.rn.f32 %f1904, %f1903, %f1849; - cvta.to.global.u64 %rd62, %rd76; - mul.wide.s32 %rd63, %r234, 4; - add.s64 %rd64, %rd62, %rd63; - st.global.f32 [%rd64], %f2133; - shl.b32 %r233, %r238, 2; - cvt.s64.s32 %rd65, %r233; - add.s64 %rd66, %rd64, %rd65; - st.global.f32 [%rd66], %f2132; - add.s64 %rd67, %rd66, %rd65; - st.global.f32 [%rd67], %f2131; - add.s64 %rd68, %rd67, %rd65; - st.global.f32 [%rd68], %f2064; - cvta.to.global.u64 %rd69, %rd77; - add.s64 %rd70, %rd69, %rd63; - st.global.f32 [%rd70], %f1873; - add.s64 %rd71, %rd70, %rd65; - st.global.f32 [%rd71], %f1890; - add.s64 %rd72, %rd71, %rd65; - st.global.f32 [%rd72], %f1900; - add.s64 %rd73, %rd72, %rd65; - st.global.f32 [%rd73], %f1904; - cvta.to.global.u64 %rd74, %rd78; - add.s64 %rd75, %rd74, %rd63; - st.global.f32 [%rd75], %f2162; - -BB0_151: + rcp.approx.ftz.f32 %f1073, %f1072; + add.f32 %f1074, %f1071, %f1071; + mul.f32 %f1075, %f1074, %f1073; + mul.f32 %f1076, %f1075, %f1075; + fma.rn.f32 %f1077, %f1002, %f1076, %f1001; + fma.rn.f32 %f1078, %f1077, %f1076, %f1004; + mul.rn.f32 %f1079, %f1078, %f1076; + mul.rn.f32 %f1080, %f1079, %f1075; + sub.f32 %f1081, %f1071, %f1075; + add.f32 %f1082, %f1081, %f1081; + neg.f32 %f1083, %f1075; + fma.rn.f32 %f1084, %f1083, %f1071, %f1082; + mul.rn.f32 %f1085, %f1073, %f1084; + add.f32 %f1086, %f1080, %f1075; + sub.f32 %f1087, %f1075, %f1086; + add.f32 %f1088, %f1080, %f1087; + add.f32 %f1089, %f1085, %f1088; + add.f32 %f1090, %f1086, %f1089; + sub.f32 %f1091, %f1086, %f1090; + add.f32 %f1092, %f1089, %f1091; + mul.rn.f32 %f1093, %f1069, %f1020; + mul.rn.f32 %f1094, %f1069, %f1022; + add.f32 %f1095, %f1093, %f1090; + sub.f32 %f1096, %f1093, %f1095; + add.f32 %f1097, %f1090, %f1096; + add.f32 %f1098, %f1092, %f1097; + add.f32 %f1099, %f1094, %f1098; + add.f32 %f1100, %f1095, %f1099; + sub.f32 %f1101, %f1095, %f1100; + add.f32 %f1102, %f1099, %f1101; + mul.rn.f32 %f1103, %f940, %f1100; + neg.f32 %f1104, %f1103; + fma.rn.f32 %f1105, %f940, %f1100, %f1104; + fma.rn.f32 %f1106, %f940, %f1102, %f1105; + fma.rn.f32 %f1107, %f924, %f1100, %f1106; + add.rn.f32 %f1108, %f1103, %f1107; + neg.f32 %f1109, %f1108; + add.rn.f32 %f1110, %f1103, %f1109; + add.rn.f32 %f1111, %f1110, %f1107; + mov.b32 %r399, %f1108; + setp.eq.s32 %p314, %r399, 1118925336; + add.s32 %r400, %r399, -1; + mov.b32 %f1112, %r400; + add.f32 %f1113, %f1111, 0f37000000; + selp.f32 %f211, %f1113, %f1111, %p314; + selp.f32 %f1114, %f1112, %f1108, %p314; + mul.rn.f32 %f1115, %f1114, %f1045; + cvt.rzi.f32.f32 %f1116, %f1115; + abs.f32 %f1117, %f1116; + setp.gt.f32 %p315, %f1117, 0f42FC0000; + mov.b32 %r401, %f1116; + and.b32 %r402, %r401, -2147483648; + or.b32 %r403, %r402, 1123811328; + mov.b32 %f1118, %r403; + selp.f32 %f1119, %f1118, %f1116, %p315; + fma.rn.f32 %f1120, %f1119, %f1051, %f1114; + fma.rn.f32 %f1121, %f1119, %f1053, %f1120; + mul.f32 %f1122, %f1121, 0f3FB8AA3B; + add.f32 %f1123, %f1119, 0f4B40007F; + mov.b32 %r404, %f1123; + shl.b32 %r405, %r404, 23; + mov.b32 %f1124, %r405; + ex2.approx.ftz.f32 %f1125, %f1122; + mul.f32 %f212, %f1125, %f1124; + add.f32 %f213, %f204, 0f40000000; + setp.lt.f32 %p316, %f209, 0f00000000; + and.pred %p14, %p316, %p311; + selp.f32 %f214, 0fFF800000, 0f7F800000, %p13; + add.f32 %f1126, %f209, %f209; + selp.f32 %f215, %f1126, 0f00000000, %p311; + add.f32 %f1127, %f210, 0f40000000; + mov.b32 %r82, %f1127; + add.f32 %f216, %f209, 0f40000000; + selp.f32 %f217, 0fFF800000, 0f7F800000, %p14; + setp.geu.f32 %p15, %f204, 0f00000000; + setp.geu.f32 %p16, %f209, 0f00000000; + mov.u32 %r538, %r380; + +$L__BB0_208: + setp.ltu.f32 %p317, %f200, 0f3F8060FE; + mov.f32 %f1786, %f201; + @%p317 bra $L__BB0_210; + + ex2.approx.ftz.f32 %f1128, %f201; + sub.f32 %f1130, %f937, %f1128; + mov.b32 %r406, %f1130; + or.b32 %r407, %r79, %r406; + mov.b32 %f1786, %r407; + +$L__BB0_210: + setp.ltu.f32 %p318, %f202, 0f3F8060FE; + mov.f32 %f1787, %f203; + @%p318 bra $L__BB0_212; + + ex2.approx.ftz.f32 %f1131, %f203; + sub.f32 %f1133, %f937, %f1131; + mov.b32 %r408, %f1133; + or.b32 %r409, %r80, %r408; + mov.b32 %f1787, %r409; + +$L__BB0_212: + sub.f32 %f1134, %f1786, %f1787; + mul.f32 %f233, %f1134, 0f3F000000; + cvt.rn.f32.s32 %f234, %r538; + sub.f32 %f235, %f234, %f1762; + add.f32 %f1135, %f235, 0f3F000000; + mul.f32 %f236, %f187, %f1135; + abs.f32 %f1136, %f236; + setp.ltu.f32 %p319, %f1136, 0f3F8060FE; + setp.ge.f32 %p320, %f1136, 0f3F8060FE; + mul.f32 %f1137, %f236, %f236; + selp.f32 %f1138, %f1136, %f1137, %p320; + selp.f32 %f1139, 0f3789CA3C, 0f38B1E96A, %p320; + selp.f32 %f1140, 0fB9F560B9, 0fBA574D20, %p320; + fma.rn.f32 %f1141, %f1139, %f1138, %f1140; + selp.f32 %f1142, 0f3BAC840B, 0f3BAAD5EA, %p320; + fma.rn.f32 %f1143, %f1141, %f1138, %f1142; + selp.f32 %f1144, 0fBD0C8162, 0fBCDC1BE7, %p320; + fma.rn.f32 %f1145, %f1143, %f1138, %f1144; + selp.f32 %f1146, 0f3E1CF906, 0f3DE718AF, %p320; + fma.rn.f32 %f1147, %f1145, %f1138, %f1146; + selp.f32 %f1148, 0f3F6A937E, 0fBEC093AC, %p320; + fma.rn.f32 %f1149, %f1147, %f1138, %f1148; + selp.f32 %f1150, 0f3F20D842, 0f3E0375D3, %p320; + fma.rn.f32 %f1151, %f1149, %f1138, %f1150; + neg.f32 %f1152, %f1136; + selp.f32 %f1153, %f1152, %f236, %p320; + fma.rn.f32 %f1788, %f1151, %f1153, %f1153; + @%p319 bra $L__BB0_214; + + ex2.approx.ftz.f32 %f1154, %f1788; + sub.f32 %f1156, %f937, %f1154; + mov.b32 %r410, %f1156; + mov.b32 %r411, %f236; + and.b32 %r412, %r411, -2147483648; + or.b32 %r413, %r412, %r410; + mov.b32 %f1788, %r413; + +$L__BB0_214: + add.f32 %f240, %f235, 0fBF000000; + mul.f32 %f241, %f187, %f240; + abs.f32 %f1157, %f241; + setp.ltu.f32 %p321, %f1157, 0f3F8060FE; + setp.ge.f32 %p322, %f1157, 0f3F8060FE; + mul.f32 %f1158, %f241, %f241; + selp.f32 %f1159, %f1157, %f1158, %p322; + selp.f32 %f1160, 0f3789CA3C, 0f38B1E96A, %p322; + selp.f32 %f1161, 0fB9F560B9, 0fBA574D20, %p322; + fma.rn.f32 %f1162, %f1160, %f1159, %f1161; + selp.f32 %f1163, 0f3BAC840B, 0f3BAAD5EA, %p322; + fma.rn.f32 %f1164, %f1162, %f1159, %f1163; + selp.f32 %f1165, 0fBD0C8162, 0fBCDC1BE7, %p322; + fma.rn.f32 %f1166, %f1164, %f1159, %f1165; + selp.f32 %f1167, 0f3E1CF906, 0f3DE718AF, %p322; + fma.rn.f32 %f1168, %f1166, %f1159, %f1167; + selp.f32 %f1169, 0f3F6A937E, 0fBEC093AC, %p322; + fma.rn.f32 %f1170, %f1168, %f1159, %f1169; + selp.f32 %f1171, 0f3F20D842, 0f3E0375D3, %p322; + fma.rn.f32 %f1172, %f1170, %f1159, %f1171; + neg.f32 %f1173, %f1157; + selp.f32 %f1174, %f1173, %f241, %p322; + fma.rn.f32 %f1789, %f1172, %f1174, %f1174; + @%p321 bra $L__BB0_216; + + ex2.approx.ftz.f32 %f1175, %f1789; + sub.f32 %f1177, %f937, %f1175; + mov.b32 %r414, %f1177; + mov.b32 %r415, %f241; + and.b32 %r416, %r415, -2147483648; + or.b32 %r417, %r416, %r414; + mov.b32 %f1789, %r417; + +$L__BB0_216: + sub.f32 %f1179, %f1788, %f1789; + mul.f32 %f245, %f1179, 0f3F000000; + mul.f32 %f1180, %f233, %f1761; + fma.rn.f32 %f246, %f245, %f1180, %f1760; + mad.lo.s32 %r418, %r538, %r86, %r537; + add.s32 %r419, %r418, %r2; + mul.wide.s32 %rd26, %r419, 4; + add.s64 %rd27, %rd1, %rd26; + ld.global.f32 %f247, [%rd27]; + setp.eq.f32 %p323, %f207, 0f7F800000; + mov.f32 %f1790, 0f7F800000; + @%p323 bra $L__BB0_218; + + fma.rn.f32 %f1790, %f207, %f206, %f207; + +$L__BB0_218: + mov.b32 %r420, %f1790; + xor.b32 %r421, %r420, -2147483648; + mov.b32 %f1181, %r421; + selp.f32 %f250, %f1181, %f1790, %p13; + setp.eq.f32 %p324, %f204, 0f00000000; + selp.f32 %f1791, %f208, %f250, %p324; + @%p15 bra $L__BB0_221; + + cvt.rzi.f32.f32 %f1183, %f940; + setp.eq.f32 %p325, %f1183, 0f40000000; + mov.f32 %f1791, %f250; + @%p325 bra $L__BB0_221; + + mov.f32 %f1791, 0f7FFFFFFF; + +$L__BB0_221: + setp.eq.f32 %p326, %f212, 0f7F800000; + mov.f32 %f1792, 0f7F800000; + @%p326 bra $L__BB0_223; + + fma.rn.f32 %f1792, %f212, %f211, %f212; + +$L__BB0_223: + mov.b32 %r422, %f1792; + xor.b32 %r423, %r422, -2147483648; + mov.b32 %f1186, %r423; + selp.f32 %f255, %f1186, %f1792, %p14; + setp.eq.f32 %p327, %f209, 0f00000000; + selp.f32 %f1793, %f215, %f255, %p327; + @%p16 bra $L__BB0_226; + + cvt.rzi.f32.f32 %f1188, %f940; + setp.eq.f32 %p328, %f1188, 0f40000000; + mov.f32 %f1793, %f255; + @%p328 bra $L__BB0_226; + + mov.f32 %f1793, 0f7FFFFFFF; + +$L__BB0_226: + setp.gtu.f32 %p329, %f205, 0f7F800000; + mov.f32 %f1794, 0f7F800000; + selp.f32 %f1191, %f213, %f1791, %p329; + setp.neu.f32 %p330, %f205, 0f7F800000; + selp.f32 %f1192, %f1191, %f214, %p330; + setp.gt.s32 %p331, %r81, 2139095039; + selp.f32 %f1193, %f1192, %f1791, %p331; + mul.f32 %f1194, %f1193, 0fBF000000; + setp.eq.f32 %p332, %f204, 0f3F800000; + selp.f32 %f1195, 0fBF000000, %f1194, %p332; + mov.f32 %f1197, 0f3BBB989D; + fma.rn.f32 %f1198, %f1195, %f1197, %f350; + mov.f32 %f1200, 0f437C0000; + cvt.sat.f32.f32 %f1201, %f1198; + mov.f32 %f1202, 0f4B400001; + fma.rm.f32 %f1203, %f1201, %f1200, %f1202; + setp.gtu.f32 %p333, %f210, 0f7F800000; + selp.f32 %f1204, %f216, %f1793, %p333; + setp.neu.f32 %p334, %f210, 0f7F800000; + selp.f32 %f1205, %f1204, %f217, %p334; + setp.gt.s32 %p335, %r82, 2139095039; + selp.f32 %f1206, %f1205, %f1793, %p335; + mul.f32 %f1207, %f1206, 0fBF000000; + setp.eq.f32 %p336, %f209, 0f3F800000; + selp.f32 %f1208, 0fBF000000, %f1207, %p336; + fma.rn.f32 %f1209, %f1208, %f1197, %f350; + cvt.sat.f32.f32 %f1210, %f1209; + fma.rm.f32 %f1211, %f1210, %f1200, %f1202; + add.f32 %f1212, %f1211, 0fCB40007F; + neg.f32 %f1213, %f1212; + fma.rn.f32 %f1214, %f1208, %f1045, %f1213; + mov.f32 %f1215, 0f32A57060; + fma.rn.f32 %f1216, %f1208, %f1215, %f1214; + mov.b32 %r424, %f1211; + shl.b32 %r425, %r424, 23; + mov.b32 %f1217, %r425; + ex2.approx.ftz.f32 %f1218, %f1216; + mul.f32 %f1219, %f1218, %f1217; + mov.b32 %r426, %f1203; + shl.b32 %r427, %r426, 23; + mov.b32 %f1220, %r427; + add.f32 %f1221, %f1203, 0fCB40007F; + neg.f32 %f1222, %f1221; + fma.rn.f32 %f1223, %f1195, %f1045, %f1222; + fma.rn.f32 %f1224, %f1195, %f1215, %f1223; + ex2.approx.ftz.f32 %f1225, %f1224; + mul.f32 %f1226, %f1225, %f1220; + sub.f32 %f1227, %f1226, %f1219; + mul.f32 %f1228, %f186, %f1227; + mul.f32 %f258, %f245, %f1228; + add.f32 %f1229, %f234, 0f3F000000; + sub.f32 %f1230, %f1229, %f1762; + div.rn.f32 %f259, %f1230, %f317; + abs.f32 %f260, %f259; + setp.lt.f32 %p337, %f260, 0f00800000; + mul.f32 %f1231, %f260, 0f4B800000; + selp.f32 %f1232, %f1231, %f260, %p337; + selp.f32 %f1233, 0fC3170000, 0fC2FE0000, %p337; + mov.b32 %r428, %f1232; + and.b32 %r429, %r428, 8388607; + or.b32 %r430, %r429, 1065353216; + mov.b32 %f1234, %r430; + shr.u32 %r431, %r428, 23; + cvt.rn.f32.u32 %f1235, %r431; + add.f32 %f1236, %f1233, %f1235; + setp.gt.f32 %p338, %f1234, 0f3FB504F3; + mul.f32 %f1237, %f1234, 0f3F000000; + add.f32 %f1238, %f1236, 0f3F800000; + selp.f32 %f1239, %f1238, %f1236, %p338; + selp.f32 %f1240, %f1237, %f1234, %p338; + add.f32 %f1241, %f1240, 0fBF800000; + add.f32 %f1242, %f1240, 0f3F800000; + rcp.approx.ftz.f32 %f1243, %f1242; + add.f32 %f1244, %f1241, %f1241; + mul.f32 %f1246, %f1244, %f1243; + mul.f32 %f1247, %f1246, %f1246; + fma.rn.f32 %f1250, %f1002, %f1247, %f1001; + fma.rn.f32 %f1252, %f1250, %f1247, %f1004; + mul.rn.f32 %f1253, %f1252, %f1247; + mul.rn.f32 %f1254, %f1253, %f1246; + sub.f32 %f1255, %f1241, %f1246; + add.f32 %f1256, %f1255, %f1255; + neg.f32 %f1257, %f1246; + fma.rn.f32 %f1258, %f1257, %f1241, %f1256; + mul.rn.f32 %f1259, %f1243, %f1258; + add.f32 %f1260, %f1254, %f1246; + sub.f32 %f1261, %f1246, %f1260; + add.f32 %f1262, %f1254, %f1261; + add.f32 %f1263, %f1259, %f1262; + add.f32 %f1264, %f1260, %f1263; + sub.f32 %f1265, %f1260, %f1264; + add.f32 %f1266, %f1263, %f1265; + mul.rn.f32 %f1268, %f1239, %f1020; + mul.rn.f32 %f1270, %f1239, %f1022; + add.f32 %f1271, %f1268, %f1264; + sub.f32 %f1272, %f1268, %f1271; + add.f32 %f1273, %f1264, %f1272; + add.f32 %f1274, %f1266, %f1273; + add.f32 %f1275, %f1270, %f1274; + add.f32 %f1276, %f1271, %f1275; + sub.f32 %f1277, %f1271, %f1276; + add.f32 %f1278, %f1275, %f1277; + mul.rn.f32 %f1279, %f940, %f1276; + neg.f32 %f1280, %f1279; + fma.rn.f32 %f1281, %f940, %f1276, %f1280; + fma.rn.f32 %f1282, %f940, %f1278, %f1281; + mov.f32 %f1283, 0f00000000; + fma.rn.f32 %f1284, %f1283, %f1276, %f1282; + add.rn.f32 %f1285, %f1279, %f1284; + neg.f32 %f1286, %f1285; + add.rn.f32 %f1287, %f1279, %f1286; + add.rn.f32 %f1288, %f1287, %f1284; + mov.b32 %r432, %f1285; + setp.eq.s32 %p339, %r432, 1118925336; + add.s32 %r433, %r432, -1; + mov.b32 %f1289, %r433; + add.f32 %f1290, %f1288, 0f37000000; + selp.f32 %f261, %f1290, %f1288, %p339; + selp.f32 %f1291, %f1289, %f1285, %p339; + mul.rn.f32 %f1292, %f1291, %f1045; + cvt.rzi.f32.f32 %f1293, %f1292; + abs.f32 %f1294, %f1293; + setp.gt.f32 %p340, %f1294, 0f42FC0000; + mov.b32 %r434, %f1293; + and.b32 %r435, %r434, -2147483648; + or.b32 %r436, %r435, 1123811328; + mov.b32 %f1295, %r436; + selp.f32 %f1296, %f1295, %f1293, %p340; + fma.rn.f32 %f1298, %f1296, %f1051, %f1291; + fma.rn.f32 %f1300, %f1296, %f1053, %f1298; + mul.f32 %f1301, %f1300, 0f3FB8AA3B; + add.f32 %f1302, %f1296, 0f4B40007F; + mov.b32 %r437, %f1302; + shl.b32 %r438, %r437, 23; + mov.b32 %f1303, %r438; + ex2.approx.ftz.f32 %f1304, %f1301; + mul.f32 %f262, %f1304, %f1303; + setp.eq.f32 %p341, %f262, 0f7F800000; + @%p341 bra $L__BB0_228; + + fma.rn.f32 %f1794, %f262, %f261, %f262; + +$L__BB0_228: + setp.lt.f32 %p342, %f259, 0f00000000; + and.pred %p17, %p342, %p311; + setp.eq.f32 %p344, %f259, 0f00000000; + @%p344 bra $L__BB0_232; + bra.uni $L__BB0_229; + +$L__BB0_232: + add.f32 %f1309, %f259, %f259; + selp.f32 %f1796, %f1309, 0f00000000, %p311; + bra.uni $L__BB0_233; + +$L__BB0_229: + mov.b32 %r439, %f1794; + xor.b32 %r440, %r439, -2147483648; + mov.b32 %f1305, %r440; + selp.f32 %f1796, %f1305, %f1794, %p17; + setp.geu.f32 %p345, %f259, 0f00000000; + @%p345 bra $L__BB0_233; + + cvt.rzi.f32.f32 %f1307, %f940; + setp.eq.f32 %p346, %f1307, 0f40000000; + @%p346 bra $L__BB0_233; + + mov.f32 %f1796, 0f7FFFFFFF; + +$L__BB0_233: + add.f32 %f1310, %f260, 0f40000000; + mov.b32 %r441, %f1310; + setp.lt.s32 %p348, %r441, 2139095040; + @%p348 bra $L__BB0_238; + + setp.gtu.f32 %p349, %f260, 0f7F800000; + @%p349 bra $L__BB0_237; + bra.uni $L__BB0_235; + +$L__BB0_237: + add.f32 %f1796, %f259, 0f40000000; + bra.uni $L__BB0_238; + +$L__BB0_235: + setp.neu.f32 %p350, %f260, 0f7F800000; + @%p350 bra $L__BB0_238; + + selp.f32 %f1796, 0fFF800000, 0f7F800000, %p17; + +$L__BB0_238: + mul.f32 %f1312, %f1796, 0fBF000000; + setp.eq.f32 %p351, %f259, 0f3F800000; + selp.f32 %f1313, 0fBF000000, %f1312, %p351; + fma.rn.f32 %f1316, %f1313, %f1197, %f350; + cvt.sat.f32.f32 %f1319, %f1316; + fma.rm.f32 %f1321, %f1319, %f1200, %f1202; + add.f32 %f1322, %f1321, 0fCB40007F; + neg.f32 %f1323, %f1322; + fma.rn.f32 %f1324, %f1313, %f1045, %f1323; + fma.rn.f32 %f1326, %f1313, %f1215, %f1324; + mov.b32 %r442, %f1321; + shl.b32 %r443, %r442, 23; + mov.b32 %f1327, %r443; + ex2.approx.ftz.f32 %f1328, %f1326; + mul.f32 %f271, %f1328, %f1327; + div.rn.f32 %f272, %f240, %f317; + abs.f32 %f273, %f272; + setp.lt.f32 %p352, %f273, 0f00800000; + mul.f32 %f1329, %f273, 0f4B800000; + selp.f32 %f1330, %f1329, %f273, %p352; + selp.f32 %f1331, 0fC3170000, 0fC2FE0000, %p352; + mov.b32 %r444, %f1330; + and.b32 %r445, %r444, 8388607; + or.b32 %r446, %r445, 1065353216; + mov.b32 %f1332, %r446; + shr.u32 %r447, %r444, 23; + cvt.rn.f32.u32 %f1333, %r447; + add.f32 %f1334, %f1331, %f1333; + setp.gt.f32 %p353, %f1332, 0f3FB504F3; + mul.f32 %f1335, %f1332, 0f3F000000; + add.f32 %f1336, %f1334, 0f3F800000; + selp.f32 %f1337, %f1336, %f1334, %p353; + selp.f32 %f1338, %f1335, %f1332, %p353; + add.f32 %f1339, %f1338, 0fBF800000; + add.f32 %f1340, %f1338, 0f3F800000; + rcp.approx.ftz.f32 %f1341, %f1340; + add.f32 %f1342, %f1339, %f1339; + mul.f32 %f1344, %f1342, %f1341; + mul.f32 %f1345, %f1344, %f1344; + fma.rn.f32 %f1348, %f1002, %f1345, %f1001; + fma.rn.f32 %f1350, %f1348, %f1345, %f1004; + mul.rn.f32 %f1351, %f1350, %f1345; + mul.rn.f32 %f1352, %f1351, %f1344; + sub.f32 %f1353, %f1339, %f1344; + add.f32 %f1354, %f1353, %f1353; + neg.f32 %f1355, %f1344; + fma.rn.f32 %f1356, %f1355, %f1339, %f1354; + mul.rn.f32 %f1357, %f1341, %f1356; + add.f32 %f1358, %f1352, %f1344; + sub.f32 %f1359, %f1344, %f1358; + add.f32 %f1360, %f1352, %f1359; + add.f32 %f1361, %f1357, %f1360; + add.f32 %f1362, %f1358, %f1361; + sub.f32 %f1363, %f1358, %f1362; + add.f32 %f1364, %f1361, %f1363; + mul.rn.f32 %f1366, %f1337, %f1020; + mul.rn.f32 %f1368, %f1337, %f1022; + add.f32 %f1369, %f1366, %f1362; + sub.f32 %f1370, %f1366, %f1369; + add.f32 %f1371, %f1362, %f1370; + add.f32 %f1372, %f1364, %f1371; + add.f32 %f1373, %f1368, %f1372; + add.f32 %f1374, %f1369, %f1373; + sub.f32 %f1375, %f1369, %f1374; + add.f32 %f1376, %f1373, %f1375; + mul.rn.f32 %f1377, %f940, %f1374; + neg.f32 %f1378, %f1377; + fma.rn.f32 %f1379, %f940, %f1374, %f1378; + fma.rn.f32 %f1380, %f940, %f1376, %f1379; + fma.rn.f32 %f1382, %f1283, %f1374, %f1380; + add.rn.f32 %f1383, %f1377, %f1382; + neg.f32 %f1384, %f1383; + add.rn.f32 %f1385, %f1377, %f1384; + add.rn.f32 %f1386, %f1385, %f1382; + mov.b32 %r448, %f1383; + setp.eq.s32 %p354, %r448, 1118925336; + add.s32 %r449, %r448, -1; + mov.b32 %f1387, %r449; + add.f32 %f1388, %f1386, 0f37000000; + selp.f32 %f274, %f1388, %f1386, %p354; + selp.f32 %f1389, %f1387, %f1383, %p354; + mul.rn.f32 %f1390, %f1389, %f1045; + cvt.rzi.f32.f32 %f1391, %f1390; + abs.f32 %f1392, %f1391; + setp.gt.f32 %p355, %f1392, 0f42FC0000; + mov.b32 %r450, %f1391; + and.b32 %r451, %r450, -2147483648; + or.b32 %r452, %r451, 1123811328; + mov.b32 %f1393, %r452; + selp.f32 %f1394, %f1393, %f1391, %p355; + fma.rn.f32 %f1396, %f1394, %f1051, %f1389; + fma.rn.f32 %f1398, %f1394, %f1053, %f1396; + mul.f32 %f1399, %f1398, 0f3FB8AA3B; + add.f32 %f1400, %f1394, 0f4B40007F; + mov.b32 %r453, %f1400; + shl.b32 %r454, %r453, 23; + mov.b32 %f1401, %r454; + ex2.approx.ftz.f32 %f1402, %f1399; + mul.f32 %f275, %f1402, %f1401; + setp.eq.f32 %p356, %f275, 0f7F800000; + mov.f32 %f1797, 0f7F800000; + @%p356 bra $L__BB0_240; + + fma.rn.f32 %f1797, %f275, %f274, %f275; + +$L__BB0_240: + setp.lt.f32 %p357, %f272, 0f00000000; + and.pred %p18, %p357, %p311; + setp.eq.f32 %p359, %f272, 0f00000000; + @%p359 bra $L__BB0_244; + bra.uni $L__BB0_241; + +$L__BB0_244: + add.f32 %f1407, %f272, %f272; + selp.f32 %f1799, %f1407, 0f00000000, %p311; + bra.uni $L__BB0_245; + +$L__BB0_241: + mov.b32 %r455, %f1797; + xor.b32 %r456, %r455, -2147483648; + mov.b32 %f1403, %r456; + selp.f32 %f1799, %f1403, %f1797, %p18; + setp.geu.f32 %p360, %f272, 0f00000000; + @%p360 bra $L__BB0_245; + + cvt.rzi.f32.f32 %f1405, %f940; + setp.eq.f32 %p361, %f1405, 0f40000000; + @%p361 bra $L__BB0_245; + + mov.f32 %f1799, 0f7FFFFFFF; + +$L__BB0_245: + add.f32 %f1408, %f273, 0f40000000; + mov.b32 %r457, %f1408; + setp.lt.s32 %p363, %r457, 2139095040; + @%p363 bra $L__BB0_250; + + setp.gtu.f32 %p364, %f273, 0f7F800000; + @%p364 bra $L__BB0_249; + bra.uni $L__BB0_247; + +$L__BB0_249: + add.f32 %f1799, %f272, 0f40000000; + bra.uni $L__BB0_250; + +$L__BB0_247: + setp.neu.f32 %p365, %f273, 0f7F800000; + @%p365 bra $L__BB0_250; + + selp.f32 %f1799, 0fFF800000, 0f7F800000, %p18; + +$L__BB0_250: + mul.f32 %f1409, %f1799, 0fBF000000; + setp.eq.f32 %p366, %f272, 0f3F800000; + selp.f32 %f1410, 0fBF000000, %f1409, %p366; + fma.rn.f32 %f1413, %f1410, %f1197, %f350; + cvt.sat.f32.f32 %f1416, %f1413; + fma.rm.f32 %f1418, %f1416, %f1200, %f1202; + add.f32 %f1419, %f1418, 0fCB40007F; + neg.f32 %f1420, %f1419; + fma.rn.f32 %f1421, %f1410, %f1045, %f1420; + fma.rn.f32 %f1423, %f1410, %f1215, %f1421; + mov.b32 %r458, %f1418; + shl.b32 %r459, %r458, 23; + mov.b32 %f1424, %r459; + ex2.approx.ftz.f32 %f1425, %f1423; + mul.f32 %f1426, %f1425, %f1424; + sub.f32 %f1427, %f271, %f1426; + mul.f32 %f1428, %f186, %f1427; + mul.f32 %f1429, %f233, %f1428; + mul.f32 %f1430, %f258, %f258; + div.rn.f32 %f1431, %f1430, %f246; + add.f32 %f1781, %f1781, %f1431; + mul.f32 %f1432, %f1429, %f258; + div.rn.f32 %f1433, %f1432, %f246; + add.f32 %f1780, %f1780, %f1433; + mul.f32 %f1434, %f233, %f245; + mul.f32 %f1435, %f1434, %f258; + div.rn.f32 %f1436, %f1435, %f246; + add.f32 %f1779, %f1779, %f1436; + div.rn.f32 %f1437, %f258, %f246; + add.f32 %f1778, %f1778, %f1437; + mul.f32 %f1438, %f1429, %f1429; + div.rn.f32 %f1439, %f1438, %f246; + add.f32 %f1777, %f1777, %f1439; + mul.f32 %f1440, %f1434, %f1429; + div.rn.f32 %f1441, %f1440, %f246; + add.f32 %f1776, %f1776, %f1441; + div.rn.f32 %f1442, %f1429, %f246; + add.f32 %f1775, %f1775, %f1442; + mul.f32 %f1443, %f1434, %f1434; + div.rn.f32 %f1444, %f1443, %f246; + add.f32 %f1782, %f1782, %f1444; + div.rn.f32 %f1445, %f1434, %f246; + add.f32 %f1783, %f1783, %f1445; + rcp.rn.f32 %f1446, %f246; + add.f32 %f1784, %f1784, %f1446; + setp.leu.f32 %p367, %f246, 0f00000000; + @%p367 bra $L__BB0_258; + + setp.gt.f32 %p368, %f247, 0f00000000; + @%p368 bra $L__BB0_253; + bra.uni $L__BB0_252; + +$L__BB0_253: + setp.lt.f32 %p369, %f246, 0f00800000; + mul.f32 %f1447, %f246, 0f4B000000; + selp.f32 %f295, %f1447, %f246, %p369; + selp.f32 %f1448, 0fC1B80000, 0f00000000, %p369; + mov.b32 %r460, %f295; + add.s32 %r461, %r460, -1059760811; + and.b32 %r462, %r461, -8388608; + sub.s32 %r463, %r460, %r462; + mov.b32 %f1449, %r463; + cvt.rn.f32.s32 %f1450, %r462; + mov.f32 %f1451, 0f34000000; + fma.rn.f32 %f1452, %f1450, %f1451, %f1448; + add.f32 %f1453, %f1449, 0fBF800000; + mov.f32 %f1454, 0f3E1039F6; + mov.f32 %f1455, 0fBE055027; + fma.rn.f32 %f1456, %f1455, %f1453, %f1454; + mov.f32 %f1457, 0fBDF8CDCC; + fma.rn.f32 %f1458, %f1456, %f1453, %f1457; + mov.f32 %f1459, 0f3E0F2955; + fma.rn.f32 %f1460, %f1458, %f1453, %f1459; + mov.f32 %f1461, 0fBE2AD8B9; + fma.rn.f32 %f1462, %f1460, %f1453, %f1461; + mov.f32 %f1463, 0f3E4CED0B; + fma.rn.f32 %f1464, %f1462, %f1453, %f1463; + mov.f32 %f1465, 0fBE7FFF22; + fma.rn.f32 %f1466, %f1464, %f1453, %f1465; + mov.f32 %f1467, 0f3EAAAA78; + fma.rn.f32 %f1468, %f1466, %f1453, %f1467; + mov.f32 %f1469, 0fBF000000; + fma.rn.f32 %f1470, %f1468, %f1453, %f1469; + mul.f32 %f1471, %f1453, %f1470; + fma.rn.f32 %f1472, %f1471, %f1453, %f1453; + mov.f32 %f1473, 0f3F317218; + fma.rn.f32 %f1800, %f1452, %f1473, %f1472; + setp.lt.u32 %p370, %r460, 2139095040; + @%p370 bra $L__BB0_255; + + mov.f32 %f1474, 0f7F800000; + fma.rn.f32 %f1800, %f295, %f1474, %f1474; + +$L__BB0_255: + setp.eq.f32 %p371, %f295, 0f00000000; + selp.f32 %f1475, 0fFF800000, %f1800, %p371; + mul.f32 %f1476, %f247, %f1475; + sub.f32 %f299, %f1476, %f246; + mul.f32 %f1477, %f247, 0f4B000000; + setp.lt.f32 %p372, %f247, 0f00800000; + selp.f32 %f300, %f1477, %f247, %p372; + selp.f32 %f1478, 0fC1B80000, 0f00000000, %p372; + mov.b32 %r464, %f300; + add.s32 %r465, %r464, -1059760811; + and.b32 %r466, %r465, -8388608; + sub.s32 %r467, %r464, %r466; + mov.b32 %f1479, %r467; + cvt.rn.f32.s32 %f1480, %r466; + fma.rn.f32 %f1482, %f1480, %f1451, %f1478; + add.f32 %f1483, %f1479, 0fBF800000; + fma.rn.f32 %f1486, %f1455, %f1483, %f1454; + fma.rn.f32 %f1488, %f1486, %f1483, %f1457; + fma.rn.f32 %f1490, %f1488, %f1483, %f1459; + fma.rn.f32 %f1492, %f1490, %f1483, %f1461; + fma.rn.f32 %f1494, %f1492, %f1483, %f1463; + fma.rn.f32 %f1496, %f1494, %f1483, %f1465; + fma.rn.f32 %f1498, %f1496, %f1483, %f1467; + fma.rn.f32 %f1500, %f1498, %f1483, %f1469; + mul.f32 %f1501, %f1483, %f1500; + fma.rn.f32 %f1502, %f1501, %f1483, %f1483; + fma.rn.f32 %f1801, %f1482, %f1473, %f1502; + setp.lt.u32 %p373, %r464, 2139095040; + @%p373 bra $L__BB0_257; + + mov.f32 %f1504, 0f7F800000; + fma.rn.f32 %f1801, %f300, %f1504, %f1504; + +$L__BB0_257: + setp.eq.f32 %p374, %f300, 0f00000000; + selp.f32 %f1505, 0fFF800000, %f1801, %p374; + mul.f32 %f1506, %f247, %f1505; + sub.f32 %f1507, %f299, %f1506; + add.f32 %f1508, %f247, %f1507; + add.f32 %f1802, %f1802, %f1508; + bra.uni $L__BB0_258; + +$L__BB0_252: + sub.f32 %f1802, %f1802, %f246; + +$L__BB0_258: + add.s32 %r538, %r538, 1; + setp.lt.s32 %p375, %r538, %r86; + @%p375 bra $L__BB0_208; + + add.s32 %r537, %r537, 1; + setp.lt.s32 %p376, %r537, %r86; + @%p376 bra $L__BB0_207; + +$L__BB0_260: + ld.param.u64 %rd46, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_5]; + ld.param.u32 %r478, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_7]; + ld.param.u64 %rd45, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_4]; + ld.param.u64 %rd44, [_Z19kernel_MLEFit_XYNB_PKffiiPfS1_S1_i_param_6]; + mov.u32 %r477, %tid.x; + mov.u32 %r476, %ntid.x; + mov.u32 %r475, %ctaid.x; + mad.lo.s32 %r474, %r475, %r476, %r477; + cvta.to.global.u64 %rd28, %rd44; + rcp.rn.f32 %f1509, %f1781; + mov.f32 %f1510, 0f3F800000; + mul.f32 %f1511, %f1509, %f1780; + mul.f32 %f1512, %f1509, %f1779; + mul.f32 %f1513, %f1509, %f1778; + fma.rn.f32 %f1514, %f1511, %f1780, 0f00000000; + sub.f32 %f1516, %f1777, %f1514; + fma.rn.f32 %f1517, %f1512, %f1780, 0f00000000; + rcp.rn.f32 %f1518, %f1516; + sub.f32 %f1519, %f1776, %f1517; + mul.f32 %f1520, %f1518, %f1519; + fma.rn.f32 %f1521, %f1513, %f1780, 0f00000000; + sub.f32 %f1522, %f1775, %f1521; + mul.f32 %f1523, %f1518, %f1522; + fma.rn.f32 %f1524, %f1511, %f1779, 0f00000000; + sub.f32 %f1525, %f1776, %f1524; + fma.rn.f32 %f1526, %f1512, %f1779, 0f00000000; + fma.rn.f32 %f1527, %f1520, %f1525, %f1526; + sub.f32 %f1528, %f1782, %f1527; + fma.rn.f32 %f1529, %f1513, %f1779, 0f00000000; + fma.rn.f32 %f1530, %f1523, %f1525, %f1529; + rcp.rn.f32 %f1531, %f1528; + sub.f32 %f1532, %f1783, %f1530; + mul.f32 %f1533, %f1531, %f1532; + fma.rn.f32 %f1534, %f1511, %f1778, 0f00000000; + sub.f32 %f1535, %f1775, %f1534; + fma.rn.f32 %f1536, %f1512, %f1778, 0f00000000; + fma.rn.f32 %f1537, %f1520, %f1535, %f1536; + sub.f32 %f1538, %f1783, %f1537; + fma.rn.f32 %f1539, %f1513, %f1778, 0f00000000; + fma.rn.f32 %f1540, %f1523, %f1535, %f1539; + fma.rn.f32 %f1541, %f1533, %f1538, %f1540; + sub.f32 %f1542, %f1784, %f1541; + add.f32 %f1543, %f1511, 0f00000000; + sub.f32 %f1544, %f924, %f1543; + add.f32 %f1545, %f1512, 0f00000000; + fma.rn.f32 %f1546, %f1520, %f1544, %f1545; + sub.f32 %f1547, %f924, %f1546; + add.f32 %f1548, %f1513, 0f00000000; + fma.rn.f32 %f1549, %f1523, %f1544, %f1548; + fma.rn.f32 %f1550, %f1533, %f1547, %f1549; + sub.f32 %f1551, %f924, %f1550; + div.rn.f32 %f1552, %f1551, %f1542; + fma.rn.f32 %f1553, %f1538, %f1552, 0f00000000; + sub.f32 %f1554, %f1547, %f1553; + mul.f32 %f1555, %f1531, %f1554; + fma.rn.f32 %f1556, %f1525, %f1555, 0f00000000; + fma.rn.f32 %f1557, %f1535, %f1552, %f1556; + sub.f32 %f1558, %f1544, %f1557; + mul.f32 %f1559, %f1518, %f1558; + fma.rn.f32 %f1560, %f1780, %f1559, 0f00000000; + fma.rn.f32 %f1561, %f1779, %f1555, %f1560; + fma.rn.f32 %f1562, %f1778, %f1552, %f1561; + sub.f32 %f1563, %f1510, %f1562; + mul.f32 %f1564, %f1509, %f1563; + fma.rn.f32 %f1565, %f1511, 0f00000000, 0f00000000; + sub.f32 %f1566, %f1510, %f1565; + fma.rn.f32 %f1567, %f1512, 0f00000000, 0f00000000; + fma.rn.f32 %f1568, %f1520, %f1566, %f1567; + sub.f32 %f1569, %f924, %f1568; + fma.rn.f32 %f1570, %f1513, 0f00000000, 0f00000000; + fma.rn.f32 %f1571, %f1523, %f1566, %f1570; + fma.rn.f32 %f1572, %f1533, %f1569, %f1571; + sub.f32 %f1573, %f924, %f1572; + div.rn.f32 %f1574, %f1573, %f1542; + fma.rn.f32 %f1575, %f1538, %f1574, 0f00000000; + sub.f32 %f1576, %f1569, %f1575; + mul.f32 %f1577, %f1531, %f1576; + fma.rn.f32 %f1578, %f1525, %f1577, 0f00000000; + fma.rn.f32 %f1579, %f1535, %f1574, %f1578; + sub.f32 %f1580, %f1566, %f1579; + mul.f32 %f1581, %f1518, %f1580; + sub.f32 %f1582, %f924, %f1565; + fma.rn.f32 %f1583, %f1520, %f1582, %f1567; + sub.f32 %f1584, %f1510, %f1583; + fma.rn.f32 %f1585, %f1523, %f1582, %f1570; + fma.rn.f32 %f1586, %f1533, %f1584, %f1585; + sub.f32 %f1587, %f924, %f1586; + div.rn.f32 %f1588, %f1587, %f1542; + fma.rn.f32 %f1589, %f1538, %f1588, 0f00000000; + sub.f32 %f1590, %f1584, %f1589; + mul.f32 %f1591, %f1531, %f1590; + sub.f32 %f1592, %f924, %f1583; + fma.rn.f32 %f1593, %f1533, %f1592, %f1585; + sub.f32 %f1594, %f1510, %f1593; + div.rn.f32 %f1595, %f1594, %f1542; + cvta.to.global.u64 %rd29, %rd45; + mul.wide.s32 %rd30, %r474, 4; + add.s64 %rd31, %rd29, %rd30; + st.global.f32 [%rd31], %f1763; + mul.wide.s32 %rd32, %r478, 4; + add.s64 %rd33, %rd31, %rd32; + st.global.f32 [%rd33], %f1762; + add.s32 %r468, %r474, %r478; + add.s32 %r469, %r468, %r478; + mul.wide.s32 %rd34, %r469, 4; + add.s64 %rd35, %rd29, %rd34; + st.global.f32 [%rd35], %f1761; + add.s32 %r470, %r469, %r478; + mul.wide.s32 %rd36, %r470, 4; + add.s64 %rd37, %rd29, %rd36; + st.global.f32 [%rd37], %f1760; + cvta.to.global.u64 %rd38, %rd46; + add.s64 %rd39, %rd38, %rd30; + st.global.f32 [%rd39], %f1564; + add.s64 %rd40, %rd39, %rd32; + st.global.f32 [%rd40], %f1581; + add.s64 %rd41, %rd38, %rd34; + st.global.f32 [%rd41], %f1591; + add.s64 %rd42, %rd38, %rd36; + st.global.f32 [%rd42], %f1595; + add.s64 %rd43, %rd28, %rd30; + st.global.f32 [%rd43], %f1802; + +$L__BB0_261: ret; -} +} // .globl _Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i .visible .entry _Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i( .param .u64 _Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_0, @@ -2721,3921 +3641,5808 @@ BB0_151: .param .u32 _Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_7 ) { - .local .align 4 .b8 __local_depot1[100]; - .reg .b64 %SP; - .reg .b64 %SPL; - .reg .pred %p<371>; - .reg .f32 %f<3132>; - .reg .b32 %r<319>; - .reg .b64 %rd<107>; - - - mov.u64 %SPL, __local_depot1; - ld.param.u64 %rd44, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_0]; - ld.param.f32 %f3076, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_1]; - ld.param.u32 %r63, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_2]; - ld.param.u32 %r64, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_3]; - ld.param.u32 %r65, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_7]; - cvta.to.global.u64 %rd1, %rd44; - add.u64 %rd2, %SPL, 0; - mov.u32 %r66, %ntid.x; - mov.u32 %r67, %ctaid.x; - mov.u32 %r68, %tid.x; - mad.lo.s32 %r1, %r66, %r67, %r68; - setp.ge.s32 %p13, %r1, %r65; - @%p13 bra BB1_218; - - mov.u32 %r69, 0; - mov.u64 %rd94, %rd2; - mov.u32 %r295, %r69; - -BB1_2: - st.local.u32 [%rd94], %r69; - add.s64 %rd94, %rd94, 4; - add.s32 %r295, %r295, 1; - setp.lt.u32 %p14, %r295, 25; - @%p14 bra BB1_2; - - mul.lo.s32 %r71, %r63, %r63; - mul.lo.s32 %r4, %r71, %r1; - mov.f32 %f560, 0f00000000; - setp.lt.s32 %p15, %r63, 1; - mov.f32 %f1, %f560; - mov.f32 %f2, %f560; - mov.f32 %f3, %f560; - @%p15 bra BB1_17; - - and.b32 %r5, %r63, 3; - shl.b32 %r6, %r63, 2; - mov.f32 %f563, 0f00000000; - mov.u32 %r72, 0; - mov.u32 %r296, %r72; - mov.f32 %f1, %f563; - mov.f32 %f2, %f563; - mov.f32 %f3, %f563; - -BB1_5: - cvt.rn.f32.s32 %f4, %r296; - setp.eq.s32 %p16, %r5, 0; - @%p16 bra BB1_6; - - setp.eq.s32 %p17, %r5, 1; - @%p17 bra BB1_8; - bra.uni BB1_9; - -BB1_8: - mov.u32 %r298, %r72; - bra.uni BB1_13; - -BB1_6: - mov.u32 %r300, %r72; - mov.f32 %f2982, %f1; - mov.f32 %f2983, %f2; - mov.f32 %f2984, %f3; - mov.f32 %f1, %f563; - mov.f32 %f2, %f563; - mov.f32 %f3, %f563; - bra.uni BB1_14; - -BB1_9: - setp.eq.s32 %p18, %r5, 2; - @%p18 bra BB1_10; - bra.uni BB1_11; - -BB1_10: - mov.u32 %r297, %r72; - bra.uni BB1_12; - -BB1_11: - add.s32 %r77, %r296, %r4; - mul.wide.s32 %rd50, %r77, 4; - add.s64 %rd51, %rd1, %rd50; - ld.global.f32 %f567, [%rd51]; - fma.rn.f32 %f3, %f4, %f567, %f3; - fma.rn.f32 %f2, %f567, 0f00000000, %f2; - add.f32 %f1, %f1, %f567; - mov.u32 %r297, 1; - -BB1_12: - neg.s32 %r78, %r297; - and.b32 %r79, %r78, %r63; - add.s32 %r80, %r79, %r296; - add.s32 %r81, %r80, %r4; - mul.wide.s32 %rd52, %r81, 4; - add.s64 %rd53, %rd1, %rd52; - ld.global.f32 %f568, [%rd53]; - fma.rn.f32 %f3, %f4, %f568, %f3; - cvt.rn.f32.s32 %f569, %r297; - fma.rn.f32 %f2, %f569, %f568, %f2; - add.f32 %f1, %f1, %f568; - add.s32 %r298, %r297, 1; - -BB1_13: - mad.lo.s32 %r82, %r298, %r63, %r296; - add.s32 %r83, %r82, %r4; - mul.wide.s32 %rd54, %r83, 4; - add.s64 %rd55, %rd1, %rd54; - ld.global.f32 %f570, [%rd55]; - fma.rn.f32 %f2984, %f4, %f570, %f3; - cvt.rn.f32.s32 %f571, %r298; - fma.rn.f32 %f2983, %f571, %f570, %f2; - add.f32 %f2982, %f1, %f570; - add.s32 %r300, %r298, 1; - mov.f32 %f1, %f2982; - mov.f32 %f2, %f2983; - mov.f32 %f3, %f2984; - -BB1_14: - setp.lt.u32 %p19, %r63, 4; - @%p19 bra BB1_16; - -BB1_15: - mad.lo.s32 %r84, %r300, %r63, %r296; - add.s32 %r85, %r84, %r4; - mul.wide.s32 %rd56, %r85, 4; - add.s64 %rd57, %rd1, %rd56; - ld.global.f32 %f572, [%rd57]; - fma.rn.f32 %f573, %f4, %f572, %f2984; - cvt.rn.f32.s32 %f574, %r300; - fma.rn.f32 %f575, %f574, %f572, %f2983; - add.f32 %f576, %f2982, %f572; - cvt.s64.s32 %rd58, %r6; - add.s64 %rd59, %rd57, %rd58; - ld.global.f32 %f577, [%rd59]; - fma.rn.f32 %f578, %f4, %f577, %f573; - add.s32 %r86, %r300, 1; - cvt.rn.f32.s32 %f579, %r86; - fma.rn.f32 %f580, %f579, %f577, %f575; - add.f32 %f581, %f576, %f577; - add.s64 %rd60, %rd59, %rd58; - ld.global.f32 %f582, [%rd60]; - fma.rn.f32 %f583, %f4, %f582, %f578; - add.s32 %r87, %r300, 2; - cvt.rn.f32.s32 %f584, %r87; - fma.rn.f32 %f585, %f584, %f582, %f580; - add.f32 %f586, %f581, %f582; - add.s64 %rd61, %rd60, %rd58; - ld.global.f32 %f587, [%rd61]; - fma.rn.f32 %f2984, %f4, %f587, %f583; - add.s32 %r88, %r300, 3; - cvt.rn.f32.s32 %f588, %r88; - fma.rn.f32 %f2983, %f588, %f587, %f585; - add.f32 %f2982, %f586, %f587; - add.s32 %r300, %r300, 4; - setp.lt.s32 %p20, %r300, %r63; - mov.f32 %f1, %f2982; - mov.f32 %f2, %f2983; - mov.f32 %f3, %f2984; - @%p20 bra BB1_15; - -BB1_16: - add.s32 %r296, %r296, 1; - setp.lt.s32 %p21, %r296, %r63; - @%p21 bra BB1_5; - -BB1_17: - div.rn.f32 %f3080, %f3, %f1; - div.rn.f32 %f3079, %f2, %f1; - mov.f32 %f591, 0f3F000000; - div.rn.f32 %f592, %f591, %f3076; - div.rn.f32 %f40, %f592, %f3076; - mov.f32 %f2993, 0f51BA43B7; - mov.f32 %f2994, %f560; - @%p15 bra BB1_36; - - and.b32 %r16, %r63, 3; - mov.f32 %f2994, 0f00000000; - mov.u32 %r89, 0; - mov.f32 %f2993, 0f51BA43B7; - mov.u32 %r301, %r89; - -BB1_19: - mov.u32 %r302, %r89; - -BB1_20: - cvt.rn.f32.s32 %f597, %r302; - mul.f32 %f598, %f597, %f597; - mul.f32 %f45, %f40, %f598; - neg.f32 %f46, %f45; - mul.f32 %f47, %f45, 0fBFB8AA3B; - mov.f32 %f596, 0f00000000; - mov.f32 %f3011, %f596; - mov.f32 %f3012, %f596; - mov.u32 %r303, %r89; - -BB1_21: - sub.s32 %r93, %r303, %r301; - cvt.rn.f32.s32 %f50, %r93; - mul.lo.s32 %r20, %r303, %r63; - setp.eq.s32 %p23, %r16, 0; - @%p23 bra BB1_22; - - setp.eq.s32 %p24, %r16, 1; - @%p24 bra BB1_26; - bra.uni BB1_24; - -BB1_26: - mul.f32 %f613, %f50, %f50; - mul.f32 %f3002, %f40, %f613; - neg.f32 %f614, %f3002; - mul.f32 %f615, %f3002, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f616, %f615; - mov.f32 %f617, 0fBF317200; - fma.rn.f32 %f618, %f616, %f617, %f614; - mov.f32 %f619, 0fB5BFBE8E; - fma.rn.f32 %f620, %f616, %f619, %f618; - mul.f32 %f621, %f620, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f622, %f621; - add.f32 %f623, %f616, 0f00000000; - ex2.approx.f32 %f624, %f623; - mul.f32 %f3001, %f622, %f624; - mov.u32 %r305, 0; - bra.uni BB1_29; - -BB1_22: - mov.f32 %f3005, %f3011; - mov.f32 %f3006, %f3012; - mov.u32 %r307, %r89; - mov.f32 %f3011, %f596; - mov.f32 %f3012, %f596; - bra.uni BB1_30; - -BB1_24: - setp.ne.s32 %p25, %r16, 2; - @%p25 bra BB1_27; - - mul.f32 %f601, %f50, %f50; - mul.f32 %f3002, %f40, %f601; - neg.f32 %f602, %f3002; - mul.f32 %f603, %f3002, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f604, %f603; - mov.f32 %f605, 0fBF317200; - fma.rn.f32 %f606, %f604, %f605, %f602; - mov.f32 %f607, 0fB5BFBE8E; - fma.rn.f32 %f608, %f604, %f607, %f606; - mul.f32 %f609, %f608, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f610, %f609; - add.f32 %f611, %f604, 0f00000000; - ex2.approx.f32 %f612, %f611; - mul.f32 %f3001, %f610, %f612; - mov.u32 %r304, 0; - bra.uni BB1_28; - -BB1_27: - setp.lt.f32 %p26, %f45, 0fC2D20000; - mul.f32 %f625, %f50, %f50; - mul.f32 %f3002, %f40, %f625; - neg.f32 %f626, %f3002; - mul.f32 %f627, %f3002, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f628, %f627; - mov.f32 %f629, 0fBF317200; - fma.rn.f32 %f630, %f628, %f629, %f626; - mov.f32 %f631, 0fB5BFBE8E; - fma.rn.f32 %f632, %f628, %f631, %f630; - mul.f32 %f633, %f632, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f634, %f633; - add.f32 %f635, %f628, 0f00000000; - ex2.approx.f32 %f636, %f635; - mul.f32 %f3001, %f634, %f636; - setp.gt.f32 %p27, %f3002, 0f42D20000; - selp.f32 %f637, 0f00000000, %f3001, %p27; - setp.lt.f32 %p28, %f3002, 0fC2D20000; - selp.f32 %f638, 0f7F800000, %f637, %p28; - cvt.rzi.f32.f32 %f639, %f47; - fma.rn.f32 %f640, %f639, %f629, %f46; - fma.rn.f32 %f641, %f639, %f631, %f640; - mul.f32 %f642, %f641, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f643, %f642; - add.f32 %f644, %f639, 0f00000000; - ex2.approx.f32 %f645, %f644; - mul.f32 %f646, %f643, %f645; - setp.gt.f32 %p29, %f45, 0f42D20000; - selp.f32 %f647, 0f00000000, %f646, %p29; - selp.f32 %f648, 0f7F800000, %f647, %p26; - mul.f32 %f649, %f638, %f648; - add.s32 %r97, %r20, %r4; - mul.wide.s32 %rd62, %r97, 4; - add.s64 %rd63, %rd1, %rd62; - ld.global.f32 %f650, [%rd63]; - fma.rn.f32 %f3012, %f650, %f649, %f3012; - add.f32 %f3011, %f3011, %f649; - mov.u32 %r304, 1; - -BB1_28: - sub.s32 %r98, %r302, %r304; - cvt.rn.f32.s32 %f651, %r98; - mul.f32 %f652, %f651, %f651; - setp.gt.f32 %p30, %f3002, 0f42D20000; - selp.f32 %f653, 0f00000000, %f3001, %p30; - setp.lt.f32 %p31, %f3002, 0fC2D20000; - selp.f32 %f654, 0f7F800000, %f653, %p31; - mul.f32 %f655, %f40, %f652; - neg.f32 %f656, %f655; - mul.f32 %f657, %f655, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f658, %f657; - mov.f32 %f659, 0fBF317200; - fma.rn.f32 %f660, %f658, %f659, %f656; - mov.f32 %f661, 0fB5BFBE8E; - fma.rn.f32 %f662, %f658, %f661, %f660; - mul.f32 %f663, %f662, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f664, %f663; - add.f32 %f665, %f658, 0f00000000; - ex2.approx.f32 %f666, %f665; - mul.f32 %f667, %f664, %f666; - setp.gt.f32 %p32, %f655, 0f42D20000; - selp.f32 %f668, 0f00000000, %f667, %p32; - setp.lt.f32 %p33, %f655, 0fC2D20000; - selp.f32 %f669, 0f7F800000, %f668, %p33; - mul.f32 %f670, %f654, %f669; - add.s32 %r99, %r304, %r20; - add.s32 %r100, %r99, %r4; - mul.wide.s32 %rd64, %r100, 4; - add.s64 %rd65, %rd1, %rd64; - ld.global.f32 %f671, [%rd65]; - fma.rn.f32 %f3012, %f671, %f670, %f3012; - add.f32 %f3011, %f3011, %f670; - add.s32 %r305, %r304, 1; - -BB1_29: - sub.s32 %r101, %r302, %r305; - cvt.rn.f32.s32 %f672, %r101; - mul.f32 %f673, %f672, %f672; - setp.gt.f32 %p34, %f3002, 0f42D20000; - selp.f32 %f674, 0f00000000, %f3001, %p34; - setp.lt.f32 %p35, %f3002, 0fC2D20000; - selp.f32 %f675, 0f7F800000, %f674, %p35; - mul.f32 %f676, %f40, %f673; - neg.f32 %f677, %f676; - mul.f32 %f678, %f676, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f679, %f678; - mov.f32 %f680, 0fBF317200; - fma.rn.f32 %f681, %f679, %f680, %f677; - mov.f32 %f682, 0fB5BFBE8E; - fma.rn.f32 %f683, %f679, %f682, %f681; - mul.f32 %f684, %f683, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f685, %f684; - add.f32 %f686, %f679, 0f00000000; - ex2.approx.f32 %f687, %f686; - mul.f32 %f688, %f685, %f687; - setp.gt.f32 %p36, %f676, 0f42D20000; - selp.f32 %f689, 0f00000000, %f688, %p36; - setp.lt.f32 %p37, %f676, 0fC2D20000; - selp.f32 %f690, 0f7F800000, %f689, %p37; - mul.f32 %f691, %f675, %f690; - add.s32 %r102, %r305, %r20; - add.s32 %r103, %r102, %r4; - mul.wide.s32 %rd66, %r103, 4; - add.s64 %rd67, %rd1, %rd66; - ld.global.f32 %f692, [%rd67]; - fma.rn.f32 %f3006, %f692, %f691, %f3012; - add.f32 %f3005, %f3011, %f691; - add.s32 %r307, %r305, 1; - mov.f32 %f3011, %f3005; - mov.f32 %f3012, %f3006; - -BB1_30: - setp.lt.u32 %p38, %r63, 4; - @%p38 bra BB1_33; - - mul.f32 %f693, %f50, %f50; - mul.f32 %f694, %f40, %f693; - neg.f32 %f695, %f694; - mul.f32 %f696, %f694, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f697, %f696; - mov.f32 %f698, 0fBF317200; - fma.rn.f32 %f699, %f697, %f698, %f695; - mov.f32 %f700, 0fB5BFBE8E; - fma.rn.f32 %f701, %f697, %f700, %f699; - mul.f32 %f702, %f701, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f703, %f702; - add.f32 %f704, %f697, 0f00000000; - ex2.approx.f32 %f705, %f704; - mul.f32 %f706, %f703, %f705; - setp.gt.f32 %p39, %f694, 0f42D20000; - selp.f32 %f707, 0f00000000, %f706, %p39; - setp.lt.f32 %p40, %f694, 0fC2D20000; - selp.f32 %f75, 0f7F800000, %f707, %p40; - mov.f32 %f3011, %f3005; - mov.f32 %f3012, %f3006; - -BB1_32: - sub.s32 %r104, %r302, %r307; - cvt.rn.f32.s32 %f708, %r104; - mul.f32 %f709, %f708, %f708; - mul.f32 %f710, %f40, %f709; - neg.f32 %f711, %f710; - mul.f32 %f712, %f710, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f713, %f712; - fma.rn.f32 %f715, %f713, %f698, %f711; - fma.rn.f32 %f717, %f713, %f700, %f715; - mul.f32 %f718, %f717, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f719, %f718; - add.f32 %f720, %f713, 0f00000000; - ex2.approx.f32 %f721, %f720; - mul.f32 %f722, %f719, %f721; - setp.gt.f32 %p41, %f710, 0f42D20000; - selp.f32 %f723, 0f00000000, %f722, %p41; - setp.lt.f32 %p42, %f710, 0fC2D20000; - selp.f32 %f724, 0f7F800000, %f723, %p42; - mul.f32 %f725, %f75, %f724; - add.s32 %r105, %r307, %r20; - add.s32 %r106, %r105, %r4; - mul.wide.s32 %rd68, %r106, 4; - add.s64 %rd69, %rd1, %rd68; - ld.global.f32 %f726, [%rd69]; - fma.rn.f32 %f727, %f726, %f725, %f3012; - add.f32 %f728, %f3011, %f725; - add.s32 %r107, %r307, 1; - sub.s32 %r108, %r302, %r107; - cvt.rn.f32.s32 %f729, %r108; - mul.f32 %f730, %f729, %f729; - mul.f32 %f731, %f40, %f730; - neg.f32 %f732, %f731; - mul.f32 %f733, %f731, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f734, %f733; - fma.rn.f32 %f735, %f734, %f698, %f732; - fma.rn.f32 %f736, %f734, %f700, %f735; - mul.f32 %f737, %f736, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f738, %f737; - add.f32 %f739, %f734, 0f00000000; - ex2.approx.f32 %f740, %f739; - mul.f32 %f741, %f738, %f740; - setp.gt.f32 %p43, %f731, 0f42D20000; - selp.f32 %f742, 0f00000000, %f741, %p43; - setp.lt.f32 %p44, %f731, 0fC2D20000; - selp.f32 %f743, 0f7F800000, %f742, %p44; - mul.f32 %f744, %f75, %f743; - ld.global.f32 %f745, [%rd69+4]; - fma.rn.f32 %f746, %f745, %f744, %f727; - add.f32 %f747, %f728, %f744; - add.s32 %r109, %r307, 2; - sub.s32 %r110, %r302, %r109; - cvt.rn.f32.s32 %f748, %r110; - mul.f32 %f749, %f748, %f748; - mul.f32 %f750, %f40, %f749; - neg.f32 %f751, %f750; - mul.f32 %f752, %f750, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f753, %f752; - fma.rn.f32 %f754, %f753, %f698, %f751; - fma.rn.f32 %f755, %f753, %f700, %f754; - mul.f32 %f756, %f755, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f757, %f756; - add.f32 %f758, %f753, 0f00000000; - ex2.approx.f32 %f759, %f758; - mul.f32 %f760, %f757, %f759; - setp.gt.f32 %p45, %f750, 0f42D20000; - selp.f32 %f761, 0f00000000, %f760, %p45; - setp.lt.f32 %p46, %f750, 0fC2D20000; - selp.f32 %f762, 0f7F800000, %f761, %p46; - mul.f32 %f763, %f75, %f762; - ld.global.f32 %f764, [%rd69+8]; - fma.rn.f32 %f765, %f764, %f763, %f746; - add.f32 %f766, %f747, %f763; - add.s32 %r111, %r307, 3; - sub.s32 %r112, %r302, %r111; - cvt.rn.f32.s32 %f767, %r112; - mul.f32 %f768, %f767, %f767; - mul.f32 %f769, %f40, %f768; - neg.f32 %f770, %f769; - mul.f32 %f771, %f769, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f772, %f771; - fma.rn.f32 %f773, %f772, %f698, %f770; - fma.rn.f32 %f774, %f772, %f700, %f773; - mul.f32 %f775, %f774, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f776, %f775; - add.f32 %f777, %f772, 0f00000000; - ex2.approx.f32 %f778, %f777; - mul.f32 %f779, %f776, %f778; - setp.gt.f32 %p47, %f769, 0f42D20000; - selp.f32 %f780, 0f00000000, %f779, %p47; - setp.lt.f32 %p48, %f769, 0fC2D20000; - selp.f32 %f781, 0f7F800000, %f780, %p48; - mul.f32 %f782, %f75, %f781; - ld.global.f32 %f783, [%rd69+12]; - fma.rn.f32 %f3012, %f783, %f782, %f765; - add.f32 %f3011, %f766, %f782; - add.s32 %r307, %r307, 4; - setp.lt.s32 %p49, %r307, %r63; - @%p49 bra BB1_32; - -BB1_33: - add.s32 %r303, %r303, 1; - setp.lt.s32 %p50, %r303, %r63; - @%p50 bra BB1_21; - - div.rn.f32 %f784, %f3012, %f3011; - max.f32 %f2994, %f2994, %f784; - min.f32 %f2993, %f2993, %f784; - add.s32 %r302, %r302, 1; - setp.lt.s32 %p51, %r302, %r63; - @%p51 bra BB1_20; - - add.s32 %r301, %r301, 1; - setp.lt.s32 %p52, %r301, %r63; - @%p52 bra BB1_19; - -BB1_36: - sub.f32 %f785, %f2994, %f2993; - add.f32 %f786, %f785, %f785; - fma.rn.f32 %f787, %f785, 0f40000000, %f786; - mul.f32 %f788, %f787, 0f40490FD8; - mul.f32 %f789, %f788, %f3076; - mul.f32 %f790, %f789, %f3076; - max.f32 %f3078, %f560, %f790; - setp.lt.s32 %p53, %r64, 1; - @%p53 bra BB1_120; - - cvt.rn.f32.s32 %f792, %r63; - mul.f32 %f87, %f792, 0f3F000000; - mov.u32 %r308, 0; - -BB1_38: - mov.f32 %f3030, 0f00000000; - mov.f32 %f3031, %f3030; - mov.f32 %f3032, %f3030; - mov.f32 %f3033, %f3030; - mov.f32 %f3034, %f3030; - mov.f32 %f3035, %f3030; - mov.f32 %f3036, %f3030; - mov.f32 %f3037, %f3030; - mov.f32 %f3038, %f3030; - mov.f32 %f3039, %f3030; - @%p15 bra BB1_119; - - div.rn.f32 %f814, %f591, %f3076; - div.rn.f32 %f93, %f814, %f3076; - neg.f32 %f815, %f3078; - div.rn.f32 %f816, %f815, 0f40206C98; - div.rn.f32 %f94, %f816, %f3076; - div.rn.f32 %f95, %f94, %f3076; - mov.f32 %f817, 0fC0000000; - div.rn.f32 %f96, %f817, %f3076; - mul.f32 %f818, %f3076, %f3076; - mul.f32 %f819, %f3076, %f818; - div.rn.f32 %f97, %f816, %f819; - mul.f32 %f820, %f818, %f818; - mul.f32 %f821, %f3076, %f820; - div.rn.f32 %f822, %f3078, 0f40206C98; - div.rn.f32 %f98, %f822, %f821; - mov.u32 %r309, 0; - mov.f32 %f3030, 0f00000000; - mov.f32 %f3031, %f3030; - mov.f32 %f3032, %f3030; - mov.f32 %f3033, %f3030; - mov.f32 %f3034, %f3030; - mov.f32 %f3035, %f3030; - mov.f32 %f3036, %f3030; - mov.f32 %f3037, %f3030; - mov.f32 %f3038, %f3030; - mov.f32 %f3039, %f3030; - -BB1_40: - mov.u32 %r310, 0; - cvt.rn.f32.s32 %f823, %r309; - sub.f32 %f109, %f823, %f3080; - add.f32 %f110, %f109, 0f3F800000; - sqrt.rn.f32 %f111, %f93; - mul.f32 %f112, %f110, %f111; - abs.f32 %f113, %f112; - mul.f32 %f114, %f112, %f112; - mul.f32 %f115, %f109, %f111; - abs.f32 %f116, %f115; - add.f32 %f824, %f823, 0f3F800000; - sub.f32 %f118, %f824, %f3080; - div.rn.f32 %f119, %f118, %f3076; - mov.f32 %f825, 0f3F800000; - cvt.rzi.f32.f32 %f826, %f825; - add.f32 %f827, %f826, %f826; - mov.f32 %f828, 0f40000000; - sub.f32 %f829, %f828, %f827; - abs.f32 %f120, %f829; - setp.eq.f32 %p55, %f120, 0f3F800000; - abs.f32 %f121, %f119; - setp.lt.f32 %p56, %f121, 0f00800000; - mul.f32 %f830, %f121, 0f4B800000; - selp.f32 %f831, 0fC3170000, 0fC2FE0000, %p56; - selp.f32 %f832, %f830, %f121, %p56; - mov.b32 %r116, %f832; - and.b32 %r117, %r116, 8388607; - or.b32 %r118, %r117, 1065353216; - mov.b32 %f833, %r118; - shr.u32 %r119, %r116, 23; - cvt.rn.f32.u32 %f834, %r119; - add.f32 %f835, %f831, %f834; - setp.gt.f32 %p57, %f833, 0f3FB504F3; - mul.f32 %f836, %f833, 0f3F000000; - add.f32 %f837, %f835, 0f3F800000; - selp.f32 %f838, %f836, %f833, %p57; - selp.f32 %f839, %f837, %f835, %p57; - add.f32 %f122, %f838, 0fBF800000; - add.f32 %f123, %f838, 0f3F800000; - add.f32 %f124, %f122, %f122; - mov.f32 %f840, 0f3F317200; - mul.rn.f32 %f125, %f839, %f840; - mov.f32 %f841, 0f35BFBE8E; - mul.rn.f32 %f126, %f839, %f841; - setp.lt.f32 %p58, %f119, 0f00000000; - and.pred %p1, %p58, %p55; - add.f32 %f842, %f119, %f119; - selp.f32 %f127, %f842, 0f00000000, %p55; - div.rn.f32 %f128, %f109, %f3076; - abs.f32 %f129, %f128; - setp.lt.f32 %p59, %f129, 0f00800000; - mul.f32 %f844, %f129, 0f4B800000; - selp.f32 %f845, 0fC3170000, 0fC2FE0000, %p59; - selp.f32 %f846, %f844, %f129, %p59; - mov.b32 %r120, %f846; - and.b32 %r121, %r120, 8388607; - or.b32 %r122, %r121, 1065353216; - mov.b32 %f847, %r122; - shr.u32 %r123, %r120, 23; - cvt.rn.f32.u32 %f848, %r123; - add.f32 %f849, %f845, %f848; - setp.gt.f32 %p60, %f847, 0f3FB504F3; - mul.f32 %f850, %f847, 0f3F000000; - add.f32 %f851, %f849, 0f3F800000; - selp.f32 %f852, %f850, %f847, %p60; - selp.f32 %f853, %f851, %f849, %p60; - add.f32 %f130, %f852, 0fBF800000; - add.f32 %f131, %f852, 0f3F800000; - add.f32 %f132, %f130, %f130; - mul.rn.f32 %f133, %f853, %f840; - mul.rn.f32 %f134, %f853, %f841; - setp.lt.f32 %p61, %f128, 0f00000000; - and.pred %p2, %p61, %p55; - add.f32 %f854, %f128, %f128; - selp.f32 %f135, %f854, 0f00000000, %p55; - mul.f32 %f856, %f110, %f110; - mul.f32 %f136, %f110, %f856; - -BB1_41: - setp.ltu.f32 %p62, %f113, 0f3F800000; - @%p62 bra BB1_43; - bra.uni BB1_42; - -BB1_43: - mov.f32 %f876, 0f3BA0C9F8; - mov.f32 %f877, 0fBA1268FB; - fma.rn.f32 %f878, %f877, %f114, %f876; - mov.f32 %f879, 0fBCDABFD4; - fma.rn.f32 %f880, %f878, %f114, %f879; - mov.f32 %f881, 0f3DE70331; - fma.rn.f32 %f882, %f880, %f114, %f881; - mov.f32 %f883, 0fBEC09330; - fma.rn.f32 %f884, %f882, %f114, %f883; - mov.f32 %f885, 0f3F906EBA; - fma.rn.f32 %f886, %f884, %f114, %f885; - mul.f32 %f3040, %f112, %f886; - bra.uni BB1_44; - -BB1_42: - mov.f32 %f2803, 0f3F800000; - setp.ltu.f32 %p63, %f113, 0f407AD445; - mov.f32 %f858, 0f3A03BB71; - mov.f32 %f859, 0fB7B730FB; - fma.rn.f32 %f860, %f859, %f113, %f858; - mov.f32 %f861, 0fBBACA3B3; - fma.rn.f32 %f862, %f860, %f113, %f861; - mov.f32 %f863, 0f3D0A7445; - fma.rn.f32 %f864, %f862, %f113, %f863; - mov.f32 %f865, 0fBE1B3B75; - fma.rn.f32 %f866, %f864, %f113, %f865; - mov.f32 %f867, 0fBF6B385A; - fma.rn.f32 %f868, %f866, %f113, %f867; - mov.f32 %f869, 0fBFD0316E; - fma.rn.f32 %f870, %f868, %f113, %f869; - mov.f32 %f871, 0fBA031CCE; - fma.rn.f32 %f872, %f870, %f113, %f871; - ex2.approx.ftz.f32 %f873, %f872; - sub.f32 %f875, %f2803, %f873; - mov.b32 %r124, %f875; - selp.b32 %r125, %r124, 1065353216, %p63; - mov.b32 %r126, %f112; - and.b32 %r127, %r126, -2147483648; - or.b32 %r128, %r125, %r127; - mov.b32 %f3040, %r128; - -BB1_44: - setp.ltu.f32 %p64, %f116, 0f3F800000; - @%p64 bra BB1_46; - bra.uni BB1_45; - -BB1_46: - cvt.rn.f32.s32 %f2844, %r309; - sub.f32 %f2843, %f2844, %f3080; - mul.f32 %f2842, %f2843, %f111; - mul.f32 %f2841, %f2842, %f2842; - mov.f32 %f905, 0f3BA0C9F8; - mov.f32 %f906, 0fBA1268FB; - fma.rn.f32 %f907, %f906, %f2841, %f905; - mov.f32 %f908, 0fBCDABFD4; - fma.rn.f32 %f909, %f907, %f2841, %f908; - mov.f32 %f910, 0f3DE70331; - fma.rn.f32 %f911, %f909, %f2841, %f910; - mov.f32 %f912, 0fBEC09330; - fma.rn.f32 %f913, %f911, %f2841, %f912; - mov.f32 %f914, 0f3F906EBA; - fma.rn.f32 %f915, %f913, %f2841, %f914; - mul.f32 %f3041, %f2842, %f915; - bra.uni BB1_47; - -BB1_45: - cvt.rn.f32.s32 %f2857, %r309; - sub.f32 %f2856, %f2857, %f3080; - mul.f32 %f2855, %f2856, %f111; - mov.f32 %f2804, 0f3F800000; - setp.ltu.f32 %p65, %f116, 0f407AD445; - mov.f32 %f887, 0f3A03BB71; - mov.f32 %f888, 0fB7B730FB; - fma.rn.f32 %f889, %f888, %f116, %f887; - mov.f32 %f890, 0fBBACA3B3; - fma.rn.f32 %f891, %f889, %f116, %f890; - mov.f32 %f892, 0f3D0A7445; - fma.rn.f32 %f893, %f891, %f116, %f892; - mov.f32 %f894, 0fBE1B3B75; - fma.rn.f32 %f895, %f893, %f116, %f894; - mov.f32 %f896, 0fBF6B385A; - fma.rn.f32 %f897, %f895, %f116, %f896; - mov.f32 %f898, 0fBFD0316E; - fma.rn.f32 %f899, %f897, %f116, %f898; - mov.f32 %f900, 0fBA031CCE; - fma.rn.f32 %f901, %f899, %f116, %f900; - ex2.approx.ftz.f32 %f902, %f901; - sub.f32 %f904, %f2804, %f902; - mov.b32 %r129, %f904; - selp.b32 %r130, %r129, 1065353216, %p65; - mov.b32 %r131, %f2855; - and.b32 %r132, %r131, -2147483648; - or.b32 %r133, %r130, %r132; - mov.b32 %f3041, %r133; - -BB1_47: - sub.f32 %f916, %f3040, %f3041; - mul.f32 %f154, %f916, 0f3F000000; - cvt.rn.f32.s32 %f155, %r310; - sub.f32 %f156, %f155, %f3079; - add.f32 %f157, %f156, 0f3F800000; - mul.f32 %f158, %f157, %f111; - abs.f32 %f159, %f158; - setp.ltu.f32 %p66, %f159, 0f3F800000; - @%p66 bra BB1_49; - bra.uni BB1_48; - -BB1_49: - mul.f32 %f935, %f158, %f158; - mov.f32 %f936, 0f3BA0C9F8; - mov.f32 %f937, 0fBA1268FB; - fma.rn.f32 %f938, %f937, %f935, %f936; - mov.f32 %f939, 0fBCDABFD4; - fma.rn.f32 %f940, %f938, %f935, %f939; - mov.f32 %f941, 0f3DE70331; - fma.rn.f32 %f942, %f940, %f935, %f941; - mov.f32 %f943, 0fBEC09330; - fma.rn.f32 %f944, %f942, %f935, %f943; - mov.f32 %f945, 0f3F906EBA; - fma.rn.f32 %f946, %f944, %f935, %f945; - mul.f32 %f3042, %f158, %f946; - bra.uni BB1_50; - -BB1_48: - mov.f32 %f2805, 0f3F800000; - mov.f32 %f917, 0f3A03BB71; - mov.f32 %f918, 0fB7B730FB; - fma.rn.f32 %f919, %f918, %f159, %f917; - mov.f32 %f920, 0fBBACA3B3; - fma.rn.f32 %f921, %f919, %f159, %f920; - mov.f32 %f922, 0f3D0A7445; - fma.rn.f32 %f923, %f921, %f159, %f922; - mov.f32 %f924, 0fBE1B3B75; - fma.rn.f32 %f925, %f923, %f159, %f924; - mov.f32 %f926, 0fBF6B385A; - fma.rn.f32 %f927, %f925, %f159, %f926; - mov.f32 %f928, 0fBFD0316E; - fma.rn.f32 %f929, %f927, %f159, %f928; - mov.f32 %f930, 0fBA031CCE; - fma.rn.f32 %f931, %f929, %f159, %f930; - ex2.approx.ftz.f32 %f932, %f931; - sub.f32 %f934, %f2805, %f932; - mov.b32 %r134, %f934; - setp.ltu.f32 %p67, %f159, 0f407AD445; - selp.b32 %r135, %r134, 1065353216, %p67; - mov.b32 %r136, %f158; - and.b32 %r137, %r136, -2147483648; - or.b32 %r138, %r135, %r137; - mov.b32 %f3042, %r138; - -BB1_50: - cvt.rn.f32.s32 %f2859, %r310; - sub.f32 %f2858, %f2859, %f3079; - mul.f32 %f163, %f2858, %f111; - abs.f32 %f164, %f163; - setp.ltu.f32 %p68, %f164, 0f3F800000; - @%p68 bra BB1_52; - bra.uni BB1_51; - -BB1_52: - mul.f32 %f965, %f163, %f163; - mov.f32 %f966, 0f3BA0C9F8; - mov.f32 %f967, 0fBA1268FB; - fma.rn.f32 %f968, %f967, %f965, %f966; - mov.f32 %f969, 0fBCDABFD4; - fma.rn.f32 %f970, %f968, %f965, %f969; - mov.f32 %f971, 0f3DE70331; - fma.rn.f32 %f972, %f970, %f965, %f971; - mov.f32 %f973, 0fBEC09330; - fma.rn.f32 %f974, %f972, %f965, %f973; - mov.f32 %f975, 0f3F906EBA; - fma.rn.f32 %f976, %f974, %f965, %f975; - mul.f32 %f3043, %f163, %f976; - bra.uni BB1_53; - -BB1_51: - mov.f32 %f2806, 0f3F800000; - mov.f32 %f947, 0f3A03BB71; - mov.f32 %f948, 0fB7B730FB; - fma.rn.f32 %f949, %f948, %f164, %f947; - mov.f32 %f950, 0fBBACA3B3; - fma.rn.f32 %f951, %f949, %f164, %f950; - mov.f32 %f952, 0f3D0A7445; - fma.rn.f32 %f953, %f951, %f164, %f952; - mov.f32 %f954, 0fBE1B3B75; - fma.rn.f32 %f955, %f953, %f164, %f954; - mov.f32 %f956, 0fBF6B385A; - fma.rn.f32 %f957, %f955, %f164, %f956; - mov.f32 %f958, 0fBFD0316E; - fma.rn.f32 %f959, %f957, %f164, %f958; - mov.f32 %f960, 0fBA031CCE; - fma.rn.f32 %f961, %f959, %f164, %f960; - ex2.approx.ftz.f32 %f962, %f961; - sub.f32 %f964, %f2806, %f962; - mov.b32 %r139, %f964; - setp.ltu.f32 %p69, %f164, 0f407AD445; - selp.b32 %r140, %r139, 1065353216, %p69; - mov.b32 %r141, %f163; - and.b32 %r142, %r141, -2147483648; - or.b32 %r143, %r140, %r142; - mov.b32 %f3043, %r143; - -BB1_53: - sub.f32 %f979, %f3042, %f3043; - mul.f32 %f168, %f979, 0f3F000000; - mul.f32 %f980, %f154, %f3078; - fma.rn.f32 %f169, %f168, %f980, %f2993; - mad.lo.s32 %r144, %r310, %r63, %r309; - add.s32 %r145, %r144, %r4; - mul.wide.s32 %rd71, %r145, 4; - add.s64 %rd72, %rd1, %rd71; - ld.global.f32 %f170, [%rd72]; - // inline asm - rcp.approx.ftz.f32 %f977,%f123; - // inline asm - mul.f32 %f981, %f977, %f124; - mul.f32 %f982, %f981, %f981; - mov.f32 %f983, 0f3C4CAF63; - mov.f32 %f984, 0f3B18F0FE; - fma.rn.f32 %f985, %f984, %f982, %f983; - mov.f32 %f986, 0f3DAAAABD; - fma.rn.f32 %f987, %f985, %f982, %f986; - mul.rn.f32 %f988, %f987, %f982; - mul.rn.f32 %f989, %f988, %f981; - sub.f32 %f990, %f122, %f981; - neg.f32 %f991, %f981; - add.f32 %f992, %f990, %f990; - fma.rn.f32 %f993, %f991, %f122, %f992; - mul.rn.f32 %f994, %f977, %f993; - add.f32 %f995, %f989, %f981; - sub.f32 %f996, %f981, %f995; - add.f32 %f997, %f989, %f996; - add.f32 %f998, %f994, %f997; - add.f32 %f999, %f995, %f998; - sub.f32 %f1000, %f995, %f999; - add.f32 %f1001, %f998, %f1000; - add.f32 %f1002, %f125, %f999; - sub.f32 %f1003, %f125, %f1002; - add.f32 %f1004, %f999, %f1003; - add.f32 %f1005, %f1001, %f1004; - add.f32 %f1006, %f126, %f1005; - add.f32 %f1007, %f1002, %f1006; - sub.f32 %f1008, %f1002, %f1007; - add.f32 %f1009, %f1006, %f1008; - mul.rn.f32 %f1011, %f828, %f1007; - neg.f32 %f1012, %f1011; - fma.rn.f32 %f1013, %f828, %f1007, %f1012; - fma.rn.f32 %f1014, %f828, %f1009, %f1013; - mov.f32 %f1015, 0f00000000; - fma.rn.f32 %f1016, %f1015, %f1007, %f1014; - add.rn.f32 %f1017, %f1011, %f1016; - neg.f32 %f1018, %f1017; - add.rn.f32 %f1019, %f1011, %f1018; - add.rn.f32 %f1020, %f1019, %f1016; - mov.b32 %r146, %f1017; - setp.eq.s32 %p70, %r146, 1118925336; - add.s32 %r147, %r146, -1; - mov.b32 %f1021, %r147; - add.f32 %f1022, %f1020, 0f37000000; - selp.f32 %f1023, %f1021, %f1017, %p70; - selp.f32 %f171, %f1022, %f1020, %p70; - mul.f32 %f1024, %f1023, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1025, %f1024; - mov.f32 %f1026, 0fBF317200; - fma.rn.f32 %f1027, %f1025, %f1026, %f1023; - mov.f32 %f1028, 0fB5BFBE8E; - fma.rn.f32 %f1029, %f1025, %f1028, %f1027; - mul.f32 %f1030, %f1029, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1031, %f1030; - add.f32 %f1032, %f1025, 0f00000000; - ex2.approx.f32 %f1033, %f1032; - mul.f32 %f1034, %f1031, %f1033; - setp.lt.f32 %p71, %f1023, 0fC2D20000; - selp.f32 %f1035, 0f00000000, %f1034, %p71; - setp.gt.f32 %p72, %f1023, 0f42D20000; - selp.f32 %f3044, 0f7F800000, %f1035, %p72; - setp.eq.f32 %p73, %f3044, 0f7F800000; - @%p73 bra BB1_55; - - fma.rn.f32 %f3044, %f3044, %f171, %f3044; - -BB1_55: - setp.geu.f32 %p346, %f119, 0f00000000; - mov.b32 %r148, %f3044; - xor.b32 %r149, %r148, -2147483648; - mov.b32 %f1036, %r149; - selp.f32 %f175, %f1036, %f3044, %p1; - setp.eq.f32 %p74, %f119, 0f00000000; - selp.f32 %f3045, %f127, %f175, %p74; - @%p346 bra BB1_57; - - cvt.rzi.f32.f32 %f1038, %f828; - setp.neu.f32 %p75, %f1038, 0f40000000; - selp.f32 %f3045, 0f7FFFFFFF, %f175, %p75; - -BB1_57: - abs.f32 %f2814, %f119; - add.f32 %f2813, %f2814, 0f40000000; - mov.b32 %r284, %f2813; - mov.f32 %f2812, 0f00000000; - mov.f32 %f2811, 0f3DAAAABD; - mov.f32 %f2810, 0f3C4CAF63; - mov.f32 %f2809, 0f3B18F0FE; - mov.f32 %f2808, 0fB5BFBE8E; - mov.f32 %f2807, 0fBF317200; - add.f32 %f1041, %f119, 0f40000000; - setp.gtu.f32 %p76, %f2814, 0f7F800000; - selp.f32 %f1042, %f1041, %f3045, %p76; - selp.f32 %f1043, 0fFF800000, 0f7F800000, %p1; - setp.neu.f32 %p77, %f2814, 0f7F800000; - selp.f32 %f1044, %f1042, %f1043, %p77; - setp.gt.s32 %p78, %r284, 2139095039; - selp.f32 %f1045, %f1044, %f3045, %p78; - mul.f32 %f1046, %f1045, 0fBF000000; - setp.eq.f32 %p79, %f119, 0f3F800000; - selp.f32 %f1047, 0fBF000000, %f1046, %p79; - mul.f32 %f1048, %f1047, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1049, %f1048; - fma.rn.f32 %f1051, %f1049, %f2807, %f1047; - fma.rn.f32 %f1053, %f1049, %f2808, %f1051; - mul.f32 %f1054, %f1053, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1055, %f1054; - add.f32 %f1056, %f1049, 0f00000000; - ex2.approx.f32 %f1057, %f1056; - mul.f32 %f1058, %f1055, %f1057; - setp.lt.f32 %p80, %f1047, 0fC2D20000; - selp.f32 %f1059, 0f00000000, %f1058, %p80; - setp.gt.f32 %p81, %f1047, 0f42D20000; - selp.f32 %f179, 0f7F800000, %f1059, %p81; - // inline asm - rcp.approx.ftz.f32 %f1039,%f131; - // inline asm - mul.f32 %f1060, %f1039, %f132; - mul.f32 %f1061, %f1060, %f1060; - fma.rn.f32 %f1064, %f2809, %f1061, %f2810; - fma.rn.f32 %f1066, %f1064, %f1061, %f2811; - mul.rn.f32 %f1067, %f1066, %f1061; - mul.rn.f32 %f1068, %f1067, %f1060; - sub.f32 %f1069, %f130, %f1060; - neg.f32 %f1070, %f1060; - add.f32 %f1071, %f1069, %f1069; - fma.rn.f32 %f1072, %f1070, %f130, %f1071; - mul.rn.f32 %f1073, %f1039, %f1072; - add.f32 %f1074, %f1068, %f1060; - sub.f32 %f1075, %f1060, %f1074; - add.f32 %f1076, %f1068, %f1075; - add.f32 %f1077, %f1073, %f1076; - add.f32 %f1078, %f1074, %f1077; - sub.f32 %f1079, %f1074, %f1078; - add.f32 %f1080, %f1077, %f1079; - add.f32 %f1081, %f133, %f1078; - sub.f32 %f1082, %f133, %f1081; - add.f32 %f1083, %f1078, %f1082; - add.f32 %f1084, %f1080, %f1083; - add.f32 %f1085, %f134, %f1084; - add.f32 %f1086, %f1081, %f1085; - sub.f32 %f1087, %f1081, %f1086; - add.f32 %f1088, %f1085, %f1087; - mul.rn.f32 %f1090, %f828, %f1086; - neg.f32 %f1091, %f1090; - fma.rn.f32 %f1092, %f828, %f1086, %f1091; - fma.rn.f32 %f1093, %f828, %f1088, %f1092; - fma.rn.f32 %f1095, %f2812, %f1086, %f1093; - add.rn.f32 %f1096, %f1090, %f1095; - neg.f32 %f1097, %f1096; - add.rn.f32 %f1098, %f1090, %f1097; - add.rn.f32 %f1099, %f1098, %f1095; - mov.b32 %r150, %f1096; - setp.eq.s32 %p82, %r150, 1118925336; - add.s32 %r151, %r150, -1; - mov.b32 %f1100, %r151; - add.f32 %f1101, %f1099, 0f37000000; - selp.f32 %f1102, %f1100, %f1096, %p82; - selp.f32 %f180, %f1101, %f1099, %p82; - mul.f32 %f1103, %f1102, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1104, %f1103; - fma.rn.f32 %f1105, %f1104, %f2807, %f1102; - fma.rn.f32 %f1106, %f1104, %f2808, %f1105; - mul.f32 %f1107, %f1106, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1108, %f1107; - add.f32 %f1109, %f1104, 0f00000000; - ex2.approx.f32 %f1110, %f1109; - mul.f32 %f1111, %f1108, %f1110; - setp.lt.f32 %p83, %f1102, 0fC2D20000; - selp.f32 %f1112, 0f00000000, %f1111, %p83; - setp.gt.f32 %p84, %f1102, 0f42D20000; - selp.f32 %f3046, 0f7F800000, %f1112, %p84; - setp.eq.f32 %p85, %f3046, 0f7F800000; - @%p85 bra BB1_59; - - fma.rn.f32 %f3046, %f3046, %f180, %f3046; - -BB1_59: - setp.geu.f32 %p347, %f128, 0f00000000; - mov.b32 %r152, %f3046; - xor.b32 %r153, %r152, -2147483648; - mov.b32 %f1113, %r153; - selp.f32 %f184, %f1113, %f3046, %p2; - setp.eq.f32 %p86, %f128, 0f00000000; - selp.f32 %f3047, %f135, %f184, %p86; - @%p347 bra BB1_61; - - cvt.rzi.f32.f32 %f1115, %f828; - setp.neu.f32 %p87, %f1115, 0f40000000; - selp.f32 %f3047, 0f7FFFFFFF, %f184, %p87; - -BB1_61: - abs.f32 %f2829, %f128; - add.f32 %f2828, %f2829, 0f40000000; - mov.b32 %r285, %f2828; - cvt.rn.f32.s32 %f2827, %r309; - cvt.rn.f32.s32 %f2826, %r310; - mov.f32 %f2825, 0f35BFBE8E; - mov.f32 %f2824, 0f3F317200; - add.f32 %f2823, %f2827, 0f3F800000; - sub.f32 %f2822, %f2823, %f3080; - sub.f32 %f2821, %f2827, %f3080; - mov.f32 %f2820, 0f00000000; - mov.f32 %f2819, 0f3DAAAABD; - mov.f32 %f2818, 0f3C4CAF63; - mov.f32 %f2817, 0f3B18F0FE; - mov.f32 %f2816, 0fB5BFBE8E; - mov.f32 %f2815, 0fBF317200; - add.f32 %f1118, %f128, 0f40000000; - setp.gtu.f32 %p88, %f2829, 0f7F800000; - selp.f32 %f1119, %f1118, %f3047, %p88; - selp.f32 %f1120, 0fFF800000, 0f7F800000, %p2; - setp.neu.f32 %p89, %f2829, 0f7F800000; - selp.f32 %f1121, %f1119, %f1120, %p89; - setp.gt.s32 %p90, %r285, 2139095039; - selp.f32 %f1122, %f1121, %f3047, %p90; - mul.f32 %f1123, %f1122, 0fBF000000; - setp.eq.f32 %p91, %f128, 0f3F800000; - selp.f32 %f1124, 0fBF000000, %f1123, %p91; - mul.f32 %f1125, %f1124, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1126, %f1125; - fma.rn.f32 %f1128, %f1126, %f2815, %f1124; - fma.rn.f32 %f1130, %f1126, %f2816, %f1128; - mul.f32 %f1131, %f1130, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1132, %f1131; - add.f32 %f1133, %f1126, 0f00000000; - ex2.approx.f32 %f1134, %f1133; - mul.f32 %f1135, %f1132, %f1134; - setp.lt.f32 %p92, %f1124, 0fC2D20000; - selp.f32 %f1136, 0f00000000, %f1135, %p92; - setp.gt.f32 %p93, %f1124, 0f42D20000; - selp.f32 %f1137, 0f7F800000, %f1136, %p93; - sub.f32 %f1138, %f179, %f1137; - mul.f32 %f1139, %f94, %f1138; - mul.f32 %f188, %f168, %f1139; - mul.f32 %f1140, %f2821, %f1137; - mul.f32 %f1141, %f2822, %f179; - sub.f32 %f1142, %f1141, %f1140; - mul.f32 %f1143, %f1142, %f97; - mul.f32 %f189, %f168, %f1143; - add.f32 %f1144, %f2826, 0f3F800000; - sub.f32 %f190, %f1144, %f3079; - div.rn.f32 %f191, %f190, %f3076; - abs.f32 %f192, %f191; - setp.lt.f32 %p94, %f192, 0f00800000; - mul.f32 %f1145, %f192, 0f4B800000; - selp.f32 %f1146, 0fC3170000, 0fC2FE0000, %p94; - selp.f32 %f1147, %f1145, %f192, %p94; - mov.b32 %r154, %f1147; - and.b32 %r155, %r154, 8388607; - or.b32 %r156, %r155, 1065353216; - mov.b32 %f1148, %r156; - shr.u32 %r157, %r154, 23; - cvt.rn.f32.u32 %f1149, %r157; - add.f32 %f1150, %f1146, %f1149; - setp.gt.f32 %p95, %f1148, 0f3FB504F3; - mul.f32 %f1151, %f1148, 0f3F000000; - add.f32 %f1152, %f1150, 0f3F800000; - selp.f32 %f1153, %f1151, %f1148, %p95; - selp.f32 %f1154, %f1152, %f1150, %p95; - add.f32 %f193, %f1153, 0fBF800000; - add.f32 %f1117, %f1153, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1116,%f1117; - // inline asm - add.f32 %f195, %f193, %f193; - mul.f32 %f1155, %f1116, %f195; - mul.f32 %f1156, %f1155, %f1155; - fma.rn.f32 %f1159, %f2817, %f1156, %f2818; - fma.rn.f32 %f1161, %f1159, %f1156, %f2819; - mul.rn.f32 %f1162, %f1161, %f1156; - mul.rn.f32 %f1163, %f1162, %f1155; - sub.f32 %f1164, %f193, %f1155; - neg.f32 %f1165, %f1155; - add.f32 %f1166, %f1164, %f1164; - fma.rn.f32 %f1167, %f1165, %f193, %f1166; - mul.rn.f32 %f1168, %f1116, %f1167; - add.f32 %f1169, %f1163, %f1155; - sub.f32 %f1170, %f1155, %f1169; - add.f32 %f1171, %f1163, %f1170; - add.f32 %f1172, %f1168, %f1171; - add.f32 %f1173, %f1169, %f1172; - sub.f32 %f1174, %f1169, %f1173; - add.f32 %f1175, %f1172, %f1174; - mul.rn.f32 %f196, %f1154, %f2824; - mul.rn.f32 %f197, %f1154, %f2825; - add.f32 %f1178, %f196, %f1173; - sub.f32 %f1179, %f196, %f1178; - add.f32 %f1180, %f1173, %f1179; - add.f32 %f1181, %f1175, %f1180; - add.f32 %f1182, %f197, %f1181; - add.f32 %f1183, %f1178, %f1182; - sub.f32 %f1184, %f1178, %f1183; - add.f32 %f1185, %f1182, %f1184; - mul.rn.f32 %f1187, %f828, %f1183; - neg.f32 %f1188, %f1187; - fma.rn.f32 %f1189, %f828, %f1183, %f1188; - fma.rn.f32 %f1190, %f828, %f1185, %f1189; - fma.rn.f32 %f1192, %f2820, %f1183, %f1190; - add.rn.f32 %f1193, %f1187, %f1192; - neg.f32 %f1194, %f1193; - add.rn.f32 %f1195, %f1187, %f1194; - add.rn.f32 %f1196, %f1195, %f1192; - mov.b32 %r158, %f1193; - setp.eq.s32 %p96, %r158, 1118925336; - add.s32 %r159, %r158, -1; - mov.b32 %f1197, %r159; - add.f32 %f1198, %f1196, 0f37000000; - selp.f32 %f1199, %f1197, %f1193, %p96; - selp.f32 %f198, %f1198, %f1196, %p96; - mul.f32 %f1200, %f1199, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1201, %f1200; - fma.rn.f32 %f1202, %f1201, %f2815, %f1199; - fma.rn.f32 %f1203, %f1201, %f2816, %f1202; - mul.f32 %f1204, %f1203, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1205, %f1204; - add.f32 %f1206, %f1201, 0f00000000; - ex2.approx.f32 %f1207, %f1206; - mul.f32 %f1208, %f1205, %f1207; - setp.lt.f32 %p97, %f1199, 0fC2D20000; - selp.f32 %f1209, 0f00000000, %f1208, %p97; - setp.gt.f32 %p98, %f1199, 0f42D20000; - selp.f32 %f3048, 0f7F800000, %f1209, %p98; - setp.eq.f32 %p99, %f3048, 0f7F800000; - @%p99 bra BB1_63; - - fma.rn.f32 %f3048, %f3048, %f198, %f3048; - -BB1_63: - setp.lt.f32 %p100, %f191, 0f00000000; - and.pred %p5, %p100, %p55; - mov.b32 %r160, %f3048; - xor.b32 %r161, %r160, -2147483648; - mov.b32 %f1210, %r161; - selp.f32 %f3050, %f1210, %f3048, %p5; - setp.eq.f32 %p102, %f191, 0f00000000; - @%p102 bra BB1_66; - bra.uni BB1_64; - -BB1_66: - add.f32 %f1213, %f191, %f191; - selp.f32 %f3050, %f1213, 0f00000000, %p55; - bra.uni BB1_67; - -BB1_64: - setp.geu.f32 %p103, %f191, 0f00000000; - @%p103 bra BB1_67; - - cvt.rzi.f32.f32 %f1212, %f828; - setp.neu.f32 %p104, %f1212, 0f40000000; - selp.f32 %f3050, 0f7FFFFFFF, %f3050, %p104; - -BB1_67: - abs.f32 %f2830, %f191; - add.f32 %f1214, %f2830, 0f40000000; - mov.b32 %r36, %f1214; - setp.lt.s32 %p106, %r36, 2139095040; - @%p106 bra BB1_72; - - abs.f32 %f2839, %f191; - setp.gtu.f32 %p107, %f2839, 0f7F800000; - @%p107 bra BB1_71; - bra.uni BB1_69; - -BB1_71: - add.f32 %f3050, %f191, 0f40000000; - bra.uni BB1_72; - -BB1_69: - abs.f32 %f2840, %f191; - setp.neu.f32 %p108, %f2840, 0f7F800000; - @%p108 bra BB1_72; - - selp.f32 %f3050, 0fFF800000, 0f7F800000, %p5; - -BB1_72: - cvt.rn.f32.s32 %f2852, %r310; - sub.f32 %f2851, %f2852, %f3079; - mov.f32 %f2838, 0f35BFBE8E; - mov.f32 %f2837, 0f3F317200; - mov.f32 %f2836, 0f00000000; - mov.f32 %f2835, 0f3DAAAABD; - mov.f32 %f2834, 0f3C4CAF63; - mov.f32 %f2833, 0f3B18F0FE; - mov.f32 %f2832, 0fB5BFBE8E; - mov.f32 %f2831, 0fBF317200; - mul.f32 %f1217, %f3050, 0fBF000000; - setp.eq.f32 %p109, %f191, 0f3F800000; - selp.f32 %f1218, 0fBF000000, %f1217, %p109; - mul.f32 %f1219, %f1218, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1220, %f1219; - fma.rn.f32 %f1222, %f1220, %f2831, %f1218; - fma.rn.f32 %f1224, %f1220, %f2832, %f1222; - mul.f32 %f1225, %f1224, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1226, %f1225; - add.f32 %f1227, %f1220, 0f00000000; - ex2.approx.f32 %f1228, %f1227; - mul.f32 %f1229, %f1226, %f1228; - setp.lt.f32 %p110, %f1218, 0fC2D20000; - selp.f32 %f1230, 0f00000000, %f1229, %p110; - setp.gt.f32 %p111, %f1218, 0f42D20000; - selp.f32 %f209, 0f7F800000, %f1230, %p111; - div.rn.f32 %f210, %f2851, %f3076; - abs.f32 %f211, %f210; - setp.lt.f32 %p112, %f211, 0f00800000; - mul.f32 %f1231, %f211, 0f4B800000; - selp.f32 %f1232, 0fC3170000, 0fC2FE0000, %p112; - selp.f32 %f1233, %f1231, %f211, %p112; - mov.b32 %r162, %f1233; - and.b32 %r163, %r162, 8388607; - or.b32 %r164, %r163, 1065353216; - mov.b32 %f1234, %r164; - shr.u32 %r165, %r162, 23; - cvt.rn.f32.u32 %f1235, %r165; - add.f32 %f1236, %f1232, %f1235; - setp.gt.f32 %p113, %f1234, 0f3FB504F3; - mul.f32 %f1237, %f1234, 0f3F000000; - add.f32 %f1238, %f1236, 0f3F800000; - selp.f32 %f1239, %f1237, %f1234, %p113; - selp.f32 %f1240, %f1238, %f1236, %p113; - add.f32 %f212, %f1239, 0fBF800000; - add.f32 %f1216, %f1239, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1215,%f1216; - // inline asm - add.f32 %f214, %f212, %f212; - mul.f32 %f1241, %f1215, %f214; - mul.f32 %f1242, %f1241, %f1241; - fma.rn.f32 %f1245, %f2833, %f1242, %f2834; - fma.rn.f32 %f1247, %f1245, %f1242, %f2835; - mul.rn.f32 %f1248, %f1247, %f1242; - mul.rn.f32 %f1249, %f1248, %f1241; - sub.f32 %f1250, %f212, %f1241; - neg.f32 %f1251, %f1241; - add.f32 %f1252, %f1250, %f1250; - fma.rn.f32 %f1253, %f1251, %f212, %f1252; - mul.rn.f32 %f1254, %f1215, %f1253; - add.f32 %f1255, %f1249, %f1241; - sub.f32 %f1256, %f1241, %f1255; - add.f32 %f1257, %f1249, %f1256; - add.f32 %f1258, %f1254, %f1257; - add.f32 %f1259, %f1255, %f1258; - sub.f32 %f1260, %f1255, %f1259; - add.f32 %f1261, %f1258, %f1260; - mul.rn.f32 %f215, %f1240, %f2837; - mul.rn.f32 %f216, %f1240, %f2838; - add.f32 %f1264, %f215, %f1259; - sub.f32 %f1265, %f215, %f1264; - add.f32 %f1266, %f1259, %f1265; - add.f32 %f1267, %f1261, %f1266; - add.f32 %f1268, %f216, %f1267; - add.f32 %f1269, %f1264, %f1268; - sub.f32 %f1270, %f1264, %f1269; - add.f32 %f1271, %f1268, %f1270; - mul.rn.f32 %f1273, %f828, %f1269; - neg.f32 %f1274, %f1273; - fma.rn.f32 %f1275, %f828, %f1269, %f1274; - fma.rn.f32 %f1276, %f828, %f1271, %f1275; - fma.rn.f32 %f1278, %f2836, %f1269, %f1276; - add.rn.f32 %f1279, %f1273, %f1278; - neg.f32 %f1280, %f1279; - add.rn.f32 %f1281, %f1273, %f1280; - add.rn.f32 %f1282, %f1281, %f1278; - mov.b32 %r166, %f1279; - setp.eq.s32 %p114, %r166, 1118925336; - add.s32 %r167, %r166, -1; - mov.b32 %f1283, %r167; - add.f32 %f1284, %f1282, 0f37000000; - selp.f32 %f1285, %f1283, %f1279, %p114; - selp.f32 %f217, %f1284, %f1282, %p114; - mul.f32 %f1286, %f1285, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1287, %f1286; - fma.rn.f32 %f1288, %f1287, %f2831, %f1285; - fma.rn.f32 %f1289, %f1287, %f2832, %f1288; - mul.f32 %f1290, %f1289, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1291, %f1290; - add.f32 %f1292, %f1287, 0f00000000; - ex2.approx.f32 %f1293, %f1292; - mul.f32 %f1294, %f1291, %f1293; - setp.lt.f32 %p115, %f1285, 0fC2D20000; - selp.f32 %f1295, 0f00000000, %f1294, %p115; - setp.gt.f32 %p116, %f1285, 0f42D20000; - selp.f32 %f3051, 0f7F800000, %f1295, %p116; - setp.eq.f32 %p117, %f3051, 0f7F800000; - @%p117 bra BB1_74; - - fma.rn.f32 %f3051, %f3051, %f217, %f3051; - -BB1_74: - setp.lt.f32 %p118, %f210, 0f00000000; - and.pred %p6, %p118, %p55; - mov.b32 %r168, %f3051; - xor.b32 %r169, %r168, -2147483648; - mov.b32 %f1296, %r169; - selp.f32 %f3053, %f1296, %f3051, %p6; - setp.eq.f32 %p120, %f210, 0f00000000; - @%p120 bra BB1_77; - bra.uni BB1_75; - -BB1_77: - add.f32 %f1299, %f210, %f210; - selp.f32 %f3053, %f1299, 0f00000000, %p55; - bra.uni BB1_78; - -BB1_75: - setp.geu.f32 %p121, %f210, 0f00000000; - @%p121 bra BB1_78; - - cvt.rzi.f32.f32 %f1298, %f828; - setp.neu.f32 %p122, %f1298, 0f40000000; - selp.f32 %f3053, 0f7FFFFFFF, %f3053, %p122; - -BB1_78: - abs.f32 %f2753, %f210; - add.f32 %f1300, %f2753, 0f40000000; - mov.b32 %r37, %f1300; - setp.lt.s32 %p124, %r37, 2139095040; - @%p124 bra BB1_83; - - abs.f32 %f2849, %f210; - setp.gtu.f32 %p125, %f2849, 0f7F800000; - @%p125 bra BB1_82; - bra.uni BB1_80; - -BB1_82: - add.f32 %f3053, %f210, 0f40000000; - bra.uni BB1_83; - -BB1_80: - abs.f32 %f2850, %f210; - setp.neu.f32 %p126, %f2850, 0f7F800000; - @%p126 bra BB1_83; - - selp.f32 %f3053, 0fFF800000, 0f7F800000, %p6; - -BB1_83: - cvt.rn.f32.s32 %f2854, %r310; - sub.f32 %f2853, %f2854, %f3079; - cvt.rn.f32.s32 %f2762, %r310; - add.f32 %f2761, %f2762, 0f3F800000; - sub.f32 %f2760, %f2761, %f3079; - mov.f32 %f2759, 0f00000000; - mov.f32 %f2758, 0f3DAAAABD; - mov.f32 %f2757, 0f3C4CAF63; - mov.f32 %f2756, 0f3B18F0FE; - mov.f32 %f2755, 0fB5BFBE8E; - mov.f32 %f2754, 0fBF317200; - mul.f32 %f1303, %f3053, 0fBF000000; - setp.eq.f32 %p127, %f210, 0f3F800000; - selp.f32 %f1304, 0fBF000000, %f1303, %p127; - mul.f32 %f1305, %f1304, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1306, %f1305; - fma.rn.f32 %f1308, %f1306, %f2754, %f1304; - fma.rn.f32 %f1310, %f1306, %f2755, %f1308; - mul.f32 %f1311, %f1310, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1312, %f1311; - add.f32 %f1313, %f1306, 0f00000000; - ex2.approx.f32 %f1314, %f1313; - mul.f32 %f1315, %f1312, %f1314; - setp.lt.f32 %p128, %f1304, 0fC2D20000; - selp.f32 %f1316, 0f00000000, %f1315, %p128; - setp.gt.f32 %p129, %f1304, 0f42D20000; - selp.f32 %f1317, 0f7F800000, %f1316, %p129; - sub.f32 %f1318, %f209, %f1317; - mul.f32 %f1319, %f94, %f1318; - mul.f32 %f228, %f154, %f1319; - mul.f32 %f1320, %f2853, %f1317; - mul.f32 %f1321, %f2760, %f209; - sub.f32 %f1322, %f1321, %f1320; - mul.f32 %f1323, %f1322, %f97; - mul.f32 %f229, %f154, %f1323; - // inline asm - rcp.approx.ftz.f32 %f1301,%f123; - // inline asm - mul.f32 %f1324, %f1301, %f124; - mul.f32 %f1325, %f1324, %f1324; - fma.rn.f32 %f1328, %f2756, %f1325, %f2757; - fma.rn.f32 %f1330, %f1328, %f1325, %f2758; - mul.rn.f32 %f1331, %f1330, %f1325; - mul.rn.f32 %f1332, %f1331, %f1324; - sub.f32 %f1333, %f122, %f1324; - neg.f32 %f1334, %f1324; - add.f32 %f1335, %f1333, %f1333; - fma.rn.f32 %f1336, %f1334, %f122, %f1335; - mul.rn.f32 %f1337, %f1301, %f1336; - add.f32 %f1338, %f1332, %f1324; - sub.f32 %f1339, %f1324, %f1338; - add.f32 %f1340, %f1332, %f1339; - add.f32 %f1341, %f1337, %f1340; - add.f32 %f1342, %f1338, %f1341; - sub.f32 %f1343, %f1338, %f1342; - add.f32 %f1344, %f1341, %f1343; - add.f32 %f1345, %f125, %f1342; - sub.f32 %f1346, %f125, %f1345; - add.f32 %f1347, %f1342, %f1346; - add.f32 %f1348, %f1344, %f1347; - add.f32 %f1349, %f126, %f1348; - add.f32 %f1350, %f1345, %f1349; - sub.f32 %f1351, %f1345, %f1350; - add.f32 %f1352, %f1349, %f1351; - mul.rn.f32 %f1354, %f828, %f1350; - neg.f32 %f1355, %f1354; - fma.rn.f32 %f1356, %f828, %f1350, %f1355; - fma.rn.f32 %f1357, %f828, %f1352, %f1356; - fma.rn.f32 %f1359, %f2759, %f1350, %f1357; - add.rn.f32 %f1360, %f1354, %f1359; - neg.f32 %f1361, %f1360; - add.rn.f32 %f1362, %f1354, %f1361; - add.rn.f32 %f1363, %f1362, %f1359; - mov.b32 %r170, %f1360; - setp.eq.s32 %p130, %r170, 1118925336; - add.s32 %r171, %r170, -1; - mov.b32 %f1364, %r171; - add.f32 %f1365, %f1363, 0f37000000; - selp.f32 %f1366, %f1364, %f1360, %p130; - selp.f32 %f230, %f1365, %f1363, %p130; - mul.f32 %f1367, %f1366, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1368, %f1367; - fma.rn.f32 %f1369, %f1368, %f2754, %f1366; - fma.rn.f32 %f1370, %f1368, %f2755, %f1369; - mul.f32 %f1371, %f1370, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1372, %f1371; - add.f32 %f1373, %f1368, 0f00000000; - ex2.approx.f32 %f1374, %f1373; - mul.f32 %f1375, %f1372, %f1374; - setp.lt.f32 %p131, %f1366, 0fC2D20000; - selp.f32 %f1376, 0f00000000, %f1375, %p131; - setp.gt.f32 %p132, %f1366, 0f42D20000; - selp.f32 %f3054, 0f7F800000, %f1376, %p132; - setp.eq.f32 %p133, %f3054, 0f7F800000; - @%p133 bra BB1_85; - - fma.rn.f32 %f3054, %f3054, %f230, %f3054; - -BB1_85: - setp.eq.f32 %p332, %f119, 0f00000000; - setp.geu.f32 %p331, %f119, 0f00000000; - mov.b32 %r172, %f3054; - xor.b32 %r173, %r172, -2147483648; - mov.b32 %f1377, %r173; - selp.f32 %f234, %f1377, %f3054, %p1; - selp.f32 %f3055, %f127, %f234, %p332; - @%p331 bra BB1_87; - - cvt.rzi.f32.f32 %f1379, %f828; - setp.neu.f32 %p135, %f1379, 0f40000000; - selp.f32 %f3055, 0f7FFFFFFF, %f234, %p135; - -BB1_87: - abs.f32 %f2772, %f119; - setp.eq.f32 %p336, %f119, 0f3F800000; - add.f32 %f2771, %f2772, 0f40000000; - mov.b32 %r275, %f2771; - setp.gt.s32 %p335, %r275, 2139095039; - setp.neu.f32 %p334, %f2772, 0f7F800000; - selp.f32 %f2770, 0fFF800000, 0f7F800000, %p1; - setp.gtu.f32 %p333, %f2772, 0f7F800000; - add.f32 %f2769, %f119, 0f40000000; - mov.f32 %f2768, 0f00000000; - mov.f32 %f2767, 0f3DAAAABD; - mov.f32 %f2766, 0f3C4CAF63; - mov.f32 %f2765, 0f3B18F0FE; - mov.f32 %f2764, 0fB5BFBE8E; - mov.f32 %f2763, 0fBF317200; - selp.f32 %f1383, %f2769, %f3055, %p333; - selp.f32 %f1385, %f1383, %f2770, %p334; - selp.f32 %f1386, %f1385, %f3055, %p335; - mul.f32 %f1387, %f1386, 0fBF000000; - selp.f32 %f1388, 0fBF000000, %f1387, %p336; - mul.f32 %f1389, %f1388, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1390, %f1389; - fma.rn.f32 %f1392, %f1390, %f2763, %f1388; - fma.rn.f32 %f1394, %f1390, %f2764, %f1392; - mul.f32 %f1395, %f1394, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1396, %f1395; - add.f32 %f1397, %f1390, 0f00000000; - ex2.approx.f32 %f1398, %f1397; - mul.f32 %f1399, %f1396, %f1398; - setp.lt.f32 %p140, %f1388, 0fC2D20000; - selp.f32 %f1400, 0f00000000, %f1399, %p140; - setp.gt.f32 %p141, %f1388, 0f42D20000; - selp.f32 %f238, 0f7F800000, %f1400, %p141; - // inline asm - rcp.approx.ftz.f32 %f1380,%f131; - // inline asm - mul.f32 %f1401, %f1380, %f132; - mul.f32 %f1402, %f1401, %f1401; - fma.rn.f32 %f1405, %f2765, %f1402, %f2766; - fma.rn.f32 %f1407, %f1405, %f1402, %f2767; - mul.rn.f32 %f1408, %f1407, %f1402; - mul.rn.f32 %f1409, %f1408, %f1401; - sub.f32 %f1410, %f130, %f1401; - neg.f32 %f1411, %f1401; - add.f32 %f1412, %f1410, %f1410; - fma.rn.f32 %f1413, %f1411, %f130, %f1412; - mul.rn.f32 %f1414, %f1380, %f1413; - add.f32 %f1415, %f1409, %f1401; - sub.f32 %f1416, %f1401, %f1415; - add.f32 %f1417, %f1409, %f1416; - add.f32 %f1418, %f1414, %f1417; - add.f32 %f1419, %f1415, %f1418; - sub.f32 %f1420, %f1415, %f1419; - add.f32 %f1421, %f1418, %f1420; - add.f32 %f1422, %f133, %f1419; - sub.f32 %f1423, %f133, %f1422; - add.f32 %f1424, %f1419, %f1423; - add.f32 %f1425, %f1421, %f1424; - add.f32 %f1426, %f134, %f1425; - add.f32 %f1427, %f1422, %f1426; - sub.f32 %f1428, %f1422, %f1427; - add.f32 %f1429, %f1426, %f1428; - mul.rn.f32 %f1431, %f828, %f1427; - neg.f32 %f1432, %f1431; - fma.rn.f32 %f1433, %f828, %f1427, %f1432; - fma.rn.f32 %f1434, %f828, %f1429, %f1433; - fma.rn.f32 %f1436, %f2768, %f1427, %f1434; - add.rn.f32 %f1437, %f1431, %f1436; - neg.f32 %f1438, %f1437; - add.rn.f32 %f1439, %f1431, %f1438; - add.rn.f32 %f1440, %f1439, %f1436; - mov.b32 %r174, %f1437; - setp.eq.s32 %p142, %r174, 1118925336; - add.s32 %r175, %r174, -1; - mov.b32 %f1441, %r175; - add.f32 %f1442, %f1440, 0f37000000; - selp.f32 %f1443, %f1441, %f1437, %p142; - selp.f32 %f239, %f1442, %f1440, %p142; - mul.f32 %f1444, %f1443, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1445, %f1444; - fma.rn.f32 %f1446, %f1445, %f2763, %f1443; - fma.rn.f32 %f1447, %f1445, %f2764, %f1446; - mul.f32 %f1448, %f1447, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1449, %f1448; - add.f32 %f1450, %f1445, 0f00000000; - ex2.approx.f32 %f1451, %f1450; - mul.f32 %f1452, %f1449, %f1451; - setp.lt.f32 %p143, %f1443, 0fC2D20000; - selp.f32 %f1453, 0f00000000, %f1452, %p143; - setp.gt.f32 %p144, %f1443, 0f42D20000; - selp.f32 %f3056, 0f7F800000, %f1453, %p144; - setp.eq.f32 %p145, %f3056, 0f7F800000; - @%p145 bra BB1_89; - - fma.rn.f32 %f3056, %f3056, %f239, %f3056; - -BB1_89: - setp.eq.f32 %p338, %f128, 0f00000000; - setp.geu.f32 %p337, %f128, 0f00000000; - mov.b32 %r176, %f3056; - xor.b32 %r177, %r176, -2147483648; - mov.b32 %f1454, %r177; - selp.f32 %f243, %f1454, %f3056, %p2; - selp.f32 %f3057, %f135, %f243, %p338; - @%p337 bra BB1_91; - - cvt.rzi.f32.f32 %f1456, %f828; - setp.neu.f32 %p147, %f1456, 0f40000000; - selp.f32 %f3057, 0f7FFFFFFF, %f243, %p147; - -BB1_91: - abs.f32 %f2787, %f128; - cvt.rn.f32.s32 %f2786, %r309; - sub.f32 %f2785, %f2786, %f3080; - mul.f32 %f2784, %f2785, %f2785; - mul.f32 %f2783, %f2785, %f2784; - add.f32 %f2782, %f2785, 0f3F800000; - setp.eq.f32 %p342, %f128, 0f3F800000; - add.f32 %f2781, %f2787, 0f40000000; - mov.b32 %r276, %f2781; - setp.gt.s32 %p341, %r276, 2139095039; - setp.neu.f32 %p340, %f2787, 0f7F800000; - selp.f32 %f2780, 0fFF800000, 0f7F800000, %p2; - setp.gtu.f32 %p339, %f2787, 0f7F800000; - add.f32 %f2779, %f128, 0f40000000; - mov.f32 %f2778, 0f00000000; - mov.f32 %f2777, 0f3DAAAABD; - mov.f32 %f2776, 0f3C4CAF63; - mov.f32 %f2775, 0f3B18F0FE; - mov.f32 %f2774, 0fB5BFBE8E; - mov.f32 %f2773, 0fBF317200; - selp.f32 %f1460, %f2779, %f3057, %p339; - selp.f32 %f1462, %f1460, %f2780, %p340; - selp.f32 %f1463, %f1462, %f3057, %p341; - mul.f32 %f1464, %f1463, 0fBF000000; - selp.f32 %f1465, 0fBF000000, %f1464, %p342; - mul.f32 %f1466, %f1465, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1467, %f1466; - fma.rn.f32 %f1469, %f1467, %f2773, %f1465; - fma.rn.f32 %f1471, %f1467, %f2774, %f1469; - mul.f32 %f1472, %f1471, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1473, %f1472; - add.f32 %f1474, %f1467, 0f00000000; - ex2.approx.f32 %f1475, %f1474; - mul.f32 %f1476, %f1473, %f1475; - setp.lt.f32 %p152, %f1465, 0fC2D20000; - selp.f32 %f1477, 0f00000000, %f1476, %p152; - setp.gt.f32 %p153, %f1465, 0f42D20000; - selp.f32 %f1478, 0f7F800000, %f1477, %p153; - mul.f32 %f1479, %f2785, %f1478; - mul.f32 %f1480, %f2782, %f238; - sub.f32 %f1481, %f1480, %f1479; - mul.f32 %f1482, %f95, %f1481; - mul.f32 %f247, %f168, %f1482; - mul.f32 %f1483, %f96, %f247; - mul.f32 %f1484, %f1478, %f2783; - mul.f32 %f1485, %f238, %f136; - sub.f32 %f1486, %f1485, %f1484; - mul.f32 %f1487, %f98, %f1486; - mul.f32 %f1488, %f168, %f1487; - sub.f32 %f248, %f1483, %f1488; - // inline asm - rcp.approx.ftz.f32 %f1457,%f1117; - // inline asm - mul.f32 %f1489, %f1457, %f195; - mul.f32 %f1490, %f1489, %f1489; - fma.rn.f32 %f1493, %f2775, %f1490, %f2776; - fma.rn.f32 %f1495, %f1493, %f1490, %f2777; - mul.rn.f32 %f1496, %f1495, %f1490; - mul.rn.f32 %f1497, %f1496, %f1489; - sub.f32 %f1498, %f193, %f1489; - neg.f32 %f1499, %f1489; - add.f32 %f1500, %f1498, %f1498; - fma.rn.f32 %f1501, %f1499, %f193, %f1500; - mul.rn.f32 %f1502, %f1457, %f1501; - add.f32 %f1503, %f1497, %f1489; - sub.f32 %f1504, %f1489, %f1503; - add.f32 %f1505, %f1497, %f1504; - add.f32 %f1506, %f1502, %f1505; - add.f32 %f1507, %f1503, %f1506; - sub.f32 %f1508, %f1503, %f1507; - add.f32 %f1509, %f1506, %f1508; - add.f32 %f1510, %f196, %f1507; - sub.f32 %f1511, %f196, %f1510; - add.f32 %f1512, %f1507, %f1511; - add.f32 %f1513, %f1509, %f1512; - add.f32 %f1514, %f197, %f1513; - add.f32 %f1515, %f1510, %f1514; - sub.f32 %f1516, %f1510, %f1515; - add.f32 %f1517, %f1514, %f1516; - mul.rn.f32 %f1519, %f828, %f1515; - neg.f32 %f1520, %f1519; - fma.rn.f32 %f1521, %f828, %f1515, %f1520; - fma.rn.f32 %f1522, %f828, %f1517, %f1521; - fma.rn.f32 %f1524, %f2778, %f1515, %f1522; - add.rn.f32 %f1525, %f1519, %f1524; - neg.f32 %f1526, %f1525; - add.rn.f32 %f1527, %f1519, %f1526; - add.rn.f32 %f1528, %f1527, %f1524; - mov.b32 %r178, %f1525; - setp.eq.s32 %p154, %r178, 1118925336; - add.s32 %r179, %r178, -1; - mov.b32 %f1529, %r179; - add.f32 %f1530, %f1528, 0f37000000; - selp.f32 %f1531, %f1529, %f1525, %p154; - selp.f32 %f249, %f1530, %f1528, %p154; - mul.f32 %f1532, %f1531, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1533, %f1532; - fma.rn.f32 %f1534, %f1533, %f2773, %f1531; - fma.rn.f32 %f1535, %f1533, %f2774, %f1534; - mul.f32 %f1536, %f1535, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1537, %f1536; - add.f32 %f1538, %f1533, 0f00000000; - ex2.approx.f32 %f1539, %f1538; - mul.f32 %f1540, %f1537, %f1539; - setp.lt.f32 %p155, %f1531, 0fC2D20000; - selp.f32 %f1541, 0f00000000, %f1540, %p155; - setp.gt.f32 %p156, %f1531, 0f42D20000; - selp.f32 %f3058, 0f7F800000, %f1541, %p156; - setp.eq.f32 %p157, %f3058, 0f7F800000; - @%p157 bra BB1_93; - - fma.rn.f32 %f3058, %f3058, %f249, %f3058; - -BB1_93: - setp.eq.f32 %p343, %f191, 0f00000000; - mov.b32 %r180, %f3058; - xor.b32 %r181, %r180, -2147483648; - mov.b32 %f1542, %r181; - selp.f32 %f3060, %f1542, %f3058, %p5; - @%p343 bra BB1_96; - bra.uni BB1_94; - -BB1_96: - add.f32 %f1545, %f191, %f191; - selp.f32 %f3060, %f1545, 0f00000000, %p55; - bra.uni BB1_97; - -BB1_94: - setp.geu.f32 %p159, %f191, 0f00000000; - @%p159 bra BB1_97; - - cvt.rzi.f32.f32 %f1544, %f828; - setp.neu.f32 %p160, %f1544, 0f40000000; - selp.f32 %f3060, 0f7FFFFFFF, %f3060, %p160; - -BB1_97: - abs.f32 %f2789, %f191; - add.f32 %f2788, %f2789, 0f40000000; - mov.b32 %r277, %f2788; - setp.lt.s32 %p344, %r277, 2139095040; - @%p344 bra BB1_102; - - abs.f32 %f2847, %f191; - setp.gtu.f32 %p163, %f2847, 0f7F800000; - @%p163 bra BB1_101; - bra.uni BB1_99; - -BB1_101: - add.f32 %f3060, %f191, 0f40000000; - bra.uni BB1_102; - -BB1_99: - abs.f32 %f2848, %f191; - setp.neu.f32 %p164, %f2848, 0f7F800000; - @%p164 bra BB1_102; - - selp.f32 %f3060, 0fFF800000, 0f7F800000, %p5; - -BB1_102: - setp.eq.f32 %p345, %f191, 0f3F800000; - mov.f32 %f2795, 0f00000000; - mov.f32 %f2794, 0f3DAAAABD; - mov.f32 %f2793, 0f3C4CAF63; - mov.f32 %f2792, 0f3B18F0FE; - mov.f32 %f2791, 0fB5BFBE8E; - mov.f32 %f2790, 0fBF317200; - mul.f32 %f1548, %f3060, 0fBF000000; - selp.f32 %f1549, 0fBF000000, %f1548, %p345; - mul.f32 %f1550, %f1549, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1551, %f1550; - fma.rn.f32 %f1553, %f1551, %f2790, %f1549; - fma.rn.f32 %f1555, %f1551, %f2791, %f1553; - mul.f32 %f1556, %f1555, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1557, %f1556; - add.f32 %f1558, %f1551, 0f00000000; - ex2.approx.f32 %f1559, %f1558; - mul.f32 %f1560, %f1557, %f1559; - setp.lt.f32 %p166, %f1549, 0fC2D20000; - selp.f32 %f1561, 0f00000000, %f1560, %p166; - setp.gt.f32 %p167, %f1549, 0f42D20000; - selp.f32 %f260, 0f7F800000, %f1561, %p167; - // inline asm - rcp.approx.ftz.f32 %f1546,%f1216; - // inline asm - mul.f32 %f1562, %f1546, %f214; - mul.f32 %f1563, %f1562, %f1562; - fma.rn.f32 %f1566, %f2792, %f1563, %f2793; - fma.rn.f32 %f1568, %f1566, %f1563, %f2794; - mul.rn.f32 %f1569, %f1568, %f1563; - mul.rn.f32 %f1570, %f1569, %f1562; - sub.f32 %f1571, %f212, %f1562; - neg.f32 %f1572, %f1562; - add.f32 %f1573, %f1571, %f1571; - fma.rn.f32 %f1574, %f1572, %f212, %f1573; - mul.rn.f32 %f1575, %f1546, %f1574; - add.f32 %f1576, %f1570, %f1562; - sub.f32 %f1577, %f1562, %f1576; - add.f32 %f1578, %f1570, %f1577; - add.f32 %f1579, %f1575, %f1578; - add.f32 %f1580, %f1576, %f1579; - sub.f32 %f1581, %f1576, %f1580; - add.f32 %f1582, %f1579, %f1581; - add.f32 %f1583, %f215, %f1580; - sub.f32 %f1584, %f215, %f1583; - add.f32 %f1585, %f1580, %f1584; - add.f32 %f1586, %f1582, %f1585; - add.f32 %f1587, %f216, %f1586; - add.f32 %f1588, %f1583, %f1587; - sub.f32 %f1589, %f1583, %f1588; - add.f32 %f1590, %f1587, %f1589; - mul.rn.f32 %f1592, %f828, %f1588; - neg.f32 %f1593, %f1592; - fma.rn.f32 %f1594, %f828, %f1588, %f1593; - fma.rn.f32 %f1595, %f828, %f1590, %f1594; - fma.rn.f32 %f1597, %f2795, %f1588, %f1595; - add.rn.f32 %f1598, %f1592, %f1597; - neg.f32 %f1599, %f1598; - add.rn.f32 %f1600, %f1592, %f1599; - add.rn.f32 %f1601, %f1600, %f1597; - mov.b32 %r182, %f1598; - setp.eq.s32 %p168, %r182, 1118925336; - add.s32 %r183, %r182, -1; - mov.b32 %f1602, %r183; - add.f32 %f1603, %f1601, 0f37000000; - selp.f32 %f1604, %f1602, %f1598, %p168; - selp.f32 %f261, %f1603, %f1601, %p168; - mul.f32 %f1605, %f1604, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1606, %f1605; - fma.rn.f32 %f1607, %f1606, %f2790, %f1604; - fma.rn.f32 %f1608, %f1606, %f2791, %f1607; - mul.f32 %f1609, %f1608, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1610, %f1609; - add.f32 %f1611, %f1606, 0f00000000; - ex2.approx.f32 %f1612, %f1611; - mul.f32 %f1613, %f1610, %f1612; - setp.lt.f32 %p169, %f1604, 0fC2D20000; - selp.f32 %f1614, 0f00000000, %f1613, %p169; - setp.gt.f32 %p170, %f1604, 0f42D20000; - selp.f32 %f3061, 0f7F800000, %f1614, %p170; - setp.eq.f32 %p171, %f3061, 0f7F800000; - @%p171 bra BB1_104; - - fma.rn.f32 %f3061, %f3061, %f261, %f3061; - -BB1_104: - setp.eq.f32 %p348, %f210, 0f00000000; - mov.b32 %r184, %f3061; - xor.b32 %r185, %r184, -2147483648; - mov.b32 %f1615, %r185; - selp.f32 %f3063, %f1615, %f3061, %p6; - @%p348 bra BB1_107; - bra.uni BB1_105; - -BB1_107: - add.f32 %f1618, %f210, %f210; - selp.f32 %f3063, %f1618, 0f00000000, %p55; - bra.uni BB1_108; - -BB1_105: - setp.geu.f32 %p173, %f210, 0f00000000; - @%p173 bra BB1_108; - - cvt.rzi.f32.f32 %f1617, %f828; - setp.neu.f32 %p174, %f1617, 0f40000000; - selp.f32 %f3063, 0f7FFFFFFF, %f3063, %p174; - -BB1_108: - abs.f32 %f2861, %f210; - add.f32 %f2860, %f2861, 0f40000000; - mov.b32 %r286, %f2860; - setp.lt.s32 %p349, %r286, 2139095040; - @%p349 bra BB1_113; - - abs.f32 %f2845, %f210; - setp.gtu.f32 %p177, %f2845, 0f7F800000; - @%p177 bra BB1_112; - bra.uni BB1_110; - -BB1_112: - add.f32 %f3063, %f210, 0f40000000; - bra.uni BB1_113; - -BB1_110: - abs.f32 %f2846, %f210; - setp.neu.f32 %p178, %f2846, 0f7F800000; - @%p178 bra BB1_113; - - selp.f32 %f3063, 0fFF800000, 0f7F800000, %p6; - -BB1_113: - setp.eq.f32 %p350, %f210, 0f3F800000; - cvt.rn.f32.s32 %f2801, %r310; - sub.f32 %f2800, %f2801, %f3079; - add.f32 %f2799, %f2800, 0f3F800000; - mov.f32 %f3064, 0f00000000; - mov.f32 %f2797, 0fB5BFBE8E; - mov.f32 %f2796, 0fBF317200; - mul.f32 %f1620, %f2799, %f2799; - mul.f32 %f1621, %f2799, %f1620; - mul.f32 %f1622, %f2800, %f2800; - mul.f32 %f1623, %f2800, %f1622; - mul.f32 %f1624, %f3063, 0fBF000000; - selp.f32 %f1625, 0fBF000000, %f1624, %p350; - mul.f32 %f1626, %f1625, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1627, %f1626; - fma.rn.f32 %f1629, %f1627, %f2796, %f1625; - fma.rn.f32 %f1631, %f1627, %f2797, %f1629; - mul.f32 %f1632, %f1631, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1633, %f1632; - add.f32 %f1634, %f1627, 0f00000000; - ex2.approx.f32 %f1635, %f1634; - mul.f32 %f1636, %f1633, %f1635; - setp.lt.f32 %p180, %f1625, 0fC2D20000; - selp.f32 %f1637, 0f00000000, %f1636, %p180; - setp.gt.f32 %p181, %f1625, 0f42D20000; - selp.f32 %f1638, 0f7F800000, %f1637, %p181; - mul.f32 %f1639, %f2800, %f1638; - mul.f32 %f1640, %f2799, %f260; - sub.f32 %f1641, %f1640, %f1639; - mul.f32 %f1642, %f95, %f1641; - mul.f32 %f1643, %f154, %f1642; - mul.f32 %f1644, %f96, %f1643; - mul.f32 %f1645, %f260, %f1621; - mul.f32 %f1646, %f1638, %f1623; - sub.f32 %f1647, %f1645, %f1646; - mul.f32 %f1648, %f98, %f1647; - mul.f32 %f1649, %f154, %f1648; - sub.f32 %f1650, %f1644, %f1649; - add.f32 %f272, %f247, %f1643; - add.f32 %f273, %f248, %f1650; - mul.f32 %f274, %f154, %f168; - setp.leu.f32 %p182, %f169, 0f3C23D70A; - @%p182 bra BB1_115; - - div.rn.f32 %f1651, %f170, %f169; - add.f32 %f3064, %f1651, 0fBF800000; - -BB1_115: - mov.f32 %f3065, 0f00000000; - @%p182 bra BB1_117; - - mul.f32 %f1653, %f169, %f169; - div.rn.f32 %f3065, %f170, %f1653; - -BB1_117: - mov.f32 %f1654, 0f47C35000; - min.f32 %f1655, %f3064, %f1654; - fma.rn.f32 %f3034, %f1655, %f188, %f3034; - mul.f32 %f1656, %f1655, %f189; - mul.f32 %f1657, %f188, %f188; - min.f32 %f1658, %f3065, %f1654; - mul.f32 %f1659, %f1658, %f1657; - sub.f32 %f1660, %f1656, %f1659; - add.f32 %f3039, %f1660, %f3039; - fma.rn.f32 %f3033, %f1655, %f228, %f3033; - mul.f32 %f1661, %f1655, %f229; - mul.f32 %f1662, %f228, %f228; - mul.f32 %f1663, %f1658, %f1662; - sub.f32 %f1664, %f1661, %f1663; - add.f32 %f3038, %f1664, %f3038; - fma.rn.f32 %f3032, %f1655, %f274, %f3032; - mul.f32 %f1665, %f1655, 0f00000000; - mul.f32 %f1666, %f274, %f274; - mul.f32 %f1667, %f1658, %f1666; - sub.f32 %f1668, %f1665, %f1667; - add.f32 %f3037, %f1668, %f3037; - add.f32 %f3031, %f3031, %f1655; - sub.f32 %f1669, %f1665, %f1658; - add.f32 %f3036, %f1669, %f3036; - fma.rn.f32 %f3030, %f1655, %f272, %f3030; - mul.f32 %f1670, %f1655, %f273; - mul.f32 %f1671, %f272, %f272; - mul.f32 %f1672, %f1658, %f1671; - sub.f32 %f1673, %f1670, %f1672; - add.f32 %f3035, %f1673, %f3035; - add.s32 %r310, %r310, 1; - setp.lt.s32 %p184, %r310, %r63; - @%p184 bra BB1_41; - - add.s32 %r309, %r309, 1; - setp.lt.s32 %p185, %r309, %r63; - @%p185 bra BB1_40; - -BB1_119: - div.rn.f32 %f1674, %f3034, %f3039; - mov.f32 %f1675, 0fBF800000; - max.f32 %f1676, %f1674, %f1675; - mov.f32 %f1677, 0f3F800000; - min.f32 %f1678, %f1676, %f1677; - sub.f32 %f3080, %f3080, %f1678; - div.rn.f32 %f1679, %f3033, %f3038; - max.f32 %f1680, %f1679, %f1675; - min.f32 %f1681, %f1680, %f1677; - sub.f32 %f3079, %f3079, %f1681; - neg.f32 %f1682, %f3078; - div.rn.f32 %f1683, %f3032, %f3037; - max.f32 %f1684, %f1683, %f1682; - min.f32 %f1685, %f1684, %f3078; - sub.f32 %f1686, %f3078, %f1685; - neg.f32 %f1687, %f2993; - div.rn.f32 %f1688, %f3031, %f3036; - max.f32 %f1689, %f1688, %f1687; - min.f32 %f1690, %f1689, %f2993; - sub.f32 %f1691, %f2993, %f1690; - neg.f32 %f1692, %f3076; - div.rn.f32 %f1693, %f3030, %f3035; - max.f32 %f1694, %f1693, %f1692; - min.f32 %f1695, %f1694, %f3076; - sub.f32 %f1696, %f3076, %f1695; - max.f32 %f3078, %f1686, %f1677; - mov.f32 %f1697, 0f3C23D70A; - max.f32 %f2993, %f1691, %f1697; - max.f32 %f1699, %f1696, %f591; - min.f32 %f3076, %f1699, %f87; - add.s32 %r308, %r308, 1; - setp.lt.s32 %p186, %r308, %r64; - @%p186 bra BB1_38; - -BB1_120: - mov.f32 %f3124, 0f00000000; - @%p15 bra BB1_205; - - div.rn.f32 %f1703, %f591, %f3076; - div.rn.f32 %f309, %f1703, %f3076; - div.rn.f32 %f1704, %f3078, 0fC0206C98; - div.rn.f32 %f310, %f1704, %f3076; - div.rn.f32 %f311, %f310, %f3076; + .reg .pred %p<680>; + .reg .f32 %f<2944>; + .reg .b32 %r<785>; + .reg .f64 %fd<558>; + .reg .b64 %rd<52>; + + + ld.param.u64 %rd7, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_0]; + ld.param.f32 %f2864, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_1]; + ld.param.u32 %r102, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_2]; + ld.param.u32 %r104, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_7]; + cvta.to.global.u64 %rd1, %rd7; + mov.u32 %r105, %ntid.x; + mov.u32 %r106, %ctaid.x; + mov.u32 %r107, %tid.x; + mad.lo.s32 %r1, %r106, %r105, %r107; + setp.ge.s32 %p39, %r1, %r104; + @%p39 bra $L__BB1_427; + + mul.lo.s32 %r108, %r102, %r102; + mul.lo.s32 %r2, %r108, %r1; + setp.lt.s32 %p40, %r102, 1; + mov.f32 %f2796, 0f00000000; + mov.f32 %f2787, %f2796; + mov.f32 %f2788, %f2796; + mov.f32 %f2789, %f2796; + @%p40 bra $L__BB1_11; + + add.s32 %r3, %r102, -1; + and.b32 %r4, %r102, 3; + sub.s32 %r5, %r102, %r4; + shl.b32 %r6, %r102, 2; + mov.u32 %r109, 0; + setp.lt.u32 %p41, %r3, 3; + setp.eq.s32 %p43, %r4, 0; + setp.eq.s32 %p44, %r4, 1; + setp.eq.s32 %p45, %r4, 2; + cvt.s64.s32 %rd13, %r6; + mov.u32 %r770, %r109; + +$L__BB1_3: + cvt.rn.f32.s32 %f4, %r770; + mov.u32 %r773, %r109; + @%p41 bra $L__BB1_6; + + mov.u32 %r773, %r109; + mov.u32 %r772, %r5; + +$L__BB1_5: + mad.lo.s32 %r112, %r773, %r102, %r770; + add.s32 %r113, %r112, %r2; + mul.wide.s32 %rd11, %r113, 4; + add.s64 %rd12, %rd1, %rd11; + ld.global.f32 %f464, [%rd12]; + fma.rn.f32 %f465, %f464, %f4, %f2787; + cvt.rn.f32.s32 %f466, %r773; + fma.rn.f32 %f467, %f464, %f466, %f2788; + add.f32 %f468, %f2789, %f464; + add.s64 %rd14, %rd12, %rd13; + ld.global.f32 %f469, [%rd14]; + fma.rn.f32 %f470, %f469, %f4, %f465; + add.s32 %r114, %r773, 1; + cvt.rn.f32.s32 %f471, %r114; + fma.rn.f32 %f472, %f469, %f471, %f467; + add.f32 %f473, %f468, %f469; + add.s64 %rd15, %rd14, %rd13; + ld.global.f32 %f474, [%rd15]; + fma.rn.f32 %f475, %f474, %f4, %f470; + add.s32 %r115, %r773, 2; + cvt.rn.f32.s32 %f476, %r115; + fma.rn.f32 %f477, %f474, %f476, %f472; + add.f32 %f478, %f473, %f474; + add.s64 %rd16, %rd15, %rd13; + ld.global.f32 %f479, [%rd16]; + fma.rn.f32 %f2787, %f479, %f4, %f475; + add.s32 %r116, %r773, 3; + cvt.rn.f32.s32 %f480, %r116; + fma.rn.f32 %f2788, %f479, %f480, %f477; + add.f32 %f2789, %f478, %f479; + add.s32 %r773, %r773, 4; + add.s32 %r772, %r772, -4; + setp.ne.s32 %p42, %r772, 0; + @%p42 bra $L__BB1_5; + +$L__BB1_6: + @%p43 bra $L__BB1_10; + + mad.lo.s32 %r13, %r773, %r102, %r770; + add.s32 %r117, %r13, %r2; + mul.wide.s32 %rd17, %r117, 4; + add.s64 %rd18, %rd1, %rd17; + ld.global.f32 %f481, [%rd18]; + fma.rn.f32 %f2787, %f481, %f4, %f2787; + cvt.rn.f32.s32 %f482, %r773; + fma.rn.f32 %f2788, %f481, %f482, %f2788; + add.f32 %f2789, %f2789, %f481; + @%p44 bra $L__BB1_10; + + add.s32 %r14, %r13, %r102; + add.s32 %r118, %r14, %r2; + mul.wide.s32 %rd19, %r118, 4; + add.s64 %rd20, %rd1, %rd19; + ld.global.f32 %f483, [%rd20]; + fma.rn.f32 %f2787, %f483, %f4, %f2787; + add.s32 %r119, %r773, 1; + cvt.rn.f32.s32 %f484, %r119; + fma.rn.f32 %f2788, %f483, %f484, %f2788; + add.f32 %f2789, %f2789, %f483; + @%p45 bra $L__BB1_10; + + add.s32 %r120, %r773, 2; + add.s32 %r121, %r14, %r102; + add.s32 %r122, %r121, %r2; + mul.wide.s32 %rd21, %r122, 4; + add.s64 %rd22, %rd1, %rd21; + ld.global.f32 %f485, [%rd22]; + fma.rn.f32 %f2787, %f485, %f4, %f2787; + cvt.rn.f32.s32 %f486, %r120; + fma.rn.f32 %f2788, %f485, %f486, %f2788; + add.f32 %f2789, %f2789, %f485; + +$L__BB1_10: + add.s32 %r770, %r770, 1; + setp.lt.s32 %p46, %r770, %r102; + @%p46 bra $L__BB1_3; + +$L__BB1_11: + div.rn.f32 %f2868, %f2787, %f2789; + div.rn.f32 %f2867, %f2788, %f2789; + mov.f32 %f2865, 0f51BA43B7; + @%p40 bra $L__BB1_51; + + mov.f32 %f491, 0f3F000000; + div.rn.f32 %f492, %f491, %f2864; + div.rn.f32 %f493, %f492, %f2864; + cvt.f64.f32 %fd1, %f493; + mov.f64 %fd215, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r16}, %fd215; + } + and.b32 %r17, %r16, 2146435072; + and.b32 %r18, %r16, 2147483647; + setp.gt.s32 %p48, %r16, -1; + selp.b32 %r19, 2146435072, 0, %p48; + mov.u32 %r123, 0; + or.b32 %r20, %r19, -2147483648; + mul.wide.s32 %rd23, %r2, 4; + add.s64 %rd2, %rd1, %rd23; + setp.eq.s32 %p50, %r17, 1062207488; + setp.lt.s32 %p51, %r16, 0; + setp.ne.s32 %p56, %r18, 1071644672; + setp.eq.s32 %p83, %r18, 2146435072; + mov.u32 %r774, %r123; + +$L__BB1_13: + mov.u32 %r775, %r123; + +$L__BB1_14: + mov.u32 %r126, 1; + sub.s32 %r24, %r126, %r775; + mov.f32 %f2799, 0f00000000; + mov.f32 %f2800, %f2799; + mov.u32 %r776, %r123; + +$L__BB1_15: + add.s32 %r778, %r775, -1; + sub.s32 %r26, %r776, %r774; + cvt.rn.f32.s32 %f496, %r26; + cvt.f64.f32 %fd2, %f496; + { + .reg .b32 %temp; + mov.b64 {%temp, %r27}, %fd2; + } + abs.f64 %fd216, %fd2; + { // callseq 9, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd216; + .param .b64 param1; + st.param.f64 [param1+0], %fd215; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd3, [retval0+0]; + } // callseq 9 + setp.lt.s32 %p49, %r27, 0; + and.pred %p1, %p49, %p50; + selp.b32 %r128, %r27, 0, %p50; + or.b32 %r129, %r128, 2146435072; + selp.b32 %r28, %r129, %r128, %p51; + add.f64 %fd4, %fd2, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r130}, %fd4; + } + and.b32 %r29, %r130, 2146435072; + setp.ne.s32 %p52, %r29, 2146435072; + setp.gtu.f64 %p53, %fd216, 0d7FF0000000000000; + setp.gt.f64 %p54, %fd216, 0d3FF0000000000000; + selp.b32 %r131, 2146435072, 0, %p54; + xor.b32 %r132, %r131, 2146435072; + selp.b32 %r133, %r132, %r131, %p51; + setp.eq.s32 %p55, %r26, -1; + selp.b32 %r30, 1072693248, %r133, %p55; + and.b32 %r31, %r27, 2147483647; + and.pred %p57, %p56, %p1; + selp.b32 %r32, %r20, %r19, %p57; + or.pred %p2, %p52, %p53; + mul.lo.s32 %r134, %r102, %r776; + mul.wide.s32 %rd24, %r134, 4; + add.s64 %rd51, %rd2, %rd24; + mov.u32 %r777, %r24; + mov.u32 %r779, %r123; + +$L__BB1_16: + not.pred %p58, %p1; + mov.f64 %fd502, %fd3; + @%p58 bra $L__BB1_18; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r135}, %fd3; + } + xor.b32 %r136, %r135, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r137, %temp}, %fd3; + } + mov.b64 %fd502, {%r137, %r136}; + +$L__BB1_18: + setp.eq.s32 %p59, %r26, 0; + @%p59 bra $L__BB1_22; + + setp.gt.s32 %p60, %r27, -1; + @%p60 bra $L__BB1_23; + + cvt.rzi.f64.f64 %fd219, %fd215; + setp.eq.f64 %p61, %fd219, 0d4000000000000000; + @%p61 bra $L__BB1_23; + + mov.f64 %fd502, 0dFFF8000000000000; + bra.uni $L__BB1_23; + +$L__BB1_22: + mov.u32 %r138, 0; + mov.b64 %fd502, {%r138, %r28}; + +$L__BB1_23: + selp.f64 %fd503, %fd502, %fd4, %p52; + @%p2 bra $L__BB1_28; + + { + .reg .b32 %temp; + mov.b64 {%r139, %temp}, %fd215; + } + setp.eq.s32 %p64, %r139, 0; + and.pred %p65, %p83, %p64; + @%p65 bra $L__BB1_27; + bra.uni $L__BB1_25; + +$L__BB1_27: + mov.u32 %r142, 0; + mov.b64 %fd503, {%r142, %r30}; + bra.uni $L__BB1_28; + +$L__BB1_25: + setp.ne.s32 %p66, %r31, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r140, %temp}, %fd2; + } + setp.ne.s32 %p67, %r140, 0; + or.pred %p68, %p66, %p67; + mov.f64 %fd503, %fd502; + @%p68 bra $L__BB1_28; + + mov.u32 %r141, 0; + mov.b64 %fd503, {%r141, %r32}; + +$L__BB1_28: + setp.eq.s32 %p69, %r26, 1; + selp.f64 %fd222, 0d3FF0000000000000, %fd503, %p69; + mov.f64 %fd223, 0d3FF0000000000000; + mul.f64 %fd13, %fd222, %fd1; + neg.f64 %fd224, %fd13; + mov.f64 %fd225, 0d4338000000000000; + mov.f64 %fd226, 0d3FF71547652B82FE; + fma.rn.f64 %fd227, %fd224, %fd226, %fd225; + { + .reg .b32 %temp; + mov.b64 {%r36, %temp}, %fd227; + } + mov.f64 %fd228, 0dC338000000000000; + add.rn.f64 %fd229, %fd227, %fd228; + mov.f64 %fd230, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd231, %fd229, %fd230, %fd224; + mov.f64 %fd232, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd233, %fd229, %fd232, %fd231; + mov.f64 %fd234, 0d3E928AF3FCA213EA; + mov.f64 %fd235, 0d3E5ADE1569CE2BDF; + fma.rn.f64 %fd236, %fd235, %fd233, %fd234; + mov.f64 %fd237, 0d3EC71DEE62401315; + fma.rn.f64 %fd238, %fd236, %fd233, %fd237; + mov.f64 %fd239, 0d3EFA01997C89EB71; + fma.rn.f64 %fd240, %fd238, %fd233, %fd239; + mov.f64 %fd241, 0d3F2A01A014761F65; + fma.rn.f64 %fd242, %fd240, %fd233, %fd241; + mov.f64 %fd243, 0d3F56C16C1852B7AF; + fma.rn.f64 %fd244, %fd242, %fd233, %fd243; + mov.f64 %fd245, 0d3F81111111122322; + fma.rn.f64 %fd246, %fd244, %fd233, %fd245; + mov.f64 %fd247, 0d3FA55555555502A1; + fma.rn.f64 %fd248, %fd246, %fd233, %fd247; + mov.f64 %fd249, 0d3FC5555555555511; + fma.rn.f64 %fd250, %fd248, %fd233, %fd249; + mov.f64 %fd251, 0d3FE000000000000B; + fma.rn.f64 %fd252, %fd250, %fd233, %fd251; + fma.rn.f64 %fd253, %fd252, %fd233, %fd223; + fma.rn.f64 %fd254, %fd253, %fd233, %fd223; + { + .reg .b32 %temp; + mov.b64 {%r37, %temp}, %fd254; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r38}, %fd254; + } + shl.b32 %r143, %r36, 20; + add.s32 %r144, %r38, %r143; + mov.b64 %fd504, {%r37, %r144}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r145}, %fd224; + } + mov.b32 %f497, %r145; + abs.f32 %f42, %f497; + setp.lt.f32 %p70, %f42, 0f4086232B; + @%p70 bra $L__BB1_31; + + setp.gt.f64 %p71, %fd13, 0d8000000000000000; + mov.f64 %fd255, 0d7FF0000000000000; + sub.f64 %fd256, %fd255, %fd13; + selp.f64 %fd504, 0d0000000000000000, %fd256, %p71; + setp.geu.f32 %p72, %f42, 0f40874800; + @%p72 bra $L__BB1_31; + + shr.u32 %r146, %r36, 31; + add.s32 %r147, %r36, %r146; + shr.s32 %r148, %r147, 1; + shl.b32 %r149, %r148, 20; + add.s32 %r150, %r38, %r149; + mov.b64 %fd257, {%r37, %r150}; + sub.s32 %r151, %r36, %r148; + shl.b32 %r152, %r151, 20; + add.s32 %r153, %r152, 1072693248; + mov.u32 %r154, 0; + mov.b64 %fd258, {%r154, %r153}; + mul.f64 %fd504, %fd257, %fd258; + +$L__BB1_31: + add.s32 %r155, %r778, 1; + cvt.rn.f32.s32 %f498, %r155; + cvt.f64.f32 %fd18, %f498; + { + .reg .b32 %temp; + mov.b64 {%temp, %r39}, %fd18; + } + abs.f64 %fd19, %fd18; + { // callseq 10, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd19; + .param .b64 param1; + st.param.f64 [param1+0], %fd215; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd506, [retval0+0]; + } // callseq 10 + setp.lt.s32 %p73, %r39, 0; + and.pred %p3, %p73, %p50; + not.pred %p75, %p3; + @%p75 bra $L__BB1_33; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r156}, %fd506; + } + xor.b32 %r157, %r156, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r158, %temp}, %fd506; + } + mov.b64 %fd506, {%r158, %r157}; + +$L__BB1_33: + setp.eq.s32 %p76, %r777, 1; + @%p76 bra $L__BB1_37; + bra.uni $L__BB1_34; + +$L__BB1_37: + mov.u32 %r159, 0; + selp.b32 %r160, %r39, 0, %p50; + or.b32 %r161, %r160, 2146435072; + selp.b32 %r162, %r161, %r160, %p51; + mov.b64 %fd506, {%r159, %r162}; + bra.uni $L__BB1_38; + +$L__BB1_34: + setp.gt.s32 %p77, %r39, -1; + @%p77 bra $L__BB1_38; + + cvt.rzi.f64.f64 %fd261, %fd215; + setp.eq.f64 %p78, %fd261, 0d4000000000000000; + @%p78 bra $L__BB1_38; + + mov.f64 %fd506, 0dFFF8000000000000; + +$L__BB1_38: + add.f64 %fd25, %fd18, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r163}, %fd25; + } + and.b32 %r164, %r163, 2146435072; + setp.ne.s32 %p81, %r164, 2146435072; + mov.f64 %fd507, %fd506; + @%p81 bra $L__BB1_44; + + setp.gtu.f64 %p82, %fd19, 0d7FF0000000000000; + mov.f64 %fd507, %fd25; + @%p82 bra $L__BB1_44; + + { + .reg .b32 %temp; + mov.b64 {%r165, %temp}, %fd215; + } + setp.eq.s32 %p84, %r165, 0; + and.pred %p85, %p83, %p84; + @%p85 bra $L__BB1_43; + bra.uni $L__BB1_41; + +$L__BB1_43: + mov.u32 %r170, 0; + setp.gt.f64 %p92, %fd19, 0d3FF0000000000000; + selp.b32 %r171, 2146435072, 0, %p92; + xor.b32 %r172, %r171, 2146435072; + selp.b32 %r173, %r172, %r171, %p51; + setp.eq.s32 %p93, %r778, -2; + selp.b32 %r174, 1072693248, %r173, %p93; + mov.b64 %fd507, {%r170, %r174}; + bra.uni $L__BB1_44; + +$L__BB1_41: + { + .reg .b32 %temp; + mov.b64 {%r166, %temp}, %fd18; + } + and.b32 %r167, %r39, 2147483647; + setp.ne.s32 %p86, %r167, 2146435072; + setp.ne.s32 %p87, %r166, 0; + or.pred %p88, %p86, %p87; + mov.f64 %fd507, %fd506; + @%p88 bra $L__BB1_44; + + and.pred %p90, %p56, %p3; + selp.b32 %r168, %r20, %r19, %p90; + mov.u32 %r169, 0; + mov.b64 %fd507, {%r169, %r168}; + +$L__BB1_44: + mov.f64 %fd500, 0d3FF0000000000000; + mov.f64 %fd499, 0d3FE000000000000B; + mov.f64 %fd498, 0d3FC5555555555511; + mov.f64 %fd497, 0d3FA55555555502A1; + mov.f64 %fd496, 0d3F81111111122322; + mov.f64 %fd495, 0d3F56C16C1852B7AF; + mov.f64 %fd494, 0d3F2A01A014761F65; + mov.f64 %fd493, 0d3EFA01997C89EB71; + mov.f64 %fd492, 0d3EC71DEE62401315; + mov.f64 %fd491, 0d3E928AF3FCA213EA; + mov.f64 %fd490, 0d3E5ADE1569CE2BDF; + mov.f64 %fd489, 0dBC7ABC9E3B39803F; + mov.f64 %fd488, 0dBFE62E42FEFA39EF; + mov.f64 %fd487, 0dC338000000000000; + mov.f64 %fd486, 0d4338000000000000; + mov.f64 %fd485, 0d3FF71547652B82FE; + setp.eq.s32 %p94, %r778, 0; + selp.f64 %fd264, 0d3FF0000000000000, %fd507, %p94; + mul.f64 %fd29, %fd264, %fd1; + neg.f64 %fd266, %fd29; + fma.rn.f64 %fd269, %fd266, %fd485, %fd486; + { + .reg .b32 %temp; + mov.b64 {%r40, %temp}, %fd269; + } + add.rn.f64 %fd271, %fd269, %fd487; + fma.rn.f64 %fd273, %fd271, %fd488, %fd266; + fma.rn.f64 %fd275, %fd271, %fd489, %fd273; + fma.rn.f64 %fd278, %fd490, %fd275, %fd491; + fma.rn.f64 %fd280, %fd278, %fd275, %fd492; + fma.rn.f64 %fd282, %fd280, %fd275, %fd493; + fma.rn.f64 %fd284, %fd282, %fd275, %fd494; + fma.rn.f64 %fd286, %fd284, %fd275, %fd495; + fma.rn.f64 %fd288, %fd286, %fd275, %fd496; + fma.rn.f64 %fd290, %fd288, %fd275, %fd497; + fma.rn.f64 %fd292, %fd290, %fd275, %fd498; + fma.rn.f64 %fd294, %fd292, %fd275, %fd499; + fma.rn.f64 %fd295, %fd294, %fd275, %fd500; + fma.rn.f64 %fd296, %fd295, %fd275, %fd500; + { + .reg .b32 %temp; + mov.b64 {%r41, %temp}, %fd296; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r42}, %fd296; + } + shl.b32 %r175, %r40, 20; + add.s32 %r176, %r42, %r175; + mov.b64 %fd508, {%r41, %r176}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r177}, %fd266; + } + mov.b32 %f499, %r177; + abs.f32 %f43, %f499; + setp.lt.f32 %p95, %f43, 0f4086232B; + @%p95 bra $L__BB1_47; + + setp.gt.f64 %p96, %fd29, 0d8000000000000000; + mov.f64 %fd297, 0d7FF0000000000000; + sub.f64 %fd298, %fd297, %fd29; + selp.f64 %fd508, 0d0000000000000000, %fd298, %p96; + setp.geu.f32 %p97, %f43, 0f40874800; + @%p97 bra $L__BB1_47; + + shr.u32 %r178, %r40, 31; + add.s32 %r179, %r40, %r178; + shr.s32 %r180, %r179, 1; + shl.b32 %r181, %r180, 20; + add.s32 %r182, %r42, %r181; + mov.b64 %fd299, {%r41, %r182}; + sub.s32 %r183, %r40, %r180; + shl.b32 %r184, %r183, 20; + add.s32 %r185, %r184, 1072693248; mov.u32 %r186, 0; - mov.f32 %f3124, 0f00000000; - sqrt.rn.f32 %f315, %f309; - mov.u32 %r311, %r186; - -BB1_122: - cvt.rn.f32.s32 %f1705, %r311; - sub.f32 %f313, %f1705, %f3080; - add.f32 %f314, %f313, 0f3F800000; - mul.f32 %f316, %f314, %f315; - abs.f32 %f317, %f316; - mul.f32 %f318, %f316, %f316; - mul.f32 %f319, %f313, %f315; - abs.f32 %f320, %f319; - add.f32 %f1706, %f1705, 0f3F800000; - sub.f32 %f1707, %f1706, %f3080; - div.rn.f32 %f322, %f1707, %f3076; - mov.f32 %f1708, 0f3F800000; - cvt.rzi.f32.f32 %f1709, %f1708; - add.f32 %f1710, %f1709, %f1709; - mov.f32 %f1711, 0f40000000; - sub.f32 %f1712, %f1711, %f1710; - abs.f32 %f323, %f1712; - setp.eq.f32 %p188, %f323, 0f3F800000; - abs.f32 %f324, %f322; - setp.lt.f32 %p189, %f324, 0f00800000; - mul.f32 %f1713, %f324, 0f4B800000; - selp.f32 %f1714, 0fC3170000, 0fC2FE0000, %p189; - selp.f32 %f1715, %f1713, %f324, %p189; - mov.b32 %r188, %f1715; - and.b32 %r189, %r188, 8388607; - or.b32 %r190, %r189, 1065353216; - mov.b32 %f1716, %r190; - shr.u32 %r191, %r188, 23; - cvt.rn.f32.u32 %f1717, %r191; - add.f32 %f1718, %f1714, %f1717; - setp.gt.f32 %p190, %f1716, 0f3FB504F3; - mul.f32 %f1719, %f1716, 0f3F000000; - add.f32 %f1720, %f1718, 0f3F800000; - selp.f32 %f1721, %f1719, %f1716, %p190; - selp.f32 %f1722, %f1720, %f1718, %p190; - add.f32 %f325, %f1721, 0fBF800000; - add.f32 %f326, %f1721, 0f3F800000; - add.f32 %f327, %f325, %f325; - mov.f32 %f1723, 0f3F317200; - mul.rn.f32 %f328, %f1722, %f1723; - mov.f32 %f1724, 0f35BFBE8E; - mul.rn.f32 %f329, %f1722, %f1724; - setp.lt.f32 %p191, %f322, 0f00000000; - and.pred %p7, %p191, %p188; - add.f32 %f1725, %f322, %f322; - selp.f32 %f330, %f1725, 0f00000000, %p188; - div.rn.f32 %f333, %f313, %f3076; - abs.f32 %f334, %f333; - setp.lt.f32 %p192, %f334, 0f00800000; - mul.f32 %f1727, %f334, 0f4B800000; - selp.f32 %f1728, 0fC3170000, 0fC2FE0000, %p192; - selp.f32 %f1729, %f1727, %f334, %p192; - mov.b32 %r192, %f1729; + mov.b64 %fd300, {%r186, %r185}; + mul.f64 %fd508, %fd299, %fd300; + +$L__BB1_47: + ld.global.f32 %f500, [%rd51]; + cvt.f64.f32 %fd301, %f500; + mul.f64 %fd302, %fd504, %fd508; + cvt.f64.f32 %fd303, %f2800; + fma.rn.f64 %fd304, %fd302, %fd301, %fd303; + cvt.rn.f32.f64 %f2800, %fd304; + cvt.f64.f32 %fd305, %f2799; + add.f64 %fd306, %fd302, %fd305; + cvt.rn.f32.f64 %f2799, %fd306; + add.s32 %r778, %r778, -1; + add.s32 %r777, %r777, 1; + add.s64 %rd51, %rd51, 4; + add.s32 %r779, %r779, 1; + setp.lt.s32 %p98, %r779, %r102; + @%p98 bra $L__BB1_16; + + add.s32 %r776, %r776, 1; + setp.lt.s32 %p99, %r776, %r102; + @%p99 bra $L__BB1_15; + + div.rn.f32 %f501, %f2800, %f2799; + max.f32 %f2796, %f2796, %f501; + min.f32 %f2865, %f2865, %f501; + add.s32 %r775, %r775, 1; + setp.lt.s32 %p100, %r775, %r102; + @%p100 bra $L__BB1_14; + + add.s32 %r774, %r774, 1; + setp.lt.s32 %p101, %r774, %r102; + @%p101 bra $L__BB1_13; + +$L__BB1_51: + ld.param.u32 %r768, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_3]; + mov.f32 %f2774, 0f00000000; + sub.f32 %f502, %f2796, %f2865; + add.f32 %f503, %f502, %f502; + fma.rn.f32 %f504, %f502, 0f40000000, %f503; + mul.f32 %f505, %f504, 0f40490FD8; + mul.f32 %f506, %f505, %f2864; + mul.f32 %f507, %f506, %f2864; + max.f32 %f2866, %f2774, %f507; + setp.lt.s32 %p102, %r768, 1; + @%p102 bra $L__BB1_337; + + cvt.rn.f32.s32 %f509, %r102; + mul.f32 %f51, %f509, 0f3F000000; + mov.u32 %r780, 0; + mov.f64 %fd309, 0d4008000000000000; + +$L__BB1_53: + mov.f32 %f2818, 0f00000000; + mov.f32 %f2819, %f2818; + mov.f32 %f2820, %f2818; + mov.f32 %f2821, %f2818; + mov.f32 %f2822, %f2818; + mov.f32 %f2823, %f2818; + mov.f32 %f2824, %f2818; + mov.f32 %f2825, %f2818; + mov.f32 %f2826, %f2818; + mov.f32 %f2827, %f2818; + @%p40 bra $L__BB1_336; + + mov.f32 %f2818, 0f00000000; + mov.f32 %f530, 0f3F000000; + div.rn.f32 %f531, %f530, %f2864; + div.rn.f32 %f57, %f531, %f2864; + div.rn.f32 %f532, %f2866, 0fC0206C98; + div.rn.f32 %f58, %f532, %f2864; + cvt.f64.f32 %fd34, %f532; + cvt.f64.f32 %fd307, %f2864; + add.f64 %fd35, %fd307, 0d4008000000000000; + div.rn.f32 %f59, %f58, %f2864; + mov.f32 %f533, 0fC0000000; + div.rn.f32 %f60, %f533, %f2864; + div.rn.f32 %f534, %f2866, 0f40206C98; + cvt.f64.f32 %fd36, %f534; + mov.u32 %r781, 0; + +$L__BB1_55: + cvt.f64.f32 %fd477, %f2864; + mov.u32 %r782, 0; + mov.f32 %f2648, 0f00000000; + cvt.rn.f32.s32 %f535, %r781; + sub.f32 %f71, %f535, %f2868; + add.f32 %f72, %f71, 0f3F000000; + sqrt.rn.f32 %f73, %f57; + mul.f32 %f536, %f72, %f73; + abs.f32 %f74, %f536; + setp.ge.f32 %p104, %f74, 0f3F8060FE; + mul.f32 %f537, %f536, %f536; + selp.f32 %f538, %f74, %f537, %p104; + selp.f32 %f539, 0f3789CA3C, 0f38B1E96A, %p104; + selp.f32 %f540, 0fB9F560B9, 0fBA574D20, %p104; + fma.rn.f32 %f541, %f539, %f538, %f540; + selp.f32 %f542, 0f3BAC840B, 0f3BAAD5EA, %p104; + fma.rn.f32 %f543, %f541, %f538, %f542; + selp.f32 %f544, 0fBD0C8162, 0fBCDC1BE7, %p104; + fma.rn.f32 %f545, %f543, %f538, %f544; + selp.f32 %f546, 0f3E1CF906, 0f3DE718AF, %p104; + fma.rn.f32 %f547, %f545, %f538, %f546; + selp.f32 %f548, 0f3F6A937E, 0fBEC093AC, %p104; + fma.rn.f32 %f549, %f547, %f538, %f548; + selp.f32 %f550, 0f3F20D842, 0f3E0375D3, %p104; + fma.rn.f32 %f551, %f549, %f538, %f550; + neg.f32 %f552, %f74; + selp.f32 %f553, %f552, %f536, %p104; + fma.rn.f32 %f75, %f551, %f553, %f553; + mov.b32 %r190, %f536; + and.b32 %r51, %r190, -2147483648; + add.f32 %f76, %f71, 0fBF000000; + mul.f32 %f554, %f76, %f73; + abs.f32 %f77, %f554; + setp.ge.f32 %p105, %f77, 0f3F8060FE; + mul.f32 %f555, %f554, %f554; + selp.f32 %f556, %f77, %f555, %p105; + selp.f32 %f557, 0f3789CA3C, 0f38B1E96A, %p105; + selp.f32 %f558, 0fB9F560B9, 0fBA574D20, %p105; + fma.rn.f32 %f559, %f557, %f556, %f558; + selp.f32 %f560, 0f3BAC840B, 0f3BAAD5EA, %p105; + fma.rn.f32 %f561, %f559, %f556, %f560; + selp.f32 %f562, 0fBD0C8162, 0fBCDC1BE7, %p105; + fma.rn.f32 %f563, %f561, %f556, %f562; + selp.f32 %f564, 0f3E1CF906, 0f3DE718AF, %p105; + fma.rn.f32 %f565, %f563, %f556, %f564; + selp.f32 %f566, 0f3F6A937E, 0fBEC093AC, %p105; + fma.rn.f32 %f567, %f565, %f556, %f566; + selp.f32 %f568, 0f3F20D842, 0f3E0375D3, %p105; + fma.rn.f32 %f569, %f567, %f556, %f568; + neg.f32 %f570, %f77; + selp.f32 %f571, %f570, %f554, %p105; + fma.rn.f32 %f78, %f569, %f571, %f571; + mov.b32 %r191, %f554; + and.b32 %r52, %r191, -2147483648; + add.f32 %f572, %f535, 0f3F000000; + sub.f32 %f79, %f572, %f2868; + div.rn.f32 %f80, %f79, %f2864; + mov.f32 %f573, 0f3F800000; + cvt.rzi.f32.f32 %f574, %f573; + add.f32 %f575, %f574, %f574; + mov.f32 %f576, 0f40000000; + sub.f32 %f577, %f576, %f575; + abs.f32 %f81, %f577; + setp.eq.f32 %p106, %f81, 0f3F800000; + abs.f32 %f82, %f80; + setp.lt.f32 %p107, %f82, 0f00800000; + mul.f32 %f578, %f82, 0f4B800000; + selp.f32 %f579, %f578, %f82, %p107; + selp.f32 %f580, 0fC3170000, 0fC2FE0000, %p107; + mov.b32 %r192, %f579; and.b32 %r193, %r192, 8388607; or.b32 %r194, %r193, 1065353216; - mov.b32 %f1730, %r194; + mov.b32 %f581, %r194; shr.u32 %r195, %r192, 23; - cvt.rn.f32.u32 %f1731, %r195; - add.f32 %f1732, %f1728, %f1731; - setp.gt.f32 %p193, %f1730, 0f3FB504F3; - mul.f32 %f1733, %f1730, 0f3F000000; - add.f32 %f1734, %f1732, 0f3F800000; - selp.f32 %f1735, %f1733, %f1730, %p193; - selp.f32 %f1736, %f1734, %f1732, %p193; - add.f32 %f335, %f1735, 0fBF800000; - add.f32 %f336, %f1735, 0f3F800000; - add.f32 %f337, %f335, %f335; - mul.rn.f32 %f338, %f1736, %f1723; - mul.rn.f32 %f339, %f1736, %f1724; - setp.lt.f32 %p194, %f333, 0f00000000; - and.pred %p8, %p194, %p188; - add.f32 %f1737, %f333, %f333; - selp.f32 %f340, %f1737, 0f00000000, %p188; - mov.b32 %r197, %f316; - and.b32 %r45, %r197, -2147483648; - ld.local.f32 %f3096, [%rd2]; - ld.local.f32 %f3095, [%rd2+4]; - ld.local.f32 %f3094, [%rd2+8]; - ld.local.f32 %f3093, [%rd2+12]; - ld.local.f32 %f3092, [%rd2+16]; - ld.local.f32 %f3091, [%rd2+24]; - ld.local.f32 %f3090, [%rd2+28]; - ld.local.f32 %f3089, [%rd2+32]; - ld.local.f32 %f3088, [%rd2+36]; - ld.local.f32 %f3087, [%rd2+48]; - ld.local.f32 %f3086, [%rd2+52]; - ld.local.f32 %f3085, [%rd2+56]; - ld.local.f32 %f3084, [%rd2+72]; - ld.local.f32 %f3083, [%rd2+76]; - ld.local.f32 %f3082, [%rd2+96]; - mov.u32 %r312, %r186; - -BB1_123: - setp.ltu.f32 %p195, %f317, 0f3F800000; - @%p195 bra BB1_125; - bra.uni BB1_124; - -BB1_125: - cvt.rn.f32.s32 %f2958, %r311; - sub.f32 %f2957, %f2958, %f3080; - add.f32 %f2956, %f2957, 0f3F800000; - mul.f32 %f2955, %f2956, %f315; - mov.f32 %f1757, 0f3BA0C9F8; - mov.f32 %f1758, 0fBA1268FB; - fma.rn.f32 %f1759, %f1758, %f318, %f1757; - mov.f32 %f1760, 0fBCDABFD4; - fma.rn.f32 %f1761, %f1759, %f318, %f1760; - mov.f32 %f1762, 0f3DE70331; - fma.rn.f32 %f1763, %f1761, %f318, %f1762; - mov.f32 %f1764, 0fBEC09330; - fma.rn.f32 %f1765, %f1763, %f318, %f1764; - mov.f32 %f1766, 0f3F906EBA; - fma.rn.f32 %f1767, %f1765, %f318, %f1766; - mul.f32 %f3098, %f2955, %f1767; - bra.uni BB1_126; - -BB1_124: - mov.f32 %f2906, 0f3F800000; - setp.ltu.f32 %p196, %f317, 0f407AD445; - mov.f32 %f1739, 0f3A03BB71; - mov.f32 %f1740, 0fB7B730FB; - fma.rn.f32 %f1741, %f1740, %f317, %f1739; - mov.f32 %f1742, 0fBBACA3B3; - fma.rn.f32 %f1743, %f1741, %f317, %f1742; - mov.f32 %f1744, 0f3D0A7445; - fma.rn.f32 %f1745, %f1743, %f317, %f1744; - mov.f32 %f1746, 0fBE1B3B75; - fma.rn.f32 %f1747, %f1745, %f317, %f1746; - mov.f32 %f1748, 0fBF6B385A; - fma.rn.f32 %f1749, %f1747, %f317, %f1748; - mov.f32 %f1750, 0fBFD0316E; - fma.rn.f32 %f1751, %f1749, %f317, %f1750; - mov.f32 %f1752, 0fBA031CCE; - fma.rn.f32 %f1753, %f1751, %f317, %f1752; - ex2.approx.ftz.f32 %f1754, %f1753; - sub.f32 %f1756, %f2906, %f1754; - mov.b32 %r198, %f1756; - selp.b32 %r199, %r198, 1065353216, %p196; - or.b32 %r200, %r199, %r45; - mov.b32 %f3098, %r200; - -BB1_126: - setp.ltu.f32 %p197, %f320, 0f3F800000; - @%p197 bra BB1_128; - bra.uni BB1_127; - -BB1_128: - cvt.rn.f32.s32 %f2953, %r311; - sub.f32 %f2952, %f2953, %f3080; - mul.f32 %f2951, %f2952, %f315; - mul.f32 %f2950, %f2951, %f2951; - mov.f32 %f1786, 0f3BA0C9F8; - mov.f32 %f1787, 0fBA1268FB; - fma.rn.f32 %f1788, %f1787, %f2950, %f1786; - mov.f32 %f1789, 0fBCDABFD4; - fma.rn.f32 %f1790, %f1788, %f2950, %f1789; - mov.f32 %f1791, 0f3DE70331; - fma.rn.f32 %f1792, %f1790, %f2950, %f1791; - mov.f32 %f1793, 0fBEC09330; - fma.rn.f32 %f1794, %f1792, %f2950, %f1793; - mov.f32 %f1795, 0f3F906EBA; - fma.rn.f32 %f1796, %f1794, %f2950, %f1795; - mul.f32 %f3099, %f2951, %f1796; - bra.uni BB1_129; - -BB1_127: - cvt.rn.f32.s32 %f2910, %r311; - sub.f32 %f2909, %f2910, %f3080; - mul.f32 %f2908, %f2909, %f315; - mov.b32 %r291, %f2908; - and.b32 %r290, %r291, -2147483648; - mov.f32 %f2907, 0f3F800000; - setp.ltu.f32 %p198, %f320, 0f407AD445; - mov.f32 %f1768, 0f3A03BB71; - mov.f32 %f1769, 0fB7B730FB; - fma.rn.f32 %f1770, %f1769, %f320, %f1768; - mov.f32 %f1771, 0fBBACA3B3; - fma.rn.f32 %f1772, %f1770, %f320, %f1771; - mov.f32 %f1773, 0f3D0A7445; - fma.rn.f32 %f1774, %f1772, %f320, %f1773; - mov.f32 %f1775, 0fBE1B3B75; - fma.rn.f32 %f1776, %f1774, %f320, %f1775; - mov.f32 %f1777, 0fBF6B385A; - fma.rn.f32 %f1778, %f1776, %f320, %f1777; - mov.f32 %f1779, 0fBFD0316E; - fma.rn.f32 %f1780, %f1778, %f320, %f1779; - mov.f32 %f1781, 0fBA031CCE; - fma.rn.f32 %f1782, %f1780, %f320, %f1781; - ex2.approx.ftz.f32 %f1783, %f1782; - sub.f32 %f1785, %f2907, %f1783; - mov.b32 %r201, %f1785; - selp.b32 %r202, %r201, 1065353216, %p198; - or.b32 %r203, %r202, %r290; - mov.b32 %f3099, %r203; - -BB1_129: - sub.f32 %f1797, %f3098, %f3099; - mul.f32 %f380, %f1797, 0f3F000000; - cvt.rn.f32.s32 %f381, %r312; - sub.f32 %f382, %f381, %f3079; - add.f32 %f383, %f382, 0f3F800000; - mul.f32 %f384, %f383, %f315; - abs.f32 %f385, %f384; - setp.ltu.f32 %p199, %f385, 0f3F800000; - @%p199 bra BB1_131; - bra.uni BB1_130; - -BB1_131: - mul.f32 %f1816, %f384, %f384; - mov.f32 %f1817, 0f3BA0C9F8; - mov.f32 %f1818, 0fBA1268FB; - fma.rn.f32 %f1819, %f1818, %f1816, %f1817; - mov.f32 %f1820, 0fBCDABFD4; - fma.rn.f32 %f1821, %f1819, %f1816, %f1820; - mov.f32 %f1822, 0f3DE70331; - fma.rn.f32 %f1823, %f1821, %f1816, %f1822; - mov.f32 %f1824, 0fBEC09330; - fma.rn.f32 %f1825, %f1823, %f1816, %f1824; - mov.f32 %f1826, 0f3F906EBA; - fma.rn.f32 %f1827, %f1825, %f1816, %f1826; - mul.f32 %f3100, %f384, %f1827; - bra.uni BB1_132; - -BB1_130: - mov.f32 %f2911, 0f3F800000; - mov.f32 %f1798, 0f3A03BB71; - mov.f32 %f1799, 0fB7B730FB; - fma.rn.f32 %f1800, %f1799, %f385, %f1798; - mov.f32 %f1801, 0fBBACA3B3; - fma.rn.f32 %f1802, %f1800, %f385, %f1801; - mov.f32 %f1803, 0f3D0A7445; - fma.rn.f32 %f1804, %f1802, %f385, %f1803; - mov.f32 %f1805, 0fBE1B3B75; - fma.rn.f32 %f1806, %f1804, %f385, %f1805; - mov.f32 %f1807, 0fBF6B385A; - fma.rn.f32 %f1808, %f1806, %f385, %f1807; - mov.f32 %f1809, 0fBFD0316E; - fma.rn.f32 %f1810, %f1808, %f385, %f1809; - mov.f32 %f1811, 0fBA031CCE; - fma.rn.f32 %f1812, %f1810, %f385, %f1811; - ex2.approx.ftz.f32 %f1813, %f1812; - sub.f32 %f1815, %f2911, %f1813; - mov.b32 %r204, %f1815; - setp.ltu.f32 %p200, %f385, 0f407AD445; - selp.b32 %r205, %r204, 1065353216, %p200; - mov.b32 %r206, %f384; - and.b32 %r207, %r206, -2147483648; - or.b32 %r208, %r205, %r207; - mov.b32 %f3100, %r208; - -BB1_132: - cvt.rn.f32.s32 %f2913, %r312; - sub.f32 %f2912, %f2913, %f3079; - mul.f32 %f389, %f2912, %f315; - abs.f32 %f390, %f389; - setp.ltu.f32 %p201, %f390, 0f3F800000; - @%p201 bra BB1_134; - bra.uni BB1_133; - -BB1_134: - mul.f32 %f1846, %f389, %f389; - mov.f32 %f1847, 0f3BA0C9F8; - mov.f32 %f1848, 0fBA1268FB; - fma.rn.f32 %f1849, %f1848, %f1846, %f1847; - mov.f32 %f1850, 0fBCDABFD4; - fma.rn.f32 %f1851, %f1849, %f1846, %f1850; - mov.f32 %f1852, 0f3DE70331; - fma.rn.f32 %f1853, %f1851, %f1846, %f1852; - mov.f32 %f1854, 0fBEC09330; - fma.rn.f32 %f1855, %f1853, %f1846, %f1854; - mov.f32 %f1856, 0f3F906EBA; - fma.rn.f32 %f1857, %f1855, %f1846, %f1856; - mul.f32 %f3101, %f389, %f1857; - bra.uni BB1_135; - -BB1_133: - mov.f32 %f2914, 0f3F800000; - mov.f32 %f1828, 0f3A03BB71; - mov.f32 %f1829, 0fB7B730FB; - fma.rn.f32 %f1830, %f1829, %f390, %f1828; - mov.f32 %f1831, 0fBBACA3B3; - fma.rn.f32 %f1832, %f1830, %f390, %f1831; - mov.f32 %f1833, 0f3D0A7445; - fma.rn.f32 %f1834, %f1832, %f390, %f1833; - mov.f32 %f1835, 0fBE1B3B75; - fma.rn.f32 %f1836, %f1834, %f390, %f1835; - mov.f32 %f1837, 0fBF6B385A; - fma.rn.f32 %f1838, %f1836, %f390, %f1837; - mov.f32 %f1839, 0fBFD0316E; - fma.rn.f32 %f1840, %f1838, %f390, %f1839; - mov.f32 %f1841, 0fBA031CCE; - fma.rn.f32 %f1842, %f1840, %f390, %f1841; - ex2.approx.ftz.f32 %f1843, %f1842; - sub.f32 %f1845, %f2914, %f1843; - mov.b32 %r209, %f1845; - setp.ltu.f32 %p202, %f390, 0f407AD445; - selp.b32 %r210, %r209, 1065353216, %p202; - mov.b32 %r211, %f389; - and.b32 %r212, %r211, -2147483648; - or.b32 %r213, %r210, %r212; - mov.b32 %f3101, %r213; - -BB1_135: - sub.f32 %f1860, %f3100, %f3101; - mul.f32 %f394, %f1860, 0f3F000000; - mul.f32 %f1861, %f380, %f3078; - fma.rn.f32 %f395, %f394, %f1861, %f2993; - mad.lo.s32 %r214, %r312, %r63, %r311; - add.s32 %r215, %r214, %r4; - mul.wide.s32 %rd73, %r215, 4; - add.s64 %rd74, %rd1, %rd73; - ld.global.f32 %f396, [%rd74]; - // inline asm - rcp.approx.ftz.f32 %f1858,%f326; - // inline asm - mul.f32 %f1862, %f1858, %f327; - mul.f32 %f1863, %f1862, %f1862; - mov.f32 %f1864, 0f3C4CAF63; - mov.f32 %f1865, 0f3B18F0FE; - fma.rn.f32 %f1866, %f1865, %f1863, %f1864; - mov.f32 %f1867, 0f3DAAAABD; - fma.rn.f32 %f1868, %f1866, %f1863, %f1867; - mul.rn.f32 %f1869, %f1868, %f1863; - mul.rn.f32 %f1870, %f1869, %f1862; - sub.f32 %f1871, %f325, %f1862; - neg.f32 %f1872, %f1862; - add.f32 %f1873, %f1871, %f1871; - fma.rn.f32 %f1874, %f1872, %f325, %f1873; - mul.rn.f32 %f1875, %f1858, %f1874; - add.f32 %f1876, %f1870, %f1862; - sub.f32 %f1877, %f1862, %f1876; - add.f32 %f1878, %f1870, %f1877; - add.f32 %f1879, %f1875, %f1878; - add.f32 %f1880, %f1876, %f1879; - sub.f32 %f1881, %f1876, %f1880; - add.f32 %f1882, %f1879, %f1881; - add.f32 %f1883, %f328, %f1880; - sub.f32 %f1884, %f328, %f1883; - add.f32 %f1885, %f1880, %f1884; - add.f32 %f1886, %f1882, %f1885; - add.f32 %f1887, %f329, %f1886; - add.f32 %f1888, %f1883, %f1887; - sub.f32 %f1889, %f1883, %f1888; - add.f32 %f1890, %f1887, %f1889; - mul.rn.f32 %f1892, %f1711, %f1888; - neg.f32 %f1893, %f1892; - fma.rn.f32 %f1894, %f1711, %f1888, %f1893; - fma.rn.f32 %f1895, %f1711, %f1890, %f1894; - mov.f32 %f1896, 0f00000000; - fma.rn.f32 %f1897, %f1896, %f1888, %f1895; - add.rn.f32 %f1898, %f1892, %f1897; - neg.f32 %f1899, %f1898; - add.rn.f32 %f1900, %f1892, %f1899; - add.rn.f32 %f1901, %f1900, %f1897; - mov.b32 %r216, %f1898; - setp.eq.s32 %p203, %r216, 1118925336; - add.s32 %r217, %r216, -1; - mov.b32 %f1902, %r217; - add.f32 %f1903, %f1901, 0f37000000; - selp.f32 %f1904, %f1902, %f1898, %p203; - selp.f32 %f397, %f1903, %f1901, %p203; - mul.f32 %f1905, %f1904, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1906, %f1905; - mov.f32 %f1907, 0fBF317200; - fma.rn.f32 %f1908, %f1906, %f1907, %f1904; - mov.f32 %f1909, 0fB5BFBE8E; - fma.rn.f32 %f1910, %f1906, %f1909, %f1908; - mul.f32 %f1911, %f1910, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1912, %f1911; - add.f32 %f1913, %f1906, 0f00000000; - ex2.approx.f32 %f1914, %f1913; - mul.f32 %f1915, %f1912, %f1914; - setp.lt.f32 %p204, %f1904, 0fC2D20000; - selp.f32 %f1916, 0f00000000, %f1915, %p204; - setp.gt.f32 %p205, %f1904, 0f42D20000; - selp.f32 %f3102, 0f7F800000, %f1916, %p205; - setp.eq.f32 %p206, %f3102, 0f7F800000; - @%p206 bra BB1_137; - - fma.rn.f32 %f3102, %f3102, %f397, %f3102; - -BB1_137: - setp.geu.f32 %p366, %f322, 0f00000000; - mov.b32 %r218, %f3102; - xor.b32 %r219, %r218, -2147483648; - mov.b32 %f1917, %r219; - selp.f32 %f401, %f1917, %f3102, %p7; - setp.eq.f32 %p207, %f322, 0f00000000; - selp.f32 %f3103, %f330, %f401, %p207; - @%p366 bra BB1_139; - - cvt.rzi.f32.f32 %f1919, %f1711; - setp.neu.f32 %p208, %f1919, 0f40000000; - selp.f32 %f3103, 0f7FFFFFFF, %f401, %p208; - -BB1_139: - abs.f32 %f2924, %f322; - add.f32 %f2923, %f2924, 0f40000000; - mov.b32 %r292, %f2923; - mov.f32 %f2922, 0f00000000; - mov.f32 %f2921, 0f3DAAAABD; - mov.f32 %f2920, 0f3C4CAF63; - mov.f32 %f2919, 0f3B18F0FE; - mov.f32 %f2918, 0fB5BFBE8E; - mov.f32 %f2917, 0fBF317200; - selp.f32 %f2916, 0fFF800000, 0f7F800000, %p7; - add.f32 %f2915, %f322, 0f40000000; - setp.gtu.f32 %p209, %f2924, 0f7F800000; - selp.f32 %f1922, %f2915, %f3103, %p209; - setp.neu.f32 %p210, %f2924, 0f7F800000; - selp.f32 %f1923, %f1922, %f2916, %p210; - setp.gt.s32 %p211, %r292, 2139095039; - selp.f32 %f1924, %f1923, %f3103, %p211; - mul.f32 %f1925, %f1924, 0fBF000000; - setp.eq.f32 %p212, %f322, 0f3F800000; - selp.f32 %f1926, 0fBF000000, %f1925, %p212; - mul.f32 %f1927, %f1926, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1928, %f1927; - fma.rn.f32 %f1930, %f1928, %f2917, %f1926; - fma.rn.f32 %f1932, %f1928, %f2918, %f1930; - mul.f32 %f1933, %f1932, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1934, %f1933; - add.f32 %f1935, %f1928, 0f00000000; - ex2.approx.f32 %f1936, %f1935; - mul.f32 %f1937, %f1934, %f1936; - setp.lt.f32 %p213, %f1926, 0fC2D20000; - selp.f32 %f1938, 0f00000000, %f1937, %p213; - setp.gt.f32 %p214, %f1926, 0f42D20000; - selp.f32 %f405, 0f7F800000, %f1938, %p214; - // inline asm - rcp.approx.ftz.f32 %f1920,%f336; - // inline asm - mul.f32 %f1939, %f1920, %f337; - mul.f32 %f1940, %f1939, %f1939; - fma.rn.f32 %f1943, %f2919, %f1940, %f2920; - fma.rn.f32 %f1945, %f1943, %f1940, %f2921; - mul.rn.f32 %f1946, %f1945, %f1940; - mul.rn.f32 %f1947, %f1946, %f1939; - sub.f32 %f1948, %f335, %f1939; - neg.f32 %f1949, %f1939; - add.f32 %f1950, %f1948, %f1948; - fma.rn.f32 %f1951, %f1949, %f335, %f1950; - mul.rn.f32 %f1952, %f1920, %f1951; - add.f32 %f1953, %f1947, %f1939; - sub.f32 %f1954, %f1939, %f1953; - add.f32 %f1955, %f1947, %f1954; - add.f32 %f1956, %f1952, %f1955; - add.f32 %f1957, %f1953, %f1956; - sub.f32 %f1958, %f1953, %f1957; - add.f32 %f1959, %f1956, %f1958; - add.f32 %f1960, %f338, %f1957; - sub.f32 %f1961, %f338, %f1960; - add.f32 %f1962, %f1957, %f1961; - add.f32 %f1963, %f1959, %f1962; - add.f32 %f1964, %f339, %f1963; - add.f32 %f1965, %f1960, %f1964; - sub.f32 %f1966, %f1960, %f1965; - add.f32 %f1967, %f1964, %f1966; - mul.rn.f32 %f1969, %f1711, %f1965; - neg.f32 %f1970, %f1969; - fma.rn.f32 %f1971, %f1711, %f1965, %f1970; - fma.rn.f32 %f1972, %f1711, %f1967, %f1971; - fma.rn.f32 %f1974, %f2922, %f1965, %f1972; - add.rn.f32 %f1975, %f1969, %f1974; - neg.f32 %f1976, %f1975; - add.rn.f32 %f1977, %f1969, %f1976; - add.rn.f32 %f1978, %f1977, %f1974; - mov.b32 %r220, %f1975; - setp.eq.s32 %p215, %r220, 1118925336; - add.s32 %r221, %r220, -1; - mov.b32 %f1979, %r221; - add.f32 %f1980, %f1978, 0f37000000; - selp.f32 %f1981, %f1979, %f1975, %p215; - selp.f32 %f406, %f1980, %f1978, %p215; - mul.f32 %f1982, %f1981, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1983, %f1982; - fma.rn.f32 %f1984, %f1983, %f2917, %f1981; - fma.rn.f32 %f1985, %f1983, %f2918, %f1984; - mul.f32 %f1986, %f1985, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1987, %f1986; - add.f32 %f1988, %f1983, 0f00000000; - ex2.approx.f32 %f1989, %f1988; - mul.f32 %f1990, %f1987, %f1989; - setp.lt.f32 %p216, %f1981, 0fC2D20000; - selp.f32 %f1991, 0f00000000, %f1990, %p216; - setp.gt.f32 %p217, %f1981, 0f42D20000; - selp.f32 %f3104, 0f7F800000, %f1991, %p217; - setp.eq.f32 %p218, %f3104, 0f7F800000; - @%p218 bra BB1_141; - - fma.rn.f32 %f3104, %f3104, %f406, %f3104; - -BB1_141: - setp.geu.f32 %p367, %f333, 0f00000000; - mov.b32 %r222, %f3104; - xor.b32 %r223, %r222, -2147483648; - mov.b32 %f1992, %r223; - selp.f32 %f410, %f1992, %f3104, %p8; - setp.eq.f32 %p219, %f333, 0f00000000; - selp.f32 %f3105, %f340, %f410, %p219; - @%p367 bra BB1_143; - - cvt.rzi.f32.f32 %f1994, %f1711; - setp.neu.f32 %p220, %f1994, 0f40000000; - selp.f32 %f3105, 0f7FFFFFFF, %f410, %p220; - -BB1_143: - abs.f32 %f2936, %f333; - add.f32 %f2935, %f2936, 0f40000000; - mov.b32 %r293, %f2935; - mov.f32 %f2934, 0f35BFBE8E; - mov.f32 %f2933, 0f3F317200; - selp.f32 %f2932, 0fFF800000, 0f7F800000, %p8; - add.f32 %f2931, %f333, 0f40000000; - mov.f32 %f2930, 0f00000000; - mov.f32 %f2929, 0f3DAAAABD; - mov.f32 %f2928, 0f3C4CAF63; - mov.f32 %f2927, 0f3B18F0FE; - mov.f32 %f2926, 0fB5BFBE8E; - mov.f32 %f2925, 0fBF317200; - setp.gtu.f32 %p221, %f2936, 0f7F800000; - selp.f32 %f1997, %f2931, %f3105, %p221; - setp.neu.f32 %p222, %f2936, 0f7F800000; - selp.f32 %f1998, %f1997, %f2932, %p222; - setp.gt.s32 %p223, %r293, 2139095039; - selp.f32 %f1999, %f1998, %f3105, %p223; - mul.f32 %f2000, %f1999, 0fBF000000; - setp.eq.f32 %p224, %f333, 0f3F800000; - selp.f32 %f2001, 0fBF000000, %f2000, %p224; - mul.f32 %f2002, %f2001, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2003, %f2002; - fma.rn.f32 %f2005, %f2003, %f2925, %f2001; - fma.rn.f32 %f2007, %f2003, %f2926, %f2005; - mul.f32 %f2008, %f2007, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2009, %f2008; - add.f32 %f2010, %f2003, 0f00000000; - ex2.approx.f32 %f2011, %f2010; - mul.f32 %f2012, %f2009, %f2011; - setp.lt.f32 %p225, %f2001, 0fC2D20000; - selp.f32 %f2013, 0f00000000, %f2012, %p225; - setp.gt.f32 %p226, %f2001, 0f42D20000; - selp.f32 %f2014, 0f7F800000, %f2013, %p226; - sub.f32 %f2015, %f405, %f2014; - mul.f32 %f2016, %f310, %f2015; - mul.f32 %f414, %f394, %f2016; - add.f32 %f2017, %f381, 0f3F800000; - sub.f32 %f2018, %f2017, %f3079; - div.rn.f32 %f415, %f2018, %f3076; - abs.f32 %f416, %f415; - setp.lt.f32 %p227, %f416, 0f00800000; - mul.f32 %f2019, %f416, 0f4B800000; - selp.f32 %f2020, 0fC3170000, 0fC2FE0000, %p227; - selp.f32 %f2021, %f2019, %f416, %p227; - mov.b32 %r224, %f2021; - and.b32 %r225, %r224, 8388607; - or.b32 %r226, %r225, 1065353216; - mov.b32 %f2022, %r226; - shr.u32 %r227, %r224, 23; - cvt.rn.f32.u32 %f2023, %r227; - add.f32 %f2024, %f2020, %f2023; - setp.gt.f32 %p228, %f2022, 0f3FB504F3; - mul.f32 %f2025, %f2022, 0f3F000000; - add.f32 %f2026, %f2024, 0f3F800000; - selp.f32 %f2027, %f2025, %f2022, %p228; - selp.f32 %f2028, %f2026, %f2024, %p228; - add.f32 %f417, %f2027, 0fBF800000; - add.f32 %f1996, %f2027, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1995,%f1996; - // inline asm - add.f32 %f419, %f417, %f417; - mul.f32 %f2029, %f1995, %f419; - mul.f32 %f2030, %f2029, %f2029; - fma.rn.f32 %f2033, %f2927, %f2030, %f2928; - fma.rn.f32 %f2035, %f2033, %f2030, %f2929; - mul.rn.f32 %f2036, %f2035, %f2030; - mul.rn.f32 %f2037, %f2036, %f2029; - sub.f32 %f2038, %f417, %f2029; - neg.f32 %f2039, %f2029; - add.f32 %f2040, %f2038, %f2038; - fma.rn.f32 %f2041, %f2039, %f417, %f2040; - mul.rn.f32 %f2042, %f1995, %f2041; - add.f32 %f2043, %f2037, %f2029; - sub.f32 %f2044, %f2029, %f2043; - add.f32 %f2045, %f2037, %f2044; - add.f32 %f2046, %f2042, %f2045; - add.f32 %f2047, %f2043, %f2046; - sub.f32 %f2048, %f2043, %f2047; - add.f32 %f2049, %f2046, %f2048; - mul.rn.f32 %f420, %f2028, %f2933; - mul.rn.f32 %f421, %f2028, %f2934; - add.f32 %f2052, %f420, %f2047; - sub.f32 %f2053, %f420, %f2052; - add.f32 %f2054, %f2047, %f2053; - add.f32 %f2055, %f2049, %f2054; - add.f32 %f2056, %f421, %f2055; - add.f32 %f2057, %f2052, %f2056; - sub.f32 %f2058, %f2052, %f2057; - add.f32 %f2059, %f2056, %f2058; - mul.rn.f32 %f2061, %f1711, %f2057; - neg.f32 %f2062, %f2061; - fma.rn.f32 %f2063, %f1711, %f2057, %f2062; - fma.rn.f32 %f2064, %f1711, %f2059, %f2063; - fma.rn.f32 %f2066, %f2930, %f2057, %f2064; - add.rn.f32 %f2067, %f2061, %f2066; - neg.f32 %f2068, %f2067; - add.rn.f32 %f2069, %f2061, %f2068; - add.rn.f32 %f2070, %f2069, %f2066; - mov.b32 %r228, %f2067; - setp.eq.s32 %p229, %r228, 1118925336; - add.s32 %r229, %r228, -1; - mov.b32 %f2071, %r229; - add.f32 %f2072, %f2070, 0f37000000; - selp.f32 %f2073, %f2071, %f2067, %p229; - selp.f32 %f422, %f2072, %f2070, %p229; - mul.f32 %f2074, %f2073, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2075, %f2074; - fma.rn.f32 %f2076, %f2075, %f2925, %f2073; - fma.rn.f32 %f2077, %f2075, %f2926, %f2076; - mul.f32 %f2078, %f2077, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2079, %f2078; - add.f32 %f2080, %f2075, 0f00000000; - ex2.approx.f32 %f2081, %f2080; - mul.f32 %f2082, %f2079, %f2081; - setp.lt.f32 %p230, %f2073, 0fC2D20000; - selp.f32 %f2083, 0f00000000, %f2082, %p230; - setp.gt.f32 %p231, %f2073, 0f42D20000; - selp.f32 %f3106, 0f7F800000, %f2083, %p231; - setp.eq.f32 %p232, %f3106, 0f7F800000; - @%p232 bra BB1_145; - - fma.rn.f32 %f3106, %f3106, %f422, %f3106; - -BB1_145: - setp.lt.f32 %p233, %f415, 0f00000000; - and.pred %p11, %p233, %p188; - mov.b32 %r230, %f3106; - xor.b32 %r231, %r230, -2147483648; - mov.b32 %f2084, %r231; - selp.f32 %f3108, %f2084, %f3106, %p11; - setp.eq.f32 %p235, %f415, 0f00000000; - @%p235 bra BB1_148; - bra.uni BB1_146; - -BB1_148: - add.f32 %f2087, %f415, %f415; - selp.f32 %f3108, %f2087, 0f00000000, %p188; - bra.uni BB1_149; - -BB1_146: - setp.geu.f32 %p236, %f415, 0f00000000; - @%p236 bra BB1_149; - - cvt.rzi.f32.f32 %f2086, %f1711; - setp.neu.f32 %p237, %f2086, 0f40000000; - selp.f32 %f3108, 0f7FFFFFFF, %f3108, %p237; - -BB1_149: - abs.f32 %f2937, %f415; - add.f32 %f2088, %f2937, 0f40000000; - mov.b32 %r47, %f2088; - setp.lt.s32 %p239, %r47, 2139095040; - @%p239 bra BB1_154; - - abs.f32 %f2948, %f415; - setp.gtu.f32 %p240, %f2948, 0f7F800000; - @%p240 bra BB1_153; - bra.uni BB1_151; - -BB1_153: - add.f32 %f3108, %f415, 0f40000000; - bra.uni BB1_154; - -BB1_151: - abs.f32 %f2949, %f415; - setp.neu.f32 %p241, %f2949, 0f7F800000; - @%p241 bra BB1_154; - - selp.f32 %f3108, 0fFF800000, 0f7F800000, %p11; - -BB1_154: - mov.f32 %f2947, 0f35BFBE8E; - mov.f32 %f2946, 0f3F317200; - mov.f32 %f2945, 0f00000000; - mov.f32 %f2944, 0f3DAAAABD; - mov.f32 %f2943, 0f3C4CAF63; - mov.f32 %f2942, 0f3B18F0FE; - mov.f32 %f2941, 0fB5BFBE8E; - mov.f32 %f2940, 0fBF317200; - cvt.rn.f32.s32 %f2939, %r312; - sub.f32 %f2938, %f2939, %f3079; - mul.f32 %f2091, %f3108, 0fBF000000; - setp.eq.f32 %p242, %f415, 0f3F800000; - selp.f32 %f2092, 0fBF000000, %f2091, %p242; - mul.f32 %f2093, %f2092, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2094, %f2093; - fma.rn.f32 %f2096, %f2094, %f2940, %f2092; - fma.rn.f32 %f2098, %f2094, %f2941, %f2096; - mul.f32 %f2099, %f2098, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2100, %f2099; - add.f32 %f2101, %f2094, 0f00000000; - ex2.approx.f32 %f2102, %f2101; - mul.f32 %f2103, %f2100, %f2102; - setp.lt.f32 %p243, %f2092, 0fC2D20000; - selp.f32 %f2104, 0f00000000, %f2103, %p243; - setp.gt.f32 %p244, %f2092, 0f42D20000; - selp.f32 %f433, 0f7F800000, %f2104, %p244; - div.rn.f32 %f434, %f2938, %f3076; - abs.f32 %f435, %f434; - setp.lt.f32 %p245, %f435, 0f00800000; - mul.f32 %f2105, %f435, 0f4B800000; - selp.f32 %f2106, 0fC3170000, 0fC2FE0000, %p245; - selp.f32 %f2107, %f2105, %f435, %p245; - mov.b32 %r232, %f2107; - and.b32 %r233, %r232, 8388607; - or.b32 %r234, %r233, 1065353216; - mov.b32 %f2108, %r234; - shr.u32 %r235, %r232, 23; - cvt.rn.f32.u32 %f2109, %r235; - add.f32 %f2110, %f2106, %f2109; - setp.gt.f32 %p246, %f2108, 0f3FB504F3; - mul.f32 %f2111, %f2108, 0f3F000000; - add.f32 %f2112, %f2110, 0f3F800000; - selp.f32 %f2113, %f2111, %f2108, %p246; - selp.f32 %f2114, %f2112, %f2110, %p246; - add.f32 %f436, %f2113, 0fBF800000; - add.f32 %f2090, %f2113, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f2089,%f2090; - // inline asm - add.f32 %f438, %f436, %f436; - mul.f32 %f2115, %f2089, %f438; - mul.f32 %f2116, %f2115, %f2115; - fma.rn.f32 %f2119, %f2942, %f2116, %f2943; - fma.rn.f32 %f2121, %f2119, %f2116, %f2944; - mul.rn.f32 %f2122, %f2121, %f2116; - mul.rn.f32 %f2123, %f2122, %f2115; - sub.f32 %f2124, %f436, %f2115; - neg.f32 %f2125, %f2115; - add.f32 %f2126, %f2124, %f2124; - fma.rn.f32 %f2127, %f2125, %f436, %f2126; - mul.rn.f32 %f2128, %f2089, %f2127; - add.f32 %f2129, %f2123, %f2115; - sub.f32 %f2130, %f2115, %f2129; - add.f32 %f2131, %f2123, %f2130; - add.f32 %f2132, %f2128, %f2131; - add.f32 %f2133, %f2129, %f2132; - sub.f32 %f2134, %f2129, %f2133; - add.f32 %f2135, %f2132, %f2134; - mul.rn.f32 %f439, %f2114, %f2946; - mul.rn.f32 %f440, %f2114, %f2947; - add.f32 %f2138, %f439, %f2133; - sub.f32 %f2139, %f439, %f2138; - add.f32 %f2140, %f2133, %f2139; - add.f32 %f2141, %f2135, %f2140; - add.f32 %f2142, %f440, %f2141; - add.f32 %f2143, %f2138, %f2142; - sub.f32 %f2144, %f2138, %f2143; - add.f32 %f2145, %f2142, %f2144; - mul.rn.f32 %f2147, %f1711, %f2143; - neg.f32 %f2148, %f2147; - fma.rn.f32 %f2149, %f1711, %f2143, %f2148; - fma.rn.f32 %f2150, %f1711, %f2145, %f2149; - fma.rn.f32 %f2152, %f2945, %f2143, %f2150; - add.rn.f32 %f2153, %f2147, %f2152; - neg.f32 %f2154, %f2153; - add.rn.f32 %f2155, %f2147, %f2154; - add.rn.f32 %f2156, %f2155, %f2152; - mov.b32 %r236, %f2153; - setp.eq.s32 %p247, %r236, 1118925336; - add.s32 %r237, %r236, -1; - mov.b32 %f2157, %r237; - add.f32 %f2158, %f2156, 0f37000000; - selp.f32 %f2159, %f2157, %f2153, %p247; - selp.f32 %f441, %f2158, %f2156, %p247; - mul.f32 %f2160, %f2159, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2161, %f2160; - fma.rn.f32 %f2162, %f2161, %f2940, %f2159; - fma.rn.f32 %f2163, %f2161, %f2941, %f2162; - mul.f32 %f2164, %f2163, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2165, %f2164; - add.f32 %f2166, %f2161, 0f00000000; - ex2.approx.f32 %f2167, %f2166; - mul.f32 %f2168, %f2165, %f2167; - setp.lt.f32 %p248, %f2159, 0fC2D20000; - selp.f32 %f2169, 0f00000000, %f2168, %p248; - setp.gt.f32 %p249, %f2159, 0f42D20000; - selp.f32 %f3109, 0f7F800000, %f2169, %p249; - setp.eq.f32 %p250, %f3109, 0f7F800000; - @%p250 bra BB1_156; - - fma.rn.f32 %f3109, %f3109, %f441, %f3109; - -BB1_156: - setp.lt.f32 %p251, %f434, 0f00000000; - and.pred %p12, %p251, %p188; - mov.b32 %r238, %f3109; - xor.b32 %r239, %r238, -2147483648; - mov.b32 %f2170, %r239; - selp.f32 %f3111, %f2170, %f3109, %p12; - setp.eq.f32 %p253, %f434, 0f00000000; - @%p253 bra BB1_159; - bra.uni BB1_157; - -BB1_159: - add.f32 %f2173, %f434, %f434; - selp.f32 %f3111, %f2173, 0f00000000, %p188; - bra.uni BB1_160; - -BB1_157: - setp.geu.f32 %p254, %f434, 0f00000000; - @%p254 bra BB1_160; - - cvt.rzi.f32.f32 %f2172, %f1711; - setp.neu.f32 %p255, %f2172, 0f40000000; - selp.f32 %f3111, 0f7FFFFFFF, %f3111, %p255; - -BB1_160: - abs.f32 %f2862, %f434; - add.f32 %f2174, %f2862, 0f40000000; - mov.b32 %r48, %f2174; - setp.lt.s32 %p257, %r48, 2139095040; - @%p257 bra BB1_165; - - abs.f32 %f2963, %f434; - setp.gtu.f32 %p258, %f2963, 0f7F800000; - @%p258 bra BB1_164; - bra.uni BB1_162; - -BB1_164: - add.f32 %f3111, %f434, 0f40000000; - bra.uni BB1_165; - -BB1_162: - abs.f32 %f2964, %f434; - setp.neu.f32 %p259, %f2964, 0f7F800000; - @%p259 bra BB1_165; - - selp.f32 %f3111, 0fFF800000, 0f7F800000, %p12; - -BB1_165: - mov.f32 %f2868, 0f00000000; - mov.f32 %f2867, 0f3DAAAABD; - mov.f32 %f2866, 0f3C4CAF63; - mov.f32 %f2865, 0f3B18F0FE; - mov.f32 %f2864, 0fB5BFBE8E; - mov.f32 %f2863, 0fBF317200; - mul.f32 %f2177, %f3111, 0fBF000000; - setp.eq.f32 %p260, %f434, 0f3F800000; - selp.f32 %f2178, 0fBF000000, %f2177, %p260; - mul.f32 %f2179, %f2178, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2180, %f2179; - fma.rn.f32 %f2182, %f2180, %f2863, %f2178; - fma.rn.f32 %f2184, %f2180, %f2864, %f2182; - mul.f32 %f2185, %f2184, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2186, %f2185; - add.f32 %f2187, %f2180, 0f00000000; - ex2.approx.f32 %f2188, %f2187; - mul.f32 %f2189, %f2186, %f2188; - setp.lt.f32 %p261, %f2178, 0fC2D20000; - selp.f32 %f2190, 0f00000000, %f2189, %p261; - setp.gt.f32 %p262, %f2178, 0f42D20000; - selp.f32 %f2191, 0f7F800000, %f2190, %p262; - sub.f32 %f2192, %f433, %f2191; - mul.f32 %f2193, %f310, %f2192; - mul.f32 %f452, %f380, %f2193; - // inline asm - rcp.approx.ftz.f32 %f2175,%f326; - // inline asm - mul.f32 %f2194, %f2175, %f327; - mul.f32 %f2195, %f2194, %f2194; - fma.rn.f32 %f2198, %f2865, %f2195, %f2866; - fma.rn.f32 %f2200, %f2198, %f2195, %f2867; - mul.rn.f32 %f2201, %f2200, %f2195; - mul.rn.f32 %f2202, %f2201, %f2194; - sub.f32 %f2203, %f325, %f2194; - neg.f32 %f2204, %f2194; - add.f32 %f2205, %f2203, %f2203; - fma.rn.f32 %f2206, %f2204, %f325, %f2205; - mul.rn.f32 %f2207, %f2175, %f2206; - add.f32 %f2208, %f2202, %f2194; - sub.f32 %f2209, %f2194, %f2208; - add.f32 %f2210, %f2202, %f2209; - add.f32 %f2211, %f2207, %f2210; - add.f32 %f2212, %f2208, %f2211; - sub.f32 %f2213, %f2208, %f2212; - add.f32 %f2214, %f2211, %f2213; - add.f32 %f2215, %f328, %f2212; - sub.f32 %f2216, %f328, %f2215; - add.f32 %f2217, %f2212, %f2216; - add.f32 %f2218, %f2214, %f2217; - add.f32 %f2219, %f329, %f2218; - add.f32 %f2220, %f2215, %f2219; - sub.f32 %f2221, %f2215, %f2220; - add.f32 %f2222, %f2219, %f2221; - mul.rn.f32 %f2224, %f1711, %f2220; - neg.f32 %f2225, %f2224; - fma.rn.f32 %f2226, %f1711, %f2220, %f2225; - fma.rn.f32 %f2227, %f1711, %f2222, %f2226; - fma.rn.f32 %f2229, %f2868, %f2220, %f2227; - add.rn.f32 %f2230, %f2224, %f2229; - neg.f32 %f2231, %f2230; - add.rn.f32 %f2232, %f2224, %f2231; - add.rn.f32 %f2233, %f2232, %f2229; - mov.b32 %r240, %f2230; - setp.eq.s32 %p263, %r240, 1118925336; - add.s32 %r241, %r240, -1; - mov.b32 %f2234, %r241; - add.f32 %f2235, %f2233, 0f37000000; - selp.f32 %f2236, %f2234, %f2230, %p263; - selp.f32 %f453, %f2235, %f2233, %p263; - mul.f32 %f2237, %f2236, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2238, %f2237; - fma.rn.f32 %f2239, %f2238, %f2863, %f2236; - fma.rn.f32 %f2240, %f2238, %f2864, %f2239; - mul.f32 %f2241, %f2240, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2242, %f2241; - add.f32 %f2243, %f2238, 0f00000000; - ex2.approx.f32 %f2244, %f2243; - mul.f32 %f2245, %f2242, %f2244; - setp.lt.f32 %p264, %f2236, 0fC2D20000; - selp.f32 %f2246, 0f00000000, %f2245, %p264; - setp.gt.f32 %p265, %f2236, 0f42D20000; - selp.f32 %f3112, 0f7F800000, %f2246, %p265; - setp.eq.f32 %p266, %f3112, 0f7F800000; - @%p266 bra BB1_167; - - fma.rn.f32 %f3112, %f3112, %f453, %f3112; - -BB1_167: - setp.eq.f32 %p352, %f322, 0f00000000; - setp.geu.f32 %p351, %f322, 0f00000000; - mov.b32 %r242, %f3112; - xor.b32 %r243, %r242, -2147483648; - mov.b32 %f2247, %r243; - selp.f32 %f457, %f2247, %f3112, %p7; - selp.f32 %f3113, %f330, %f457, %p352; - @%p351 bra BB1_169; - - cvt.rzi.f32.f32 %f2249, %f1711; - setp.neu.f32 %p268, %f2249, 0f40000000; - selp.f32 %f3113, 0f7FFFFFFF, %f457, %p268; - -BB1_169: - abs.f32 %f2878, %f322; - setp.eq.f32 %p356, %f322, 0f3F800000; - add.f32 %f2877, %f2878, 0f40000000; - mov.b32 %r287, %f2877; - setp.gt.s32 %p355, %r287, 2139095039; - setp.neu.f32 %p354, %f2878, 0f7F800000; - setp.gtu.f32 %p353, %f2878, 0f7F800000; - mov.f32 %f2876, 0f00000000; - mov.f32 %f2875, 0f3DAAAABD; - mov.f32 %f2874, 0f3C4CAF63; - mov.f32 %f2873, 0f3B18F0FE; - mov.f32 %f2872, 0fB5BFBE8E; - mov.f32 %f2871, 0fBF317200; - selp.f32 %f2870, 0fFF800000, 0f7F800000, %p7; - add.f32 %f2869, %f322, 0f40000000; - selp.f32 %f2252, %f2869, %f3113, %p353; - selp.f32 %f2253, %f2252, %f2870, %p354; - selp.f32 %f2254, %f2253, %f3113, %p355; - mul.f32 %f2255, %f2254, 0fBF000000; - selp.f32 %f2256, 0fBF000000, %f2255, %p356; - mul.f32 %f2257, %f2256, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2258, %f2257; - fma.rn.f32 %f2260, %f2258, %f2871, %f2256; - fma.rn.f32 %f2262, %f2258, %f2872, %f2260; - mul.f32 %f2263, %f2262, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2264, %f2263; - add.f32 %f2265, %f2258, 0f00000000; - ex2.approx.f32 %f2266, %f2265; - mul.f32 %f2267, %f2264, %f2266; - setp.lt.f32 %p273, %f2256, 0fC2D20000; - selp.f32 %f2268, 0f00000000, %f2267, %p273; - setp.gt.f32 %p274, %f2256, 0f42D20000; - selp.f32 %f461, 0f7F800000, %f2268, %p274; - // inline asm - rcp.approx.ftz.f32 %f2250,%f336; - // inline asm - mul.f32 %f2269, %f2250, %f337; - mul.f32 %f2270, %f2269, %f2269; - fma.rn.f32 %f2273, %f2873, %f2270, %f2874; - fma.rn.f32 %f2275, %f2273, %f2270, %f2875; - mul.rn.f32 %f2276, %f2275, %f2270; - mul.rn.f32 %f2277, %f2276, %f2269; - sub.f32 %f2278, %f335, %f2269; - neg.f32 %f2279, %f2269; - add.f32 %f2280, %f2278, %f2278; - fma.rn.f32 %f2281, %f2279, %f335, %f2280; - mul.rn.f32 %f2282, %f2250, %f2281; - add.f32 %f2283, %f2277, %f2269; - sub.f32 %f2284, %f2269, %f2283; - add.f32 %f2285, %f2277, %f2284; - add.f32 %f2286, %f2282, %f2285; - add.f32 %f2287, %f2283, %f2286; - sub.f32 %f2288, %f2283, %f2287; - add.f32 %f2289, %f2286, %f2288; - add.f32 %f2290, %f338, %f2287; - sub.f32 %f2291, %f338, %f2290; - add.f32 %f2292, %f2287, %f2291; - add.f32 %f2293, %f2289, %f2292; - add.f32 %f2294, %f339, %f2293; - add.f32 %f2295, %f2290, %f2294; - sub.f32 %f2296, %f2290, %f2295; - add.f32 %f2297, %f2294, %f2296; - mul.rn.f32 %f2299, %f1711, %f2295; - neg.f32 %f2300, %f2299; - fma.rn.f32 %f2301, %f1711, %f2295, %f2300; - fma.rn.f32 %f2302, %f1711, %f2297, %f2301; - fma.rn.f32 %f2304, %f2876, %f2295, %f2302; - add.rn.f32 %f2305, %f2299, %f2304; - neg.f32 %f2306, %f2305; - add.rn.f32 %f2307, %f2299, %f2306; - add.rn.f32 %f2308, %f2307, %f2304; - mov.b32 %r244, %f2305; - setp.eq.s32 %p275, %r244, 1118925336; - add.s32 %r245, %r244, -1; - mov.b32 %f2309, %r245; - add.f32 %f2310, %f2308, 0f37000000; - selp.f32 %f2311, %f2309, %f2305, %p275; - selp.f32 %f462, %f2310, %f2308, %p275; - mul.f32 %f2312, %f2311, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2313, %f2312; - fma.rn.f32 %f2314, %f2313, %f2871, %f2311; - fma.rn.f32 %f2315, %f2313, %f2872, %f2314; - mul.f32 %f2316, %f2315, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2317, %f2316; - add.f32 %f2318, %f2313, 0f00000000; - ex2.approx.f32 %f2319, %f2318; - mul.f32 %f2320, %f2317, %f2319; - setp.lt.f32 %p276, %f2311, 0fC2D20000; - selp.f32 %f2321, 0f00000000, %f2320, %p276; - setp.gt.f32 %p277, %f2311, 0f42D20000; - selp.f32 %f3114, 0f7F800000, %f2321, %p277; - setp.eq.f32 %p278, %f3114, 0f7F800000; - @%p278 bra BB1_171; - - fma.rn.f32 %f3114, %f3114, %f462, %f3114; - -BB1_171: - setp.eq.f32 %p358, %f333, 0f00000000; - setp.geu.f32 %p357, %f333, 0f00000000; - mov.b32 %r246, %f3114; - xor.b32 %r247, %r246, -2147483648; - mov.b32 %f2322, %r247; - selp.f32 %f466, %f2322, %f3114, %p8; - selp.f32 %f3115, %f340, %f466, %p358; - @%p357 bra BB1_173; - - cvt.rzi.f32.f32 %f2324, %f1711; - setp.neu.f32 %p280, %f2324, 0f40000000; - selp.f32 %f3115, 0f7FFFFFFF, %f466, %p280; - -BB1_173: - abs.f32 %f2891, %f333; - cvt.rn.f32.s32 %f2890, %r311; - sub.f32 %f2889, %f2890, %f3080; - add.f32 %f2888, %f2889, 0f3F800000; - setp.eq.f32 %p362, %f333, 0f3F800000; - add.f32 %f2887, %f2891, 0f40000000; - mov.b32 %r288, %f2887; - setp.gt.s32 %p361, %r288, 2139095039; - setp.neu.f32 %p360, %f2891, 0f7F800000; - setp.gtu.f32 %p359, %f2891, 0f7F800000; - selp.f32 %f2886, 0fFF800000, 0f7F800000, %p8; - add.f32 %f2885, %f333, 0f40000000; - mov.f32 %f2884, 0f00000000; - mov.f32 %f2883, 0f3DAAAABD; - mov.f32 %f2882, 0f3C4CAF63; - mov.f32 %f2881, 0f3B18F0FE; - mov.f32 %f2880, 0fB5BFBE8E; - mov.f32 %f2879, 0fBF317200; - selp.f32 %f2327, %f2885, %f3115, %p359; - selp.f32 %f2328, %f2327, %f2886, %p360; - selp.f32 %f2329, %f2328, %f3115, %p361; - mul.f32 %f2330, %f2329, 0fBF000000; - selp.f32 %f2331, 0fBF000000, %f2330, %p362; - mul.f32 %f2332, %f2331, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2333, %f2332; - fma.rn.f32 %f2335, %f2333, %f2879, %f2331; - fma.rn.f32 %f2337, %f2333, %f2880, %f2335; - mul.f32 %f2338, %f2337, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2339, %f2338; - add.f32 %f2340, %f2333, 0f00000000; - ex2.approx.f32 %f2341, %f2340; - mul.f32 %f2342, %f2339, %f2341; - setp.lt.f32 %p285, %f2331, 0fC2D20000; - selp.f32 %f2343, 0f00000000, %f2342, %p285; - setp.gt.f32 %p286, %f2331, 0f42D20000; - selp.f32 %f2344, 0f7F800000, %f2343, %p286; - mul.f32 %f2345, %f2889, %f2344; - mul.f32 %f2346, %f2888, %f461; - sub.f32 %f2347, %f2346, %f2345; - mul.f32 %f2348, %f311, %f2347; - mul.f32 %f470, %f394, %f2348; - // inline asm - rcp.approx.ftz.f32 %f2325,%f1996; - // inline asm - mul.f32 %f2349, %f2325, %f419; - mul.f32 %f2350, %f2349, %f2349; - fma.rn.f32 %f2353, %f2881, %f2350, %f2882; - fma.rn.f32 %f2355, %f2353, %f2350, %f2883; - mul.rn.f32 %f2356, %f2355, %f2350; - mul.rn.f32 %f2357, %f2356, %f2349; - sub.f32 %f2358, %f417, %f2349; - neg.f32 %f2359, %f2349; - add.f32 %f2360, %f2358, %f2358; - fma.rn.f32 %f2361, %f2359, %f417, %f2360; - mul.rn.f32 %f2362, %f2325, %f2361; - add.f32 %f2363, %f2357, %f2349; - sub.f32 %f2364, %f2349, %f2363; - add.f32 %f2365, %f2357, %f2364; - add.f32 %f2366, %f2362, %f2365; - add.f32 %f2367, %f2363, %f2366; - sub.f32 %f2368, %f2363, %f2367; - add.f32 %f2369, %f2366, %f2368; - add.f32 %f2370, %f420, %f2367; - sub.f32 %f2371, %f420, %f2370; - add.f32 %f2372, %f2367, %f2371; - add.f32 %f2373, %f2369, %f2372; - add.f32 %f2374, %f421, %f2373; - add.f32 %f2375, %f2370, %f2374; - sub.f32 %f2376, %f2370, %f2375; - add.f32 %f2377, %f2374, %f2376; - mul.rn.f32 %f2379, %f1711, %f2375; - neg.f32 %f2380, %f2379; - fma.rn.f32 %f2381, %f1711, %f2375, %f2380; - fma.rn.f32 %f2382, %f1711, %f2377, %f2381; - fma.rn.f32 %f2384, %f2884, %f2375, %f2382; - add.rn.f32 %f2385, %f2379, %f2384; - neg.f32 %f2386, %f2385; - add.rn.f32 %f2387, %f2379, %f2386; - add.rn.f32 %f2388, %f2387, %f2384; - mov.b32 %r248, %f2385; - setp.eq.s32 %p287, %r248, 1118925336; - add.s32 %r249, %r248, -1; - mov.b32 %f2389, %r249; - add.f32 %f2390, %f2388, 0f37000000; - selp.f32 %f2391, %f2389, %f2385, %p287; - selp.f32 %f471, %f2390, %f2388, %p287; - mul.f32 %f2392, %f2391, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2393, %f2392; - fma.rn.f32 %f2394, %f2393, %f2879, %f2391; - fma.rn.f32 %f2395, %f2393, %f2880, %f2394; - mul.f32 %f2396, %f2395, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2397, %f2396; - add.f32 %f2398, %f2393, 0f00000000; - ex2.approx.f32 %f2399, %f2398; - mul.f32 %f2400, %f2397, %f2399; - setp.lt.f32 %p288, %f2391, 0fC2D20000; - selp.f32 %f2401, 0f00000000, %f2400, %p288; - setp.gt.f32 %p289, %f2391, 0f42D20000; - selp.f32 %f3116, 0f7F800000, %f2401, %p289; - setp.eq.f32 %p290, %f3116, 0f7F800000; - @%p290 bra BB1_175; - - fma.rn.f32 %f3116, %f3116, %f471, %f3116; - -BB1_175: - setp.eq.f32 %p363, %f415, 0f00000000; - mov.b32 %r250, %f3116; - xor.b32 %r251, %r250, -2147483648; - mov.b32 %f2402, %r251; - selp.f32 %f3118, %f2402, %f3116, %p11; - @%p363 bra BB1_178; - bra.uni BB1_176; - -BB1_178: - add.f32 %f2405, %f415, %f415; - selp.f32 %f3118, %f2405, 0f00000000, %p188; - bra.uni BB1_179; - -BB1_176: - setp.geu.f32 %p292, %f415, 0f00000000; - @%p292 bra BB1_179; - - cvt.rzi.f32.f32 %f2404, %f1711; - setp.neu.f32 %p293, %f2404, 0f40000000; - selp.f32 %f3118, 0f7FFFFFFF, %f3118, %p293; - -BB1_179: - abs.f32 %f2893, %f415; - add.f32 %f2892, %f2893, 0f40000000; - mov.b32 %r289, %f2892; - setp.lt.s32 %p364, %r289, 2139095040; - @%p364 bra BB1_184; - - abs.f32 %f2961, %f415; - setp.gtu.f32 %p296, %f2961, 0f7F800000; - @%p296 bra BB1_183; - bra.uni BB1_181; - -BB1_183: - add.f32 %f3118, %f415, 0f40000000; - bra.uni BB1_184; - -BB1_181: - abs.f32 %f2962, %f415; - setp.neu.f32 %p297, %f2962, 0f7F800000; - @%p297 bra BB1_184; - - selp.f32 %f3118, 0fFF800000, 0f7F800000, %p11; - -BB1_184: - setp.eq.f32 %p365, %f415, 0f3F800000; - mov.f32 %f2899, 0f00000000; - mov.f32 %f2898, 0f3DAAAABD; - mov.f32 %f2897, 0f3C4CAF63; - mov.f32 %f2896, 0f3B18F0FE; - mov.f32 %f2895, 0fB5BFBE8E; - mov.f32 %f2894, 0fBF317200; - mul.f32 %f2408, %f3118, 0fBF000000; - selp.f32 %f2409, 0fBF000000, %f2408, %p365; - mul.f32 %f2410, %f2409, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2411, %f2410; - fma.rn.f32 %f2413, %f2411, %f2894, %f2409; - fma.rn.f32 %f2415, %f2411, %f2895, %f2413; - mul.f32 %f2416, %f2415, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2417, %f2416; - add.f32 %f2418, %f2411, 0f00000000; - ex2.approx.f32 %f2419, %f2418; - mul.f32 %f2420, %f2417, %f2419; - setp.lt.f32 %p299, %f2409, 0fC2D20000; - selp.f32 %f2421, 0f00000000, %f2420, %p299; - setp.gt.f32 %p300, %f2409, 0f42D20000; - selp.f32 %f482, 0f7F800000, %f2421, %p300; - // inline asm - rcp.approx.ftz.f32 %f2406,%f2090; - // inline asm - mul.f32 %f2422, %f2406, %f438; - mul.f32 %f2423, %f2422, %f2422; - fma.rn.f32 %f2426, %f2896, %f2423, %f2897; - fma.rn.f32 %f2428, %f2426, %f2423, %f2898; - mul.rn.f32 %f2429, %f2428, %f2423; - mul.rn.f32 %f2430, %f2429, %f2422; - sub.f32 %f2431, %f436, %f2422; - neg.f32 %f2432, %f2422; - add.f32 %f2433, %f2431, %f2431; - fma.rn.f32 %f2434, %f2432, %f436, %f2433; - mul.rn.f32 %f2435, %f2406, %f2434; - add.f32 %f2436, %f2430, %f2422; - sub.f32 %f2437, %f2422, %f2436; - add.f32 %f2438, %f2430, %f2437; - add.f32 %f2439, %f2435, %f2438; - add.f32 %f2440, %f2436, %f2439; - sub.f32 %f2441, %f2436, %f2440; - add.f32 %f2442, %f2439, %f2441; - add.f32 %f2443, %f439, %f2440; - sub.f32 %f2444, %f439, %f2443; - add.f32 %f2445, %f2440, %f2444; - add.f32 %f2446, %f2442, %f2445; - add.f32 %f2447, %f440, %f2446; - add.f32 %f2448, %f2443, %f2447; - sub.f32 %f2449, %f2443, %f2448; - add.f32 %f2450, %f2447, %f2449; - mul.rn.f32 %f2452, %f1711, %f2448; - neg.f32 %f2453, %f2452; - fma.rn.f32 %f2454, %f1711, %f2448, %f2453; - fma.rn.f32 %f2455, %f1711, %f2450, %f2454; - fma.rn.f32 %f2457, %f2899, %f2448, %f2455; - add.rn.f32 %f2458, %f2452, %f2457; - neg.f32 %f2459, %f2458; - add.rn.f32 %f2460, %f2452, %f2459; - add.rn.f32 %f2461, %f2460, %f2457; - mov.b32 %r252, %f2458; - setp.eq.s32 %p301, %r252, 1118925336; - add.s32 %r253, %r252, -1; - mov.b32 %f2462, %r253; - add.f32 %f2463, %f2461, 0f37000000; - selp.f32 %f2464, %f2462, %f2458, %p301; - selp.f32 %f483, %f2463, %f2461, %p301; - mul.f32 %f2465, %f2464, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2466, %f2465; - fma.rn.f32 %f2467, %f2466, %f2894, %f2464; - fma.rn.f32 %f2468, %f2466, %f2895, %f2467; - mul.f32 %f2469, %f2468, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2470, %f2469; - add.f32 %f2471, %f2466, 0f00000000; - ex2.approx.f32 %f2472, %f2471; - mul.f32 %f2473, %f2470, %f2472; - setp.lt.f32 %p302, %f2464, 0fC2D20000; - selp.f32 %f2474, 0f00000000, %f2473, %p302; - setp.gt.f32 %p303, %f2464, 0f42D20000; - selp.f32 %f3119, 0f7F800000, %f2474, %p303; - setp.eq.f32 %p304, %f3119, 0f7F800000; - @%p304 bra BB1_186; - - fma.rn.f32 %f3119, %f3119, %f483, %f3119; - -BB1_186: - setp.eq.f32 %p368, %f434, 0f00000000; - mov.b32 %r254, %f3119; - xor.b32 %r255, %r254, -2147483648; - mov.b32 %f2475, %r255; - selp.f32 %f3121, %f2475, %f3119, %p12; - @%p368 bra BB1_189; - bra.uni BB1_187; - -BB1_189: - add.f32 %f2478, %f434, %f434; - selp.f32 %f3121, %f2478, 0f00000000, %p188; - bra.uni BB1_190; - -BB1_187: - setp.geu.f32 %p306, %f434, 0f00000000; - @%p306 bra BB1_190; - - cvt.rzi.f32.f32 %f2477, %f1711; - setp.neu.f32 %p307, %f2477, 0f40000000; - selp.f32 %f3121, 0f7FFFFFFF, %f3121, %p307; - -BB1_190: - abs.f32 %f2966, %f434; - add.f32 %f2965, %f2966, 0f40000000; - mov.b32 %r294, %f2965; - setp.lt.s32 %p369, %r294, 2139095040; - @%p369 bra BB1_195; - - abs.f32 %f2959, %f434; - setp.gtu.f32 %p310, %f2959, 0f7F800000; - @%p310 bra BB1_194; - bra.uni BB1_192; - -BB1_194: - add.f32 %f3121, %f434, 0f40000000; - bra.uni BB1_195; - -BB1_192: - abs.f32 %f2960, %f434; - setp.neu.f32 %p311, %f2960, 0f7F800000; - @%p311 bra BB1_195; - - selp.f32 %f3121, 0fFF800000, 0f7F800000, %p12; - -BB1_195: - setp.eq.f32 %p370, %f434, 0f3F800000; - cvt.rn.f32.s32 %f2904, %r312; - sub.f32 %f2903, %f2904, %f3079; - add.f32 %f2902, %f2903, 0f3F800000; - mov.f32 %f2901, 0fB5BFBE8E; - mov.f32 %f2900, 0fBF317200; - mul.f32 %f2479, %f3121, 0fBF000000; - selp.f32 %f2480, 0fBF000000, %f2479, %p370; - mul.f32 %f2481, %f2480, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2482, %f2481; - fma.rn.f32 %f2484, %f2482, %f2900, %f2480; - fma.rn.f32 %f2486, %f2482, %f2901, %f2484; - mul.f32 %f2487, %f2486, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2488, %f2487; - add.f32 %f2489, %f2482, 0f00000000; - ex2.approx.f32 %f2490, %f2489; - mul.f32 %f2491, %f2488, %f2490; - setp.lt.f32 %p313, %f2480, 0fC2D20000; - selp.f32 %f2492, 0f00000000, %f2491, %p313; - setp.gt.f32 %p314, %f2480, 0f42D20000; - selp.f32 %f2493, 0f7F800000, %f2492, %p314; - mul.f32 %f2494, %f2903, %f2493; - mul.f32 %f2495, %f2902, %f482; - sub.f32 %f2496, %f2495, %f2494; - mul.f32 %f2497, %f311, %f2496; - fma.rn.f32 %f2498, %f380, %f2497, %f470; - mul.f32 %f2499, %f414, %f414; - div.rn.f32 %f2500, %f2499, %f395; - add.f32 %f3096, %f2500, %f3096; - mul.f32 %f2501, %f452, %f414; - div.rn.f32 %f2502, %f2501, %f395; - add.f32 %f3095, %f2502, %f3095; - mul.f32 %f2503, %f380, %f394; - mul.f32 %f2504, %f2503, %f414; - div.rn.f32 %f2505, %f2504, %f395; - add.f32 %f3094, %f2505, %f3094; - div.rn.f32 %f2506, %f414, %f395; - add.f32 %f3093, %f2506, %f3093; - mul.f32 %f2507, %f2498, %f414; - div.rn.f32 %f2508, %f2507, %f395; - add.f32 %f3092, %f2508, %f3092; - mul.f32 %f2509, %f452, %f452; - div.rn.f32 %f2510, %f2509, %f395; - add.f32 %f3091, %f2510, %f3091; - mul.f32 %f2511, %f2503, %f452; - div.rn.f32 %f2512, %f2511, %f395; - add.f32 %f3090, %f2512, %f3090; - div.rn.f32 %f2513, %f452, %f395; - add.f32 %f3089, %f2513, %f3089; - mul.f32 %f2514, %f2498, %f452; - div.rn.f32 %f2515, %f2514, %f395; - add.f32 %f3088, %f2515, %f3088; - mul.f32 %f2516, %f2503, %f2503; - div.rn.f32 %f2517, %f2516, %f395; - add.f32 %f3087, %f2517, %f3087; - div.rn.f32 %f2518, %f2503, %f395; - add.f32 %f3086, %f2518, %f3086; - mul.f32 %f2519, %f2498, %f2503; - div.rn.f32 %f2520, %f2519, %f395; - add.f32 %f3085, %f2520, %f3085; - rcp.rn.f32 %f2521, %f395; - add.f32 %f3084, %f2521, %f3084; - div.rn.f32 %f2522, %f2498, %f395; - add.f32 %f3083, %f2522, %f3083; - mul.f32 %f2523, %f2498, %f2498; - div.rn.f32 %f2524, %f2523, %f395; - add.f32 %f3082, %f2524, %f3082; - setp.leu.f32 %p315, %f395, 0f00000000; - @%p315 bra BB1_203; - - setp.gt.f32 %p316, %f396, 0f00000000; - @%p316 bra BB1_198; - bra.uni BB1_197; - -BB1_198: - setp.lt.f32 %p317, %f395, 0f00800000; - mul.f32 %f2525, %f395, 0f4B000000; - selp.f32 %f510, %f2525, %f395, %p317; - selp.f32 %f2526, 0fC1B80000, 0f00000000, %p317; - mov.b32 %r256, %f510; - add.s32 %r257, %r256, -1059760811; - and.b32 %r258, %r257, -8388608; - sub.s32 %r259, %r256, %r258; - mov.b32 %f2527, %r259; - cvt.rn.f32.s32 %f2528, %r258; - mov.f32 %f2529, 0f34000000; - fma.rn.f32 %f2530, %f2528, %f2529, %f2526; - add.f32 %f2531, %f2527, 0fBF800000; - mov.f32 %f2532, 0f3E1039F6; - mov.f32 %f2533, 0fBE055027; - fma.rn.f32 %f2534, %f2533, %f2531, %f2532; - mov.f32 %f2535, 0fBDF8CDCC; - fma.rn.f32 %f2536, %f2534, %f2531, %f2535; - mov.f32 %f2537, 0f3E0F2955; - fma.rn.f32 %f2538, %f2536, %f2531, %f2537; - mov.f32 %f2539, 0fBE2AD8B9; - fma.rn.f32 %f2540, %f2538, %f2531, %f2539; - mov.f32 %f2541, 0f3E4CED0B; - fma.rn.f32 %f2542, %f2540, %f2531, %f2541; - mov.f32 %f2543, 0fBE7FFF22; - fma.rn.f32 %f2544, %f2542, %f2531, %f2543; - mov.f32 %f2545, 0f3EAAAA78; - fma.rn.f32 %f2546, %f2544, %f2531, %f2545; - mov.f32 %f2547, 0fBF000000; - fma.rn.f32 %f2548, %f2546, %f2531, %f2547; - mul.f32 %f2549, %f2531, %f2548; - fma.rn.f32 %f2550, %f2549, %f2531, %f2531; - mov.f32 %f2551, 0f3F317218; - fma.rn.f32 %f3122, %f2530, %f2551, %f2550; - setp.lt.u32 %p318, %r256, 2139095040; - @%p318 bra BB1_200; - - mov.f32 %f2552, 0f7F800000; - fma.rn.f32 %f3122, %f510, %f2552, %f2552; - -BB1_200: - setp.eq.f32 %p319, %f510, 0f00000000; - selp.f32 %f2553, 0fFF800000, %f3122, %p319; - mul.f32 %f2554, %f396, %f2553; - sub.f32 %f514, %f2554, %f395; - mul.f32 %f2555, %f396, 0f4B000000; - setp.lt.f32 %p320, %f396, 0f00800000; - selp.f32 %f515, %f2555, %f396, %p320; - selp.f32 %f2556, 0fC1B80000, 0f00000000, %p320; - mov.b32 %r260, %f515; - add.s32 %r261, %r260, -1059760811; - and.b32 %r262, %r261, -8388608; - sub.s32 %r263, %r260, %r262; - mov.b32 %f2557, %r263; - cvt.rn.f32.s32 %f2558, %r262; - fma.rn.f32 %f2560, %f2558, %f2529, %f2556; - add.f32 %f2561, %f2557, 0fBF800000; - fma.rn.f32 %f2564, %f2533, %f2561, %f2532; - fma.rn.f32 %f2566, %f2564, %f2561, %f2535; - fma.rn.f32 %f2568, %f2566, %f2561, %f2537; - fma.rn.f32 %f2570, %f2568, %f2561, %f2539; - fma.rn.f32 %f2572, %f2570, %f2561, %f2541; - fma.rn.f32 %f2574, %f2572, %f2561, %f2543; - fma.rn.f32 %f2576, %f2574, %f2561, %f2545; - fma.rn.f32 %f2578, %f2576, %f2561, %f2547; - mul.f32 %f2579, %f2561, %f2578; - fma.rn.f32 %f2580, %f2579, %f2561, %f2561; - fma.rn.f32 %f3123, %f2560, %f2551, %f2580; - setp.lt.u32 %p321, %r260, 2139095040; - @%p321 bra BB1_202; - - mov.f32 %f2582, 0f7F800000; - fma.rn.f32 %f3123, %f515, %f2582, %f2582; - -BB1_202: - setp.eq.f32 %p322, %f515, 0f00000000; - selp.f32 %f2583, 0fFF800000, %f3123, %p322; - mul.f32 %f2584, %f396, %f2583; - sub.f32 %f2585, %f514, %f2584; - add.f32 %f2586, %f396, %f2585; - add.f32 %f3124, %f3124, %f2586; - bra.uni BB1_203; - -BB1_197: - sub.f32 %f3124, %f3124, %f395; - -BB1_203: - add.s32 %r312, %r312, 1; - setp.lt.s32 %p323, %r312, %r63; - @%p323 bra BB1_123; - - st.local.f32 [%rd2], %f3096; - st.local.f32 [%rd2+4], %f3095; - st.local.f32 [%rd2+20], %f3095; - st.local.f32 [%rd2+8], %f3094; - st.local.f32 [%rd2+40], %f3094; - st.local.f32 [%rd2+12], %f3093; - st.local.f32 [%rd2+60], %f3093; - st.local.f32 [%rd2+16], %f3092; - st.local.f32 [%rd2+80], %f3092; - st.local.f32 [%rd2+24], %f3091; - st.local.f32 [%rd2+28], %f3090; - st.local.f32 [%rd2+44], %f3090; - st.local.f32 [%rd2+32], %f3089; - st.local.f32 [%rd2+64], %f3089; - st.local.f32 [%rd2+36], %f3088; - st.local.f32 [%rd2+84], %f3088; - st.local.f32 [%rd2+48], %f3087; - st.local.f32 [%rd2+52], %f3086; - st.local.f32 [%rd2+68], %f3086; - st.local.f32 [%rd2+56], %f3085; - st.local.f32 [%rd2+88], %f3085; - st.local.f32 [%rd2+72], %f3084; - st.local.f32 [%rd2+76], %f3083; - st.local.f32 [%rd2+92], %f3083; - st.local.f32 [%rd2+96], %f3082; - add.s32 %r311, %r311, 1; - setp.lt.s32 %p324, %r311, %r63; - @%p324 bra BB1_122; - -BB1_205: - mov.f32 %f3126, 0f00000000; - ld.local.f32 %f2588, [%rd2]; - rcp.rn.f32 %f522, %f2588; - ld.local.f32 %f2589, [%rd2+4]; - mul.f32 %f523, %f522, %f2589; - ld.local.f32 %f2590, [%rd2+8]; - ld.local.f32 %f2591, [%rd2+12]; - ld.local.f32 %f2592, [%rd2+16]; - ld.local.f32 %f2593, [%rd2+20]; - ld.local.f32 %f2594, [%rd2+24]; - ld.local.f32 %f2595, [%rd2+28]; - ld.local.f32 %f2596, [%rd2+32]; - ld.local.f32 %f2597, [%rd2+36]; - ld.local.f32 %f2598, [%rd2+40]; - ld.local.f32 %f2599, [%rd2+44]; - st.local.f32 [%rd2+4], %f523; - mul.f32 %f524, %f522, %f2590; - st.local.f32 [%rd2+8], %f524; - mul.f32 %f525, %f522, %f2591; - st.local.f32 [%rd2+12], %f525; - mul.f32 %f526, %f522, %f2592; - st.local.f32 [%rd2+16], %f526; - ld.local.f32 %f2600, [%rd2+4]; - fma.rn.f32 %f2601, %f2600, %f2593, 0f00000000; - sub.f32 %f2602, %f2594, %f2601; - ld.local.f32 %f527, [%rd2+20]; - st.local.f32 [%rd2+24], %f2602; - fma.rn.f32 %f2603, %f524, %f527, 0f00000000; - rcp.rn.f32 %f528, %f2602; - sub.f32 %f2604, %f2595, %f2603; - mul.f32 %f529, %f528, %f2604; - st.local.f32 [%rd2+28], %f529; - fma.rn.f32 %f2605, %f525, %f527, 0f00000000; - sub.f32 %f2606, %f2596, %f2605; - mul.f32 %f530, %f528, %f2606; - st.local.f32 [%rd2+32], %f530; - fma.rn.f32 %f2607, %f526, %f527, 0f00000000; - sub.f32 %f2608, %f2597, %f2607; - mul.f32 %f531, %f528, %f2608; - st.local.f32 [%rd2+36], %f531; - ld.local.f32 %f2609, [%rd2+4]; - fma.rn.f32 %f2610, %f2609, %f2598, 0f00000000; - sub.f32 %f532, %f2599, %f2610; - st.local.f32 [%rd2+44], %f532; - add.s64 %rd96, %rd2, 40; - add.s64 %rd95, %rd2, 8; - mov.u32 %r313, -1; - -BB1_206: - ld.local.f32 %f2611, [%rd96]; - ld.local.f32 %f2612, [%rd95]; - fma.rn.f32 %f3126, %f2612, %f2611, %f3126; - add.s64 %rd96, %rd96, 4; - add.s64 %rd95, %rd95, 20; - add.s32 %r313, %r313, 1; - setp.lt.s32 %p325, %r313, 1; - @%p325 bra BB1_206; - - ld.local.f32 %f2614, [%rd2+48]; - sub.f32 %f2615, %f2614, %f3126; - ld.local.f32 %f535, [%rd2+40]; - ld.local.f32 %f2616, [%rd2+52]; - ld.local.f32 %f2617, [%rd2+56]; - ld.local.f32 %f2618, [%rd2+60]; - ld.local.f32 %f2619, [%rd2+4]; - ld.local.f32 %f2620, [%rd2+64]; - st.local.f32 [%rd2+48], %f2615; - fma.rn.f32 %f2621, %f525, %f535, 0f00000000; - fma.rn.f32 %f2622, %f530, %f532, %f2621; - rcp.rn.f32 %f536, %f2615; - sub.f32 %f2623, %f2616, %f2622; - mul.f32 %f537, %f536, %f2623; - st.local.f32 [%rd2+52], %f537; - fma.rn.f32 %f2624, %f526, %f535, 0f00000000; - fma.rn.f32 %f2625, %f531, %f532, %f2624; - sub.f32 %f2626, %f2617, %f2625; - mul.f32 %f538, %f536, %f2626; - st.local.f32 [%rd2+56], %f538; - fma.rn.f32 %f2627, %f2619, %f2618, 0f00000000; - sub.f32 %f539, %f2620, %f2627; - st.local.f32 [%rd2+64], %f539; - add.s64 %rd98, %rd2, 60; - add.s64 %rd97, %rd2, 8; - mov.f32 %f3127, 0f00000000; - mov.u32 %r314, -1; - -BB1_208: - ld.local.f32 %f2628, [%rd98]; - ld.local.f32 %f2629, [%rd97]; - fma.rn.f32 %f3127, %f2629, %f2628, %f3127; - add.s64 %rd98, %rd98, 4; - add.s64 %rd97, %rd97, 20; - add.s32 %r314, %r314, 1; - setp.lt.s32 %p326, %r314, 1; - @%p326 bra BB1_208; - - ld.local.f32 %f2631, [%rd2+68]; - sub.f32 %f542, %f2631, %f3127; - st.local.f32 [%rd2+68], %f542; - add.s64 %rd100, %rd2, 60; - add.s64 %rd99, %rd2, 12; - mov.f32 %f3128, 0f00000000; - mov.u32 %r315, -1; - -BB1_210: - ld.local.f32 %f2632, [%rd100]; - ld.local.f32 %f2633, [%rd99]; - fma.rn.f32 %f3128, %f2633, %f2632, %f3128; - add.s64 %rd100, %rd100, 4; - add.s64 %rd99, %rd99, 20; - add.s32 %r315, %r315, 1; - setp.lt.s32 %p327, %r315, 2; - @%p327 bra BB1_210; - - ld.local.f32 %f2635, [%rd2+72]; - sub.f32 %f2636, %f2635, %f3128; - ld.local.f32 %f545, [%rd2+60]; - ld.local.f32 %f2637, [%rd2+76]; - ld.local.f32 %f2638, [%rd2+80]; - ld.local.f32 %f2639, [%rd2+4]; - ld.local.f32 %f2640, [%rd2+84]; - st.local.f32 [%rd2+72], %f2636; - fma.rn.f32 %f2641, %f526, %f545, 0f00000000; - fma.rn.f32 %f2642, %f531, %f539, %f2641; - fma.rn.f32 %f2643, %f538, %f542, %f2642; - rcp.rn.f32 %f546, %f2636; - sub.f32 %f2644, %f2637, %f2643; - mul.f32 %f547, %f546, %f2644; - st.local.f32 [%rd2+76], %f547; - fma.rn.f32 %f2645, %f2639, %f2638, 0f00000000; - sub.f32 %f548, %f2640, %f2645; - st.local.f32 [%rd2+84], %f548; - add.s64 %rd102, %rd2, 80; - add.s64 %rd101, %rd2, 8; - mov.f32 %f3129, 0f00000000; - mov.u32 %r316, -1; - -BB1_212: - ld.local.f32 %f2646, [%rd102]; - ld.local.f32 %f2647, [%rd101]; - fma.rn.f32 %f3129, %f2647, %f2646, %f3129; - add.s64 %rd102, %rd102, 4; - add.s64 %rd101, %rd101, 20; - add.s32 %r316, %r316, 1; - setp.lt.s32 %p328, %r316, 1; - @%p328 bra BB1_212; - - ld.local.f32 %f2649, [%rd2+88]; - sub.f32 %f551, %f2649, %f3129; - st.local.f32 [%rd2+88], %f551; - add.s64 %rd104, %rd2, 80; - add.s64 %rd103, %rd2, 12; - mov.f32 %f3130, 0f00000000; - mov.u32 %r317, -1; - -BB1_214: - ld.local.f32 %f2650, [%rd104]; - ld.local.f32 %f2651, [%rd103]; - fma.rn.f32 %f3130, %f2651, %f2650, %f3130; - add.s64 %rd104, %rd104, 4; - add.s64 %rd103, %rd103, 20; - add.s32 %r317, %r317, 1; - setp.lt.s32 %p329, %r317, 2; - @%p329 bra BB1_214; - - ld.local.f32 %f2653, [%rd2+92]; - sub.f32 %f554, %f2653, %f3130; - st.local.f32 [%rd2+92], %f554; - add.s64 %rd106, %rd2, 80; - add.s64 %rd105, %rd2, 16; - mov.f32 %f3131, 0f00000000; - mov.u32 %r318, -1; - -BB1_216: - ld.local.f32 %f2654, [%rd106]; - ld.local.f32 %f2655, [%rd105]; - fma.rn.f32 %f3131, %f2655, %f2654, %f3131; - add.s64 %rd106, %rd106, 4; - add.s64 %rd105, %rd105, 20; - add.s32 %r318, %r318, 1; - setp.lt.s32 %p330, %r318, 3; - @%p330 bra BB1_216; - - ld.param.u64 %rd93, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_6]; - ld.param.u64 %rd92, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_5]; - ld.param.u32 %r282, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_7]; - ld.param.u64 %rd91, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_4]; - mov.u32 %r281, %tid.x; - mov.u32 %r280, %ctaid.x; - mov.u32 %r279, %ntid.x; - mad.lo.s32 %r278, %r279, %r280, %r281; - ld.local.f32 %f2656, [%rd2+96]; - sub.f32 %f2657, %f2656, %f3131; - ld.local.f32 %f2658, [%rd2+80]; - st.local.f32 [%rd2+96], %f2657; - add.f32 %f2659, %f523, 0f00000000; - mov.f32 %f2660, 0f00000000; - sub.f32 %f2661, %f2660, %f2659; - add.f32 %f2662, %f524, 0f00000000; - fma.rn.f32 %f2663, %f529, %f2661, %f2662; - sub.f32 %f2664, %f2660, %f2663; - add.f32 %f2665, %f525, 0f00000000; - fma.rn.f32 %f2666, %f530, %f2661, %f2665; - fma.rn.f32 %f2667, %f537, %f2664, %f2666; - sub.f32 %f2668, %f2660, %f2667; - add.f32 %f2669, %f526, 0f00000000; - fma.rn.f32 %f2670, %f531, %f2661, %f2669; - fma.rn.f32 %f2671, %f538, %f2664, %f2670; - fma.rn.f32 %f2672, %f547, %f2668, %f2671; - sub.f32 %f2673, %f2660, %f2672; - div.rn.f32 %f2674, %f2673, %f2657; - fma.rn.f32 %f2675, %f554, %f2674, 0f00000000; - sub.f32 %f2676, %f2668, %f2675; - mul.f32 %f2677, %f546, %f2676; - fma.rn.f32 %f2678, %f542, %f2677, 0f00000000; - fma.rn.f32 %f2679, %f551, %f2674, %f2678; - sub.f32 %f2680, %f2664, %f2679; - mul.f32 %f2681, %f536, %f2680; - fma.rn.f32 %f2682, %f532, %f2681, 0f00000000; - fma.rn.f32 %f2683, %f539, %f2677, %f2682; - fma.rn.f32 %f2684, %f548, %f2674, %f2683; - sub.f32 %f2685, %f2661, %f2684; - mul.f32 %f2686, %f528, %f2685; - fma.rn.f32 %f2687, %f527, %f2686, 0f00000000; - fma.rn.f32 %f2688, %f535, %f2681, %f2687; - fma.rn.f32 %f2689, %f545, %f2677, %f2688; - fma.rn.f32 %f2690, %f2658, %f2674, %f2689; - mov.f32 %f2691, 0f3F800000; - sub.f32 %f2692, %f2691, %f2690; - mul.f32 %f2693, %f522, %f2692; - fma.rn.f32 %f2694, %f523, 0f00000000, 0f00000000; - sub.f32 %f2695, %f2691, %f2694; - fma.rn.f32 %f2696, %f524, 0f00000000, 0f00000000; - fma.rn.f32 %f2697, %f529, %f2695, %f2696; - sub.f32 %f2698, %f2660, %f2697; - fma.rn.f32 %f2699, %f525, 0f00000000, 0f00000000; - fma.rn.f32 %f2700, %f530, %f2695, %f2699; - fma.rn.f32 %f2701, %f537, %f2698, %f2700; - sub.f32 %f2702, %f2660, %f2701; - fma.rn.f32 %f2703, %f526, 0f00000000, 0f00000000; - fma.rn.f32 %f2704, %f531, %f2695, %f2703; - fma.rn.f32 %f2705, %f538, %f2698, %f2704; - fma.rn.f32 %f2706, %f547, %f2702, %f2705; - sub.f32 %f2707, %f2660, %f2706; - div.rn.f32 %f2708, %f2707, %f2657; - fma.rn.f32 %f2709, %f554, %f2708, 0f00000000; - sub.f32 %f2710, %f2702, %f2709; - mul.f32 %f2711, %f546, %f2710; - fma.rn.f32 %f2712, %f542, %f2711, 0f00000000; - fma.rn.f32 %f2713, %f551, %f2708, %f2712; - sub.f32 %f2714, %f2698, %f2713; - mul.f32 %f2715, %f536, %f2714; - fma.rn.f32 %f2716, %f532, %f2715, 0f00000000; - fma.rn.f32 %f2717, %f539, %f2711, %f2716; - fma.rn.f32 %f2718, %f548, %f2708, %f2717; - sub.f32 %f2719, %f2695, %f2718; - mul.f32 %f2720, %f528, %f2719; - sub.f32 %f2721, %f2660, %f2694; - fma.rn.f32 %f2722, %f529, %f2721, %f2696; - sub.f32 %f2723, %f2691, %f2722; - fma.rn.f32 %f2724, %f530, %f2721, %f2699; - fma.rn.f32 %f2725, %f537, %f2723, %f2724; - sub.f32 %f2726, %f2660, %f2725; - fma.rn.f32 %f2727, %f531, %f2721, %f2703; - fma.rn.f32 %f2728, %f538, %f2723, %f2727; - fma.rn.f32 %f2729, %f547, %f2726, %f2728; - sub.f32 %f2730, %f2660, %f2729; - div.rn.f32 %f2731, %f2730, %f2657; - fma.rn.f32 %f2732, %f554, %f2731, 0f00000000; - sub.f32 %f2733, %f2726, %f2732; - mul.f32 %f2734, %f546, %f2733; - fma.rn.f32 %f2735, %f542, %f2734, 0f00000000; - fma.rn.f32 %f2736, %f551, %f2731, %f2735; - sub.f32 %f2737, %f2723, %f2736; - mul.f32 %f2738, %f536, %f2737; - sub.f32 %f2739, %f2660, %f2722; - fma.rn.f32 %f2740, %f537, %f2739, %f2724; - sub.f32 %f2741, %f2691, %f2740; - fma.rn.f32 %f2742, %f538, %f2739, %f2727; - fma.rn.f32 %f2743, %f547, %f2741, %f2742; - sub.f32 %f2744, %f2660, %f2743; - div.rn.f32 %f2745, %f2744, %f2657; - fma.rn.f32 %f2746, %f554, %f2745, 0f00000000; - sub.f32 %f2747, %f2741, %f2746; - mul.f32 %f2748, %f546, %f2747; - sub.f32 %f2749, %f2660, %f2740; - fma.rn.f32 %f2750, %f547, %f2749, %f2742; - sub.f32 %f2751, %f2691, %f2750; - div.rn.f32 %f2752, %f2751, %f2657; - cvta.to.global.u64 %rd75, %rd91; - mul.wide.s32 %rd76, %r278, 4; - add.s64 %rd77, %rd75, %rd76; - st.global.f32 [%rd77], %f3080; - shl.b32 %r274, %r282, 2; - cvt.s64.s32 %rd78, %r274; - add.s64 %rd79, %rd77, %rd78; - st.global.f32 [%rd79], %f3079; - add.s64 %rd80, %rd79, %rd78; - st.global.f32 [%rd80], %f3078; - add.s64 %rd81, %rd80, %rd78; - st.global.f32 [%rd81], %f2993; - add.s64 %rd82, %rd81, %rd78; - st.global.f32 [%rd82], %f3076; - cvta.to.global.u64 %rd83, %rd92; - add.s64 %rd84, %rd83, %rd76; - st.global.f32 [%rd84], %f2693; - add.s64 %rd85, %rd84, %rd78; - st.global.f32 [%rd85], %f2720; - add.s64 %rd86, %rd85, %rd78; - st.global.f32 [%rd86], %f2738; - add.s64 %rd87, %rd86, %rd78; - st.global.f32 [%rd87], %f2748; - add.s64 %rd88, %rd87, %rd78; - st.global.f32 [%rd88], %f2752; - cvta.to.global.u64 %rd89, %rd93; - add.s64 %rd90, %rd89, %rd76; - st.global.f32 [%rd90], %f3124; - -BB1_218: + cvt.rn.f32.u32 %f582, %r195; + add.f32 %f583, %f580, %f582; + setp.gt.f32 %p108, %f581, 0f3FB504F3; + mul.f32 %f584, %f581, 0f3F000000; + add.f32 %f585, %f583, 0f3F800000; + selp.f32 %f586, %f585, %f583, %p108; + selp.f32 %f587, %f584, %f581, %p108; + add.f32 %f588, %f587, 0fBF800000; + add.f32 %f589, %f587, 0f3F800000; + rcp.approx.ftz.f32 %f590, %f589; + add.f32 %f591, %f588, %f588; + mul.f32 %f592, %f591, %f590; + mul.f32 %f593, %f592, %f592; + mov.f32 %f594, 0f3C4CAF63; + mov.f32 %f595, 0f3B18F0FE; + fma.rn.f32 %f596, %f595, %f593, %f594; + mov.f32 %f597, 0f3DAAAABD; + fma.rn.f32 %f598, %f596, %f593, %f597; + mul.rn.f32 %f599, %f598, %f593; + mul.rn.f32 %f600, %f599, %f592; + sub.f32 %f601, %f588, %f592; + add.f32 %f602, %f601, %f601; + neg.f32 %f603, %f592; + fma.rn.f32 %f604, %f603, %f588, %f602; + mul.rn.f32 %f605, %f590, %f604; + add.f32 %f606, %f600, %f592; + sub.f32 %f607, %f592, %f606; + add.f32 %f608, %f600, %f607; + add.f32 %f609, %f605, %f608; + add.f32 %f610, %f606, %f609; + sub.f32 %f611, %f606, %f610; + add.f32 %f612, %f609, %f611; + mov.f32 %f613, 0f3F317200; + mul.rn.f32 %f614, %f586, %f613; + mov.f32 %f615, 0f35BFBE8E; + mul.rn.f32 %f616, %f586, %f615; + add.f32 %f617, %f614, %f610; + sub.f32 %f618, %f614, %f617; + add.f32 %f619, %f610, %f618; + add.f32 %f620, %f612, %f619; + add.f32 %f621, %f616, %f620; + add.f32 %f622, %f617, %f621; + sub.f32 %f623, %f617, %f622; + add.f32 %f624, %f621, %f623; + mul.rn.f32 %f625, %f576, %f622; + neg.f32 %f626, %f625; + fma.rn.f32 %f627, %f576, %f622, %f626; + fma.rn.f32 %f628, %f576, %f624, %f627; + fma.rn.f32 %f630, %f2648, %f622, %f628; + add.rn.f32 %f631, %f625, %f630; + neg.f32 %f632, %f631; + add.rn.f32 %f633, %f625, %f632; + add.rn.f32 %f634, %f633, %f630; + mov.b32 %r196, %f631; + setp.eq.s32 %p109, %r196, 1118925336; + add.s32 %r197, %r196, -1; + mov.b32 %f635, %r197; + add.f32 %f636, %f634, 0f37000000; + selp.f32 %f83, %f636, %f634, %p109; + selp.f32 %f637, %f635, %f631, %p109; + mov.f32 %f638, 0f3FB8AA3B; + mul.rn.f32 %f639, %f637, %f638; + cvt.rzi.f32.f32 %f640, %f639; + abs.f32 %f641, %f640; + setp.gt.f32 %p110, %f641, 0f42FC0000; + mov.b32 %r198, %f640; + and.b32 %r199, %r198, -2147483648; + or.b32 %r200, %r199, 1123811328; + mov.b32 %f642, %r200; + selp.f32 %f643, %f642, %f640, %p110; + mov.f32 %f644, 0fBF317218; + fma.rn.f32 %f645, %f643, %f644, %f637; + mov.f32 %f646, 0f3102E308; + fma.rn.f32 %f647, %f643, %f646, %f645; + mul.f32 %f648, %f647, 0f3FB8AA3B; + add.f32 %f649, %f643, 0f4B40007F; + mov.b32 %r201, %f649; + shl.b32 %r202, %r201, 23; + mov.b32 %f650, %r202; + ex2.approx.ftz.f32 %f651, %f648; + mul.f32 %f84, %f651, %f650; + setp.lt.f32 %p111, %f80, 0f00000000; + and.pred %p4, %p111, %p106; + div.rn.f32 %f85, %f76, %f2864; + abs.f32 %f86, %f85; + setp.lt.f32 %p112, %f86, 0f00800000; + mul.f32 %f652, %f86, 0f4B800000; + selp.f32 %f653, %f652, %f86, %p112; + selp.f32 %f654, 0fC3170000, 0fC2FE0000, %p112; + mov.b32 %r203, %f653; + and.b32 %r204, %r203, 8388607; + or.b32 %r205, %r204, 1065353216; + mov.b32 %f655, %r205; + shr.u32 %r206, %r203, 23; + cvt.rn.f32.u32 %f656, %r206; + add.f32 %f657, %f654, %f656; + setp.gt.f32 %p113, %f655, 0f3FB504F3; + mul.f32 %f658, %f655, 0f3F000000; + add.f32 %f659, %f657, 0f3F800000; + selp.f32 %f660, %f659, %f657, %p113; + selp.f32 %f661, %f658, %f655, %p113; + add.f32 %f662, %f661, 0fBF800000; + add.f32 %f663, %f661, 0f3F800000; + rcp.approx.ftz.f32 %f664, %f663; + add.f32 %f665, %f662, %f662; + mul.f32 %f666, %f665, %f664; + mul.f32 %f667, %f666, %f666; + fma.rn.f32 %f668, %f595, %f667, %f594; + fma.rn.f32 %f669, %f668, %f667, %f597; + mul.rn.f32 %f670, %f669, %f667; + mul.rn.f32 %f671, %f670, %f666; + sub.f32 %f672, %f662, %f666; + add.f32 %f673, %f672, %f672; + neg.f32 %f674, %f666; + fma.rn.f32 %f675, %f674, %f662, %f673; + mul.rn.f32 %f676, %f664, %f675; + add.f32 %f677, %f671, %f666; + sub.f32 %f678, %f666, %f677; + add.f32 %f679, %f671, %f678; + add.f32 %f680, %f676, %f679; + add.f32 %f681, %f677, %f680; + sub.f32 %f682, %f677, %f681; + add.f32 %f683, %f680, %f682; + mul.rn.f32 %f684, %f660, %f613; + mul.rn.f32 %f685, %f660, %f615; + add.f32 %f686, %f684, %f681; + sub.f32 %f687, %f684, %f686; + add.f32 %f688, %f681, %f687; + add.f32 %f689, %f683, %f688; + add.f32 %f690, %f685, %f689; + add.f32 %f691, %f686, %f690; + sub.f32 %f692, %f686, %f691; + add.f32 %f693, %f690, %f692; + mul.rn.f32 %f694, %f576, %f691; + neg.f32 %f695, %f694; + fma.rn.f32 %f696, %f576, %f691, %f695; + fma.rn.f32 %f697, %f576, %f693, %f696; + fma.rn.f32 %f698, %f2648, %f691, %f697; + add.rn.f32 %f699, %f694, %f698; + neg.f32 %f700, %f699; + add.rn.f32 %f701, %f694, %f700; + add.rn.f32 %f702, %f701, %f698; + mov.b32 %r207, %f699; + setp.eq.s32 %p114, %r207, 1118925336; + add.s32 %r208, %r207, -1; + mov.b32 %f703, %r208; + add.f32 %f704, %f702, 0f37000000; + selp.f32 %f87, %f704, %f702, %p114; + selp.f32 %f705, %f703, %f699, %p114; + mul.rn.f32 %f706, %f705, %f638; + cvt.rzi.f32.f32 %f707, %f706; + abs.f32 %f708, %f707; + setp.gt.f32 %p115, %f708, 0f42FC0000; + mov.b32 %r209, %f707; + and.b32 %r210, %r209, -2147483648; + or.b32 %r211, %r210, 1123811328; + mov.b32 %f709, %r211; + selp.f32 %f710, %f709, %f707, %p115; + fma.rn.f32 %f711, %f710, %f644, %f705; + fma.rn.f32 %f712, %f710, %f646, %f711; + mul.f32 %f713, %f712, 0f3FB8AA3B; + add.f32 %f714, %f710, 0f4B40007F; + mov.b32 %r212, %f714; + shl.b32 %r213, %r212, 23; + mov.b32 %f715, %r213; + ex2.approx.ftz.f32 %f716, %f713; + mul.f32 %f88, %f716, %f715; + { + .reg .b32 %temp; + mov.b64 {%temp, %r53}, %fd477; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r54}, %fd309; + } + and.b32 %r55, %r54, 2146435072; + setp.eq.s32 %p117, %r55, 1073741824; + abs.f64 %fd310, %fd477; + { // callseq 11, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd310; + .param .b64 param1; + st.param.f64 [param1+0], %fd309; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd37, [retval0+0]; + } // callseq 11 + setp.lt.s32 %p118, %r53, 0; + and.pred %p6, %p118, %p117; + selp.b32 %r214, %r53, 0, %p117; + setp.lt.s32 %p119, %r54, 0; + or.b32 %r215, %r214, 2146435072; + selp.b32 %r56, %r215, %r214, %p119; + { + .reg .b32 %temp; + mov.b64 {%temp, %r216}, %fd35; + } + and.b32 %r57, %r216, 2146435072; + setp.ne.s32 %p120, %r57, 2146435072; + setp.gtu.f64 %p121, %fd310, 0d7FF0000000000000; + and.b32 %r58, %r54, 2147483647; + setp.gt.f64 %p122, %fd310, 0d3FF0000000000000; + selp.b32 %r217, 2146435072, 0, %p122; + xor.b32 %r218, %r217, 2146435072; + selp.b32 %r219, %r218, %r217, %p119; + setp.eq.f32 %p123, %f2864, 0fBF800000; + selp.b32 %r59, 1072693248, %r219, %p123; + setp.gt.s32 %p124, %r54, -1; + selp.b32 %r60, 2146435072, 0, %p124; + setp.ne.s32 %p125, %r58, 1071644672; + and.pred %p126, %p125, %p6; + or.b32 %r61, %r60, -2147483648; + selp.b32 %r62, %r61, %r60, %p126; + add.f32 %f717, %f535, 0f3F800000; + sub.f32 %f718, %f717, %f2868; + div.rn.f32 %f89, %f718, %f2864; + abs.f32 %f90, %f89; + setp.lt.f32 %p127, %f90, 0f00800000; + mul.f32 %f719, %f90, 0f4B800000; + selp.f32 %f720, %f719, %f90, %p127; + selp.f32 %f721, 0fC3170000, 0fC2FE0000, %p127; + mov.b32 %r220, %f720; + and.b32 %r221, %r220, 8388607; + or.b32 %r222, %r221, 1065353216; + mov.b32 %f722, %r222; + shr.u32 %r223, %r220, 23; + cvt.rn.f32.u32 %f723, %r223; + add.f32 %f724, %f721, %f723; + setp.gt.f32 %p128, %f722, 0f3FB504F3; + mul.f32 %f725, %f722, 0f3F000000; + add.f32 %f726, %f724, 0f3F800000; + selp.f32 %f727, %f726, %f724, %p128; + selp.f32 %f728, %f725, %f722, %p128; + add.f32 %f729, %f728, 0fBF800000; + add.f32 %f730, %f728, 0f3F800000; + rcp.approx.ftz.f32 %f731, %f730; + add.f32 %f732, %f729, %f729; + mul.f32 %f733, %f732, %f731; + mul.f32 %f734, %f733, %f733; + fma.rn.f32 %f735, %f595, %f734, %f594; + fma.rn.f32 %f736, %f735, %f734, %f597; + mul.rn.f32 %f737, %f736, %f734; + mul.rn.f32 %f738, %f737, %f733; + sub.f32 %f739, %f729, %f733; + add.f32 %f740, %f739, %f739; + neg.f32 %f741, %f733; + fma.rn.f32 %f742, %f741, %f729, %f740; + mul.rn.f32 %f743, %f731, %f742; + add.f32 %f744, %f738, %f733; + sub.f32 %f745, %f733, %f744; + add.f32 %f746, %f738, %f745; + add.f32 %f747, %f743, %f746; + add.f32 %f748, %f744, %f747; + sub.f32 %f749, %f744, %f748; + add.f32 %f750, %f747, %f749; + mul.rn.f32 %f751, %f727, %f613; + mul.rn.f32 %f752, %f727, %f615; + add.f32 %f753, %f751, %f748; + sub.f32 %f754, %f751, %f753; + add.f32 %f755, %f748, %f754; + add.f32 %f756, %f750, %f755; + add.f32 %f757, %f752, %f756; + add.f32 %f758, %f753, %f757; + sub.f32 %f759, %f753, %f758; + add.f32 %f760, %f757, %f759; + mul.rn.f32 %f761, %f576, %f758; + neg.f32 %f762, %f761; + fma.rn.f32 %f763, %f576, %f758, %f762; + fma.rn.f32 %f764, %f576, %f760, %f763; + fma.rn.f32 %f765, %f2648, %f758, %f764; + add.rn.f32 %f766, %f761, %f765; + neg.f32 %f767, %f766; + add.rn.f32 %f768, %f761, %f767; + add.rn.f32 %f769, %f768, %f765; + mov.b32 %r224, %f766; + setp.eq.s32 %p129, %r224, 1118925336; + add.s32 %r225, %r224, -1; + mov.b32 %f770, %r225; + add.f32 %f771, %f769, 0f37000000; + selp.f32 %f91, %f771, %f769, %p129; + selp.f32 %f772, %f770, %f766, %p129; + mul.rn.f32 %f773, %f772, %f638; + cvt.rzi.f32.f32 %f774, %f773; + abs.f32 %f775, %f774; + setp.gt.f32 %p130, %f775, 0f42FC0000; + mov.b32 %r226, %f774; + and.b32 %r227, %r226, -2147483648; + or.b32 %r228, %r227, 1123811328; + mov.b32 %f776, %r228; + selp.f32 %f777, %f776, %f774, %p130; + fma.rn.f32 %f778, %f777, %f644, %f772; + fma.rn.f32 %f779, %f777, %f646, %f778; + mul.f32 %f780, %f779, 0f3FB8AA3B; + add.f32 %f781, %f777, 0f4B40007F; + mov.b32 %r229, %f781; + shl.b32 %r230, %r229, 23; + mov.b32 %f782, %r230; + ex2.approx.ftz.f32 %f783, %f780; + mul.f32 %f92, %f783, %f782; + div.rn.f32 %f93, %f71, %f2864; + abs.f32 %f94, %f93; + setp.lt.f32 %p132, %f94, 0f00800000; + mul.f32 %f784, %f94, 0f4B800000; + selp.f32 %f785, %f784, %f94, %p132; + selp.f32 %f786, 0fC3170000, 0fC2FE0000, %p132; + mov.b32 %r231, %f785; + and.b32 %r232, %r231, 8388607; + or.b32 %r233, %r232, 1065353216; + mov.b32 %f787, %r233; + shr.u32 %r234, %r231, 23; + cvt.rn.f32.u32 %f788, %r234; + add.f32 %f789, %f786, %f788; + setp.gt.f32 %p133, %f787, 0f3FB504F3; + mul.f32 %f790, %f787, 0f3F000000; + add.f32 %f791, %f789, 0f3F800000; + selp.f32 %f792, %f791, %f789, %p133; + selp.f32 %f793, %f790, %f787, %p133; + add.f32 %f794, %f793, 0fBF800000; + add.f32 %f795, %f793, 0f3F800000; + rcp.approx.ftz.f32 %f796, %f795; + add.f32 %f797, %f794, %f794; + mul.f32 %f798, %f797, %f796; + mul.f32 %f799, %f798, %f798; + fma.rn.f32 %f800, %f595, %f799, %f594; + fma.rn.f32 %f801, %f800, %f799, %f597; + mul.rn.f32 %f802, %f801, %f799; + mul.rn.f32 %f803, %f802, %f798; + sub.f32 %f804, %f794, %f798; + add.f32 %f805, %f804, %f804; + neg.f32 %f806, %f798; + fma.rn.f32 %f807, %f806, %f794, %f805; + mul.rn.f32 %f808, %f796, %f807; + add.f32 %f809, %f803, %f798; + sub.f32 %f810, %f798, %f809; + add.f32 %f811, %f803, %f810; + add.f32 %f812, %f808, %f811; + add.f32 %f813, %f809, %f812; + sub.f32 %f814, %f809, %f813; + add.f32 %f815, %f812, %f814; + mul.rn.f32 %f816, %f792, %f613; + mul.rn.f32 %f817, %f792, %f615; + add.f32 %f818, %f816, %f813; + sub.f32 %f819, %f816, %f818; + add.f32 %f820, %f813, %f819; + add.f32 %f821, %f815, %f820; + add.f32 %f822, %f817, %f821; + add.f32 %f823, %f818, %f822; + sub.f32 %f824, %f818, %f823; + add.f32 %f825, %f822, %f824; + mul.rn.f32 %f826, %f576, %f823; + neg.f32 %f827, %f826; + fma.rn.f32 %f828, %f576, %f823, %f827; + fma.rn.f32 %f829, %f576, %f825, %f828; + fma.rn.f32 %f830, %f2648, %f823, %f829; + add.rn.f32 %f831, %f826, %f830; + neg.f32 %f832, %f831; + add.rn.f32 %f833, %f826, %f832; + add.rn.f32 %f834, %f833, %f830; + mov.b32 %r235, %f831; + setp.eq.s32 %p134, %r235, 1118925336; + add.s32 %r236, %r235, -1; + mov.b32 %f835, %r236; + add.f32 %f836, %f834, 0f37000000; + selp.f32 %f95, %f836, %f834, %p134; + selp.f32 %f837, %f835, %f831, %p134; + mul.rn.f32 %f838, %f837, %f638; + cvt.rzi.f32.f32 %f839, %f838; + abs.f32 %f840, %f839; + setp.gt.f32 %p135, %f840, 0f42FC0000; + mov.b32 %r237, %f839; + and.b32 %r238, %r237, -2147483648; + or.b32 %r239, %r238, 1123811328; + mov.b32 %f841, %r239; + selp.f32 %f842, %f841, %f839, %p135; + fma.rn.f32 %f843, %f842, %f644, %f837; + fma.rn.f32 %f844, %f842, %f646, %f843; + mul.f32 %f845, %f844, 0f3FB8AA3B; + add.f32 %f846, %f842, 0f4B40007F; + mov.b32 %r240, %f846; + shl.b32 %r241, %r240, 23; + mov.b32 %f847, %r241; + ex2.approx.ftz.f32 %f848, %f845; + mul.f32 %f96, %f848, %f847; + mov.f64 %fd311, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r242}, %fd311; + } + and.b32 %r243, %r242, 2146435072; + setp.eq.s32 %p137, %r243, 1074790400; + { // callseq 12, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd310; + .param .b64 param1; + st.param.f64 [param1+0], %fd311; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd38, [retval0+0]; + } // callseq 12 + and.pred %p9, %p118, %p137; + selp.b32 %r244, %r53, 0, %p137; + setp.lt.s32 %p138, %r242, 0; + or.b32 %r245, %r244, 2146435072; + selp.b32 %r63, %r245, %r244, %p138; + add.f64 %fd312, %fd477, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r246}, %fd312; + } + and.b32 %r64, %r246, 2146435072; + setp.ne.s32 %p139, %r64, 2146435072; + cvt.f64.f32 %fd39, %f72; + { + .reg .b32 %temp; + mov.b64 {%temp, %r65}, %fd39; + } + abs.f64 %fd313, %fd39; + { // callseq 13, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd313; + .param .b64 param1; + st.param.f64 [param1+0], %fd309; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd40, [retval0+0]; + } // callseq 13 + setp.lt.s32 %p140, %r65, 0; + and.pred %p10, %p140, %p117; + and.b32 %r66, %r242, 2147483647; + selp.b32 %r247, %r218, %r217, %p138; + selp.b32 %r67, 1072693248, %r247, %p123; + add.f64 %fd41, %fd39, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r248}, %fd41; + } + and.b32 %r68, %r248, 2146435072; + setp.ne.s32 %p141, %r68, 2146435072; + setp.gt.s32 %p142, %r242, -1; + selp.b32 %r249, 2146435072, 0, %p142; + setp.ne.s32 %p143, %r66, 1071644672; + and.pred %p144, %p143, %p9; + or.b32 %r250, %r249, -2147483648; + selp.b32 %r69, %r250, %r249, %p144; + setp.gtu.f64 %p145, %fd313, 0d7FF0000000000000; + cvt.f64.f32 %fd42, %f76; + { + .reg .b32 %temp; + mov.b64 {%temp, %r70}, %fd42; + } + abs.f64 %fd314, %fd42; + { // callseq 14, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd314; + .param .b64 param1; + st.param.f64 [param1+0], %fd309; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd43, [retval0+0]; + } // callseq 14 + setp.lt.s32 %p146, %r70, 0; + and.pred %p11, %p146, %p117; + setp.gt.f64 %p147, %fd313, 0d3FF0000000000000; + selp.b32 %r251, 2146435072, 0, %p147; + xor.b32 %r252, %r251, 2146435072; + selp.b32 %r253, %r252, %r251, %p119; + setp.eq.f32 %p148, %f72, 0fBF800000; + selp.b32 %r71, 1072693248, %r253, %p148; + add.f64 %fd44, %fd42, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r254}, %fd44; + } + and.b32 %r72, %r254, 2146435072; + setp.ne.s32 %p149, %r72, 2146435072; + setp.gtu.f64 %p150, %fd314, 0d7FF0000000000000; + setp.gt.f64 %p151, %fd314, 0d3FF0000000000000; + selp.b32 %r255, 2146435072, 0, %p151; + xor.b32 %r256, %r255, 2146435072; + selp.b32 %r257, %r256, %r255, %p119; + setp.eq.f32 %p152, %f76, 0fBF800000; + selp.b32 %r73, 1072693248, %r257, %p152; + mov.f64 %fd315, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r74}, %fd315; + } + and.b32 %r75, %r74, 2147483647; + setp.gt.s32 %p153, %r74, -1; + selp.b32 %r76, 2146435072, 0, %p153; + or.pred %p14, %p120, %p121; + or.pred %p17, %p139, %p121; + or.pred %p18, %p141, %p145; + or.pred %p19, %p149, %p150; + shr.s32 %r258, %r74, 31; + and.b32 %r77, %r258, 2146435072; + +$L__BB1_56: + cvt.rn.f32.s32 %f2653, %r781; + sub.f32 %f2652, %f2653, %f2868; + add.f32 %f2651, %f2652, 0f3F000000; + mul.f32 %f2650, %f2651, %f73; + abs.f32 %f2649, %f2650; + setp.ltu.f32 %p154, %f2649, 0f3F8060FE; + mov.f32 %f2828, %f75; + @%p154 bra $L__BB1_58; + + mov.f32 %f2758, 0f3F800000; + ex2.approx.ftz.f32 %f849, %f75; + sub.f32 %f851, %f2758, %f849; + mov.b32 %r259, %f851; + or.b32 %r260, %r51, %r259; + mov.b32 %f2828, %r260; + +$L__BB1_58: + cvt.rn.f32.s32 %f2658, %r781; + sub.f32 %f2657, %f2658, %f2868; + add.f32 %f2656, %f2657, 0fBF000000; + mul.f32 %f2655, %f2656, %f73; + abs.f32 %f2654, %f2655; + setp.ltu.f32 %p155, %f2654, 0f3F8060FE; + mov.f32 %f2829, %f78; + @%p155 bra $L__BB1_60; + + mov.f32 %f2757, 0f3F800000; + ex2.approx.ftz.f32 %f852, %f78; + sub.f32 %f854, %f2757, %f852; + mov.b32 %r261, %f854; + or.b32 %r262, %r52, %r261; + mov.b32 %f2829, %r262; + +$L__BB1_60: + sub.f32 %f855, %f2828, %f2829; + mul.f32 %f111, %f855, 0f3F000000; + cvt.rn.f32.s32 %f112, %r782; + sub.f32 %f113, %f112, %f2867; + add.f32 %f114, %f113, 0f3F000000; + mul.f32 %f115, %f73, %f114; + abs.f32 %f856, %f115; + setp.ltu.f32 %p156, %f856, 0f3F8060FE; + setp.ge.f32 %p157, %f856, 0f3F8060FE; + mul.f32 %f857, %f115, %f115; + selp.f32 %f858, %f856, %f857, %p157; + selp.f32 %f859, 0f3789CA3C, 0f38B1E96A, %p157; + selp.f32 %f860, 0fB9F560B9, 0fBA574D20, %p157; + fma.rn.f32 %f861, %f859, %f858, %f860; + selp.f32 %f862, 0f3BAC840B, 0f3BAAD5EA, %p157; + fma.rn.f32 %f863, %f861, %f858, %f862; + selp.f32 %f864, 0fBD0C8162, 0fBCDC1BE7, %p157; + fma.rn.f32 %f865, %f863, %f858, %f864; + selp.f32 %f866, 0f3E1CF906, 0f3DE718AF, %p157; + fma.rn.f32 %f867, %f865, %f858, %f866; + selp.f32 %f868, 0f3F6A937E, 0fBEC093AC, %p157; + fma.rn.f32 %f869, %f867, %f858, %f868; + selp.f32 %f870, 0f3F20D842, 0f3E0375D3, %p157; + fma.rn.f32 %f871, %f869, %f858, %f870; + neg.f32 %f872, %f856; + selp.f32 %f873, %f872, %f115, %p157; + fma.rn.f32 %f2830, %f871, %f873, %f873; + @%p156 bra $L__BB1_62; + + mov.f32 %f2756, 0f3F800000; + ex2.approx.ftz.f32 %f874, %f2830; + sub.f32 %f876, %f2756, %f874; + mov.b32 %r263, %f876; + mov.b32 %r264, %f115; + and.b32 %r265, %r264, -2147483648; + or.b32 %r266, %r265, %r263; + mov.b32 %f2830, %r266; + +$L__BB1_62: + cvt.rn.f32.s32 %f2660, %r782; + sub.f32 %f2659, %f2660, %f2867; + add.f32 %f119, %f2659, 0fBF000000; + mul.f32 %f120, %f73, %f119; + abs.f32 %f877, %f120; + setp.ltu.f32 %p158, %f877, 0f3F8060FE; + setp.ge.f32 %p159, %f877, 0f3F8060FE; + mul.f32 %f878, %f120, %f120; + selp.f32 %f879, %f877, %f878, %p159; + selp.f32 %f880, 0f3789CA3C, 0f38B1E96A, %p159; + selp.f32 %f881, 0fB9F560B9, 0fBA574D20, %p159; + fma.rn.f32 %f882, %f880, %f879, %f881; + selp.f32 %f883, 0f3BAC840B, 0f3BAAD5EA, %p159; + fma.rn.f32 %f884, %f882, %f879, %f883; + selp.f32 %f885, 0fBD0C8162, 0fBCDC1BE7, %p159; + fma.rn.f32 %f886, %f884, %f879, %f885; + selp.f32 %f887, 0f3E1CF906, 0f3DE718AF, %p159; + fma.rn.f32 %f888, %f886, %f879, %f887; + selp.f32 %f889, 0f3F6A937E, 0fBEC093AC, %p159; + fma.rn.f32 %f890, %f888, %f879, %f889; + selp.f32 %f891, 0f3F20D842, 0f3E0375D3, %p159; + fma.rn.f32 %f892, %f890, %f879, %f891; + neg.f32 %f893, %f877; + selp.f32 %f894, %f893, %f120, %p159; + fma.rn.f32 %f2831, %f892, %f894, %f894; + @%p158 bra $L__BB1_64; + + mov.f32 %f2755, 0f3F800000; + ex2.approx.ftz.f32 %f895, %f2831; + sub.f32 %f897, %f2755, %f895; + mov.b32 %r267, %f897; + mov.b32 %r268, %f120; + and.b32 %r269, %r268, -2147483648; + or.b32 %r270, %r269, %r267; + mov.b32 %f2831, %r270; + +$L__BB1_64: + sub.f32 %f899, %f2830, %f2831; + mul.f32 %f124, %f899, 0f3F000000; + mul.f32 %f900, %f111, %f2866; + fma.rn.f32 %f125, %f124, %f900, %f2865; + mad.lo.s32 %r271, %r782, %r102, %r781; + add.s32 %r272, %r271, %r2; + mul.wide.s32 %rd26, %r272, 4; + add.s64 %rd27, %rd1, %rd26; + ld.global.f32 %f126, [%rd27]; + setp.eq.f32 %p160, %f84, 0f7F800000; + mov.f32 %f2832, 0f7F800000; + @%p160 bra $L__BB1_66; + + fma.rn.f32 %f2832, %f84, %f83, %f84; + +$L__BB1_66: + setp.geu.f32 %p660, %f80, 0f00000000; + mov.b32 %r273, %f2832; + xor.b32 %r274, %r273, -2147483648; + mov.b32 %f901, %r274; + selp.f32 %f129, %f901, %f2832, %p4; + add.f32 %f902, %f80, %f80; + selp.f32 %f903, %f902, 0f00000000, %p106; + setp.eq.f32 %p162, %f80, 0f00000000; + selp.f32 %f2833, %f903, %f129, %p162; + @%p660 bra $L__BB1_69; + + cvt.rzi.f32.f32 %f905, %f576; + setp.eq.f32 %p163, %f905, 0f40000000; + mov.f32 %f2833, %f129; + @%p163 bra $L__BB1_69; + + mov.f32 %f2833, 0f7FFFFFFF; + +$L__BB1_69: + mov.f32 %f2663, 0f3FB8AA3B; + mov.f32 %f2662, 0f3F000000; + abs.f32 %f2661, %f80; + add.f32 %f908, %f2661, 0f40000000; + mov.b32 %r275, %f908; + setp.gt.s32 %p164, %r275, 2139095039; + add.f32 %f909, %f80, 0f40000000; + setp.gtu.f32 %p165, %f2661, 0f7F800000; + mov.f32 %f2834, 0f7F800000; + selp.f32 %f910, %f909, %f2833, %p165; + selp.f32 %f911, 0fFF800000, 0f7F800000, %p4; + setp.neu.f32 %p166, %f2661, 0f7F800000; + selp.f32 %f912, %f910, %f911, %p166; + selp.f32 %f913, %f912, %f2833, %p164; + mul.f32 %f914, %f913, 0fBF000000; + setp.eq.f32 %p167, %f80, 0f3F800000; + selp.f32 %f915, 0fBF000000, %f914, %p167; + mov.f32 %f917, 0f3BBB989D; + fma.rn.f32 %f918, %f915, %f917, %f2662; + mov.f32 %f920, 0f437C0000; + cvt.sat.f32.f32 %f921, %f918; + mov.f32 %f922, 0f4B400001; + fma.rm.f32 %f923, %f921, %f920, %f922; + add.f32 %f924, %f923, 0fCB40007F; + neg.f32 %f925, %f924; + fma.rn.f32 %f926, %f915, %f2663, %f925; + mov.f32 %f927, 0f32A57060; + fma.rn.f32 %f928, %f915, %f927, %f926; + mov.b32 %r276, %f923; + shl.b32 %r277, %r276, 23; + mov.b32 %f929, %r277; + ex2.approx.ftz.f32 %f930, %f928; + mul.f32 %f132, %f930, %f929; + setp.eq.f32 %p168, %f88, 0f7F800000; + @%p168 bra $L__BB1_71; + + fma.rn.f32 %f2834, %f88, %f87, %f88; + +$L__BB1_71: + setp.geu.f32 %p663, %f85, 0f00000000; + setp.lt.f32 %p662, %f85, 0f00000000; + and.pred %p661, %p662, %p106; + mov.b32 %r278, %f2834; + xor.b32 %r279, %r278, -2147483648; + mov.b32 %f931, %r279; + selp.f32 %f135, %f931, %f2834, %p661; + add.f32 %f932, %f85, %f85; + selp.f32 %f933, %f932, 0f00000000, %p106; + setp.eq.f32 %p170, %f85, 0f00000000; + selp.f32 %f2835, %f933, %f135, %p170; + @%p663 bra $L__BB1_74; + + cvt.rzi.f32.f32 %f935, %f576; + setp.eq.f32 %p171, %f935, 0f40000000; + mov.f32 %f2835, %f135; + @%p171 bra $L__BB1_74; + + mov.f32 %f2835, 0f7FFFFFFF; + +$L__BB1_74: + mov.f32 %f2670, 0f32A57060; + mov.f32 %f2669, 0f4B400001; + mov.f32 %f2668, 0f437C0000; + mov.f32 %f2667, 0f3BBB989D; + abs.f32 %f2666, %f85; + setp.lt.f32 %p665, %f85, 0f00000000; + and.pred %p664, %p665, %p106; + mov.f32 %f2665, 0f3FB8AA3B; + mov.f32 %f2664, 0f3F000000; + add.f32 %f937, %f2666, 0f40000000; + mov.b32 %r280, %f937; + setp.gt.s32 %p172, %r280, 2139095039; + add.f32 %f938, %f85, 0f40000000; + setp.gtu.f32 %p173, %f2666, 0f7F800000; + selp.f32 %f939, %f938, %f2835, %p173; + selp.f32 %f940, 0fFF800000, 0f7F800000, %p664; + setp.neu.f32 %p174, %f2666, 0f7F800000; + selp.f32 %f941, %f939, %f940, %p174; + selp.f32 %f942, %f941, %f2835, %p172; + mul.f32 %f943, %f942, 0fBF000000; + setp.eq.f32 %p175, %f85, 0f3F800000; + selp.f32 %f944, 0fBF000000, %f943, %p175; + fma.rn.f32 %f947, %f944, %f2667, %f2664; + cvt.sat.f32.f32 %f950, %f947; + fma.rm.f32 %f952, %f950, %f2668, %f2669; + add.f32 %f953, %f952, 0fCB40007F; + neg.f32 %f954, %f953; + fma.rn.f32 %f955, %f944, %f2665, %f954; + fma.rn.f32 %f957, %f944, %f2670, %f955; + mov.b32 %r281, %f952; + shl.b32 %r282, %r281, 23; + mov.b32 %f958, %r282; + ex2.approx.ftz.f32 %f959, %f957; + mul.f32 %f138, %f959, %f958; + sub.f32 %f960, %f132, %f138; + mul.f32 %f961, %f58, %f960; + mul.f32 %f139, %f124, %f961; + not.pred %p176, %p6; + mov.f64 %fd510, %fd37; + @%p176 bra $L__BB1_76; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r283}, %fd37; + } + xor.b32 %r284, %r283, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r285, %temp}, %fd37; + } + mov.b64 %fd510, {%r285, %r284}; + +$L__BB1_76: + setp.eq.f32 %p177, %f2864, 0f00000000; + @%p177 bra $L__BB1_80; + bra.uni $L__BB1_77; + +$L__BB1_80: + mov.u32 %r286, 0; + mov.b64 %fd510, {%r286, %r56}; + bra.uni $L__BB1_81; + +$L__BB1_77: + setp.gt.s32 %p178, %r53, -1; + @%p178 bra $L__BB1_81; + + cvt.rzi.f64.f64 %fd317, %fd309; + setp.eq.f64 %p179, %fd317, 0d4008000000000000; + @%p179 bra $L__BB1_81; + + mov.f64 %fd510, 0dFFF8000000000000; + +$L__BB1_81: + selp.f64 %fd511, %fd510, %fd35, %p120; + @%p14 bra $L__BB1_86; + + setp.eq.s32 %p181, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r287, %temp}, %fd309; + } + setp.eq.s32 %p182, %r287, 0; + and.pred %p183, %p181, %p182; + @%p183 bra $L__BB1_85; + bra.uni $L__BB1_83; + +$L__BB1_85: + mov.u32 %r291, 0; + mov.b64 %fd511, {%r291, %r59}; + bra.uni $L__BB1_86; + +$L__BB1_83: + and.b32 %r288, %r53, 2147483647; + setp.ne.s32 %p184, %r288, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r289, %temp}, %fd307; + } + setp.ne.s32 %p185, %r289, 0; + or.pred %p186, %p184, %p185; + mov.f64 %fd511, %fd510; + @%p186 bra $L__BB1_86; + + mov.u32 %r290, 0; + mov.b64 %fd511, {%r290, %r62}; + +$L__BB1_86: + mov.f32 %f2681, 0f3102E308; + mov.f32 %f2680, 0fBF317218; + mov.f32 %f2679, 0f35BFBE8E; + mov.f32 %f2678, 0f3F317200; + mov.f32 %f2677, 0f3DAAAABD; + mov.f32 %f2676, 0f3C4CAF63; + mov.f32 %f2675, 0f3B18F0FE; + cvt.rn.f32.s32 %f2674, %r781; + add.f32 %f2673, %f2674, 0f3F000000; + sub.f32 %f2672, %f2673, %f2868; + mov.f32 %f2671, 0f3FB8AA3B; + setp.eq.f32 %p187, %f2864, 0f3F800000; + selp.f64 %fd321, 0d3FF0000000000000, %fd511, %p187; + div.rn.f64 %fd322, %fd34, %fd321; + mul.f32 %f963, %f76, %f138; + mul.f32 %f964, %f2672, %f132; + sub.f32 %f965, %f964, %f963; + cvt.f64.f32 %fd323, %f965; + mul.f64 %fd324, %fd322, %fd323; + cvt.f64.f32 %fd53, %f124; + mul.f64 %fd325, %fd324, %fd53; + cvt.rn.f32.f64 %f140, %fd325; + add.f32 %f966, %f112, 0f3F000000; + sub.f32 %f141, %f966, %f2867; + div.rn.f32 %f142, %f141, %f2864; + abs.f32 %f143, %f142; + setp.lt.f32 %p188, %f143, 0f00800000; + mul.f32 %f967, %f143, 0f4B800000; + selp.f32 %f968, %f967, %f143, %p188; + selp.f32 %f969, 0fC3170000, 0fC2FE0000, %p188; + mov.b32 %r292, %f968; + and.b32 %r293, %r292, 8388607; + or.b32 %r294, %r293, 1065353216; + mov.b32 %f970, %r294; + shr.u32 %r295, %r292, 23; + cvt.rn.f32.u32 %f971, %r295; + add.f32 %f972, %f969, %f971; + setp.gt.f32 %p189, %f970, 0f3FB504F3; + mul.f32 %f973, %f970, 0f3F000000; + add.f32 %f974, %f972, 0f3F800000; + selp.f32 %f975, %f974, %f972, %p189; + selp.f32 %f976, %f973, %f970, %p189; + add.f32 %f977, %f976, 0fBF800000; + add.f32 %f978, %f976, 0f3F800000; + rcp.approx.ftz.f32 %f979, %f978; + add.f32 %f980, %f977, %f977; + mul.f32 %f982, %f980, %f979; + mul.f32 %f983, %f982, %f982; + fma.rn.f32 %f986, %f2675, %f983, %f2676; + fma.rn.f32 %f988, %f986, %f983, %f2677; + mul.rn.f32 %f989, %f988, %f983; + mul.rn.f32 %f990, %f989, %f982; + sub.f32 %f991, %f977, %f982; + add.f32 %f992, %f991, %f991; + neg.f32 %f993, %f982; + fma.rn.f32 %f994, %f993, %f977, %f992; + mul.rn.f32 %f995, %f979, %f994; + add.f32 %f996, %f990, %f982; + sub.f32 %f997, %f982, %f996; + add.f32 %f998, %f990, %f997; + add.f32 %f999, %f995, %f998; + add.f32 %f1000, %f996, %f999; + sub.f32 %f1001, %f996, %f1000; + add.f32 %f1002, %f999, %f1001; + mul.rn.f32 %f1004, %f975, %f2678; + mul.rn.f32 %f1006, %f975, %f2679; + add.f32 %f1007, %f1004, %f1000; + sub.f32 %f1008, %f1004, %f1007; + add.f32 %f1009, %f1000, %f1008; + add.f32 %f1010, %f1002, %f1009; + add.f32 %f1011, %f1006, %f1010; + add.f32 %f1012, %f1007, %f1011; + sub.f32 %f1013, %f1007, %f1012; + add.f32 %f1014, %f1011, %f1013; + mul.rn.f32 %f1015, %f576, %f1012; + neg.f32 %f1016, %f1015; + fma.rn.f32 %f1017, %f576, %f1012, %f1016; + fma.rn.f32 %f1018, %f576, %f1014, %f1017; + mov.f32 %f1019, 0f00000000; + fma.rn.f32 %f1020, %f1019, %f1012, %f1018; + add.rn.f32 %f1021, %f1015, %f1020; + neg.f32 %f1022, %f1021; + add.rn.f32 %f1023, %f1015, %f1022; + add.rn.f32 %f1024, %f1023, %f1020; + mov.b32 %r296, %f1021; + setp.eq.s32 %p190, %r296, 1118925336; + add.s32 %r297, %r296, -1; + mov.b32 %f1025, %r297; + add.f32 %f1026, %f1024, 0f37000000; + selp.f32 %f144, %f1026, %f1024, %p190; + selp.f32 %f1027, %f1025, %f1021, %p190; + mul.rn.f32 %f1029, %f1027, %f2671; + cvt.rzi.f32.f32 %f1030, %f1029; + abs.f32 %f1031, %f1030; + setp.gt.f32 %p191, %f1031, 0f42FC0000; + mov.b32 %r298, %f1030; + and.b32 %r299, %r298, -2147483648; + or.b32 %r300, %r299, 1123811328; + mov.b32 %f1032, %r300; + selp.f32 %f1033, %f1032, %f1030, %p191; + fma.rn.f32 %f1035, %f1033, %f2680, %f1027; + fma.rn.f32 %f1037, %f1033, %f2681, %f1035; + mul.f32 %f1038, %f1037, 0f3FB8AA3B; + add.f32 %f1039, %f1033, 0f4B40007F; + mov.b32 %r301, %f1039; + shl.b32 %r302, %r301, 23; + mov.b32 %f1040, %r302; + ex2.approx.ftz.f32 %f1041, %f1038; + mul.f32 %f145, %f1041, %f1040; + setp.eq.f32 %p192, %f145, 0f7F800000; + mov.f32 %f2836, 0f7F800000; + @%p192 bra $L__BB1_88; + + fma.rn.f32 %f2836, %f145, %f144, %f145; + +$L__BB1_88: + setp.lt.f32 %p193, %f142, 0f00000000; + and.pred %p20, %p193, %p106; + setp.eq.f32 %p195, %f142, 0f00000000; + @%p195 bra $L__BB1_92; + bra.uni $L__BB1_89; + +$L__BB1_92: + add.f32 %f1046, %f142, %f142; + selp.f32 %f2838, %f1046, 0f00000000, %p106; + bra.uni $L__BB1_93; + +$L__BB1_89: + mov.b32 %r303, %f2836; + xor.b32 %r304, %r303, -2147483648; + mov.b32 %f1042, %r304; + selp.f32 %f2838, %f1042, %f2836, %p20; + setp.geu.f32 %p196, %f142, 0f00000000; + @%p196 bra $L__BB1_93; + + cvt.rzi.f32.f32 %f1044, %f576; + setp.eq.f32 %p197, %f1044, 0f40000000; + @%p197 bra $L__BB1_93; + + mov.f32 %f2838, 0f7FFFFFFF; + +$L__BB1_93: + abs.f32 %f2761, %f142; + add.f32 %f1047, %f2761, 0f40000000; + mov.b32 %r305, %f1047; + setp.lt.s32 %p199, %r305, 2139095040; + @%p199 bra $L__BB1_98; + + abs.f32 %f2766, %f142; + setp.gtu.f32 %p200, %f2766, 0f7F800000; + @%p200 bra $L__BB1_97; + bra.uni $L__BB1_95; + +$L__BB1_97: + add.f32 %f2838, %f142, 0f40000000; + bra.uni $L__BB1_98; + +$L__BB1_95: + abs.f32 %f2767, %f142; + setp.neu.f32 %p201, %f2767, 0f7F800000; + @%p201 bra $L__BB1_98; + + selp.f32 %f2838, 0fFF800000, 0f7F800000, %p20; + +$L__BB1_98: + mov.f32 %f2695, 0f00000000; + mov.f32 %f2694, 0f3102E308; + mov.f32 %f2693, 0fBF317218; + mov.f32 %f2692, 0f35BFBE8E; + mov.f32 %f2691, 0f3F317200; + mov.f32 %f2690, 0f3DAAAABD; + mov.f32 %f2689, 0f3C4CAF63; + mov.f32 %f2688, 0f3B18F0FE; + mov.f32 %f2687, 0f32A57060; + mov.f32 %f2686, 0f4B400001; + mov.f32 %f2685, 0f437C0000; + mov.f32 %f2684, 0f3BBB989D; + mov.f32 %f2683, 0f3FB8AA3B; + mov.f32 %f2682, 0f3F000000; + mul.f32 %f1049, %f2838, 0fBF000000; + setp.eq.f32 %p202, %f142, 0f3F800000; + selp.f32 %f1050, 0fBF000000, %f1049, %p202; + fma.rn.f32 %f1053, %f1050, %f2684, %f2682; + cvt.sat.f32.f32 %f1056, %f1053; + fma.rm.f32 %f1058, %f1056, %f2685, %f2686; + add.f32 %f1059, %f1058, 0fCB40007F; + neg.f32 %f1060, %f1059; + fma.rn.f32 %f1061, %f1050, %f2683, %f1060; + fma.rn.f32 %f1063, %f1050, %f2687, %f1061; + mov.b32 %r306, %f1058; + shl.b32 %r307, %r306, 23; + mov.b32 %f1064, %r307; + ex2.approx.ftz.f32 %f1065, %f1063; + mul.f32 %f154, %f1065, %f1064; + div.rn.f32 %f155, %f119, %f2864; + abs.f32 %f156, %f155; + setp.lt.f32 %p203, %f156, 0f00800000; + mul.f32 %f1066, %f156, 0f4B800000; + selp.f32 %f1067, %f1066, %f156, %p203; + selp.f32 %f1068, 0fC3170000, 0fC2FE0000, %p203; + mov.b32 %r308, %f1067; + and.b32 %r309, %r308, 8388607; + or.b32 %r310, %r309, 1065353216; + mov.b32 %f1069, %r310; + shr.u32 %r311, %r308, 23; + cvt.rn.f32.u32 %f1070, %r311; + add.f32 %f1071, %f1068, %f1070; + setp.gt.f32 %p204, %f1069, 0f3FB504F3; + mul.f32 %f1072, %f1069, 0f3F000000; + add.f32 %f1073, %f1071, 0f3F800000; + selp.f32 %f1074, %f1073, %f1071, %p204; + selp.f32 %f1075, %f1072, %f1069, %p204; + add.f32 %f1076, %f1075, 0fBF800000; + add.f32 %f1077, %f1075, 0f3F800000; + rcp.approx.ftz.f32 %f1078, %f1077; + add.f32 %f1079, %f1076, %f1076; + mul.f32 %f1081, %f1079, %f1078; + mul.f32 %f1082, %f1081, %f1081; + fma.rn.f32 %f1085, %f2688, %f1082, %f2689; + fma.rn.f32 %f1087, %f1085, %f1082, %f2690; + mul.rn.f32 %f1088, %f1087, %f1082; + mul.rn.f32 %f1089, %f1088, %f1081; + sub.f32 %f1090, %f1076, %f1081; + add.f32 %f1091, %f1090, %f1090; + neg.f32 %f1092, %f1081; + fma.rn.f32 %f1093, %f1092, %f1076, %f1091; + mul.rn.f32 %f1094, %f1078, %f1093; + add.f32 %f1095, %f1089, %f1081; + sub.f32 %f1096, %f1081, %f1095; + add.f32 %f1097, %f1089, %f1096; + add.f32 %f1098, %f1094, %f1097; + add.f32 %f1099, %f1095, %f1098; + sub.f32 %f1100, %f1095, %f1099; + add.f32 %f1101, %f1098, %f1100; + mul.rn.f32 %f1103, %f1074, %f2691; + mul.rn.f32 %f1105, %f1074, %f2692; + add.f32 %f1106, %f1103, %f1099; + sub.f32 %f1107, %f1103, %f1106; + add.f32 %f1108, %f1099, %f1107; + add.f32 %f1109, %f1101, %f1108; + add.f32 %f1110, %f1105, %f1109; + add.f32 %f1111, %f1106, %f1110; + sub.f32 %f1112, %f1106, %f1111; + add.f32 %f1113, %f1110, %f1112; + mul.rn.f32 %f1114, %f576, %f1111; + neg.f32 %f1115, %f1114; + fma.rn.f32 %f1116, %f576, %f1111, %f1115; + fma.rn.f32 %f1117, %f576, %f1113, %f1116; + fma.rn.f32 %f1119, %f2695, %f1111, %f1117; + add.rn.f32 %f1120, %f1114, %f1119; + neg.f32 %f1121, %f1120; + add.rn.f32 %f1122, %f1114, %f1121; + add.rn.f32 %f1123, %f1122, %f1119; + mov.b32 %r312, %f1120; + setp.eq.s32 %p205, %r312, 1118925336; + add.s32 %r313, %r312, -1; + mov.b32 %f1124, %r313; + add.f32 %f1125, %f1123, 0f37000000; + selp.f32 %f157, %f1125, %f1123, %p205; + selp.f32 %f1126, %f1124, %f1120, %p205; + mul.rn.f32 %f1127, %f1126, %f2683; + cvt.rzi.f32.f32 %f1128, %f1127; + abs.f32 %f1129, %f1128; + setp.gt.f32 %p206, %f1129, 0f42FC0000; + mov.b32 %r314, %f1128; + and.b32 %r315, %r314, -2147483648; + or.b32 %r316, %r315, 1123811328; + mov.b32 %f1130, %r316; + selp.f32 %f1131, %f1130, %f1128, %p206; + fma.rn.f32 %f1133, %f1131, %f2693, %f1126; + fma.rn.f32 %f1135, %f1131, %f2694, %f1133; + mul.f32 %f1136, %f1135, 0f3FB8AA3B; + add.f32 %f1137, %f1131, 0f4B40007F; + mov.b32 %r317, %f1137; + shl.b32 %r318, %r317, 23; + mov.b32 %f1138, %r318; + ex2.approx.ftz.f32 %f1139, %f1136; + mul.f32 %f158, %f1139, %f1138; + setp.eq.f32 %p207, %f158, 0f7F800000; + mov.f32 %f2839, 0f7F800000; + @%p207 bra $L__BB1_100; + + fma.rn.f32 %f2839, %f158, %f157, %f158; + +$L__BB1_100: + setp.lt.f32 %p208, %f155, 0f00000000; + and.pred %p21, %p208, %p106; + setp.eq.f32 %p210, %f155, 0f00000000; + @%p210 bra $L__BB1_104; + bra.uni $L__BB1_101; + +$L__BB1_104: + add.f32 %f1144, %f155, %f155; + selp.f32 %f2841, %f1144, 0f00000000, %p106; + bra.uni $L__BB1_105; + +$L__BB1_101: + mov.b32 %r319, %f2839; + xor.b32 %r320, %r319, -2147483648; + mov.b32 %f1140, %r320; + selp.f32 %f2841, %f1140, %f2839, %p21; + setp.geu.f32 %p211, %f155, 0f00000000; + @%p211 bra $L__BB1_105; + + cvt.rzi.f32.f32 %f1142, %f576; + setp.eq.f32 %p212, %f1142, 0f40000000; + @%p212 bra $L__BB1_105; + + mov.f32 %f2841, 0f7FFFFFFF; + +$L__BB1_105: + abs.f32 %f2768, %f155; + add.f32 %f1145, %f2768, 0f40000000; + mov.b32 %r321, %f1145; + setp.lt.s32 %p214, %r321, 2139095040; + @%p214 bra $L__BB1_110; + + abs.f32 %f2769, %f155; + setp.gtu.f32 %p215, %f2769, 0f7F800000; + @%p215 bra $L__BB1_109; + bra.uni $L__BB1_107; + +$L__BB1_109: + add.f32 %f2841, %f155, 0f40000000; + bra.uni $L__BB1_110; + +$L__BB1_107: + abs.f32 %f2770, %f155; + setp.neu.f32 %p216, %f2770, 0f7F800000; + @%p216 bra $L__BB1_110; + + selp.f32 %f2841, 0fFF800000, 0f7F800000, %p21; + +$L__BB1_110: + mov.f32 %f2701, 0f32A57060; + mov.f32 %f2700, 0f4B400001; + mov.f32 %f2699, 0f437C0000; + mov.f32 %f2698, 0f3BBB989D; + mov.f32 %f2697, 0f3FB8AA3B; + mov.f32 %f2696, 0f3F000000; + mul.f32 %f1146, %f2841, 0fBF000000; + setp.eq.f32 %p217, %f155, 0f3F800000; + selp.f32 %f1147, 0fBF000000, %f1146, %p217; + fma.rn.f32 %f1150, %f1147, %f2698, %f2696; + cvt.sat.f32.f32 %f1153, %f1150; + fma.rm.f32 %f1155, %f1153, %f2699, %f2700; + add.f32 %f1156, %f1155, 0fCB40007F; + neg.f32 %f1157, %f1156; + fma.rn.f32 %f1158, %f1147, %f2697, %f1157; + fma.rn.f32 %f1160, %f1147, %f2701, %f1158; + mov.b32 %r322, %f1155; + shl.b32 %r323, %r322, 23; + mov.b32 %f1161, %r323; + ex2.approx.ftz.f32 %f1162, %f1160; + mul.f32 %f167, %f1162, %f1161; + sub.f32 %f1163, %f154, %f167; + mul.f32 %f1164, %f58, %f1163; + mul.f32 %f168, %f111, %f1164; + mov.f64 %fd513, %fd37; + @%p176 bra $L__BB1_112; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r324}, %fd37; + } + xor.b32 %r325, %r324, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r326, %temp}, %fd37; + } + mov.b64 %fd513, {%r326, %r325}; + +$L__BB1_112: + setp.eq.f32 %p676, %f2864, 0f00000000; + @%p676 bra $L__BB1_116; + bra.uni $L__BB1_113; + +$L__BB1_116: + mov.u32 %r327, 0; + mov.b64 %fd513, {%r327, %r56}; + bra.uni $L__BB1_117; + +$L__BB1_113: + setp.gt.s32 %p220, %r53, -1; + @%p220 bra $L__BB1_117; + + cvt.rzi.f64.f64 %fd327, %fd309; + setp.eq.f64 %p221, %fd327, 0d4008000000000000; + @%p221 bra $L__BB1_117; + + mov.f64 %fd513, 0dFFF8000000000000; + +$L__BB1_117: + selp.f64 %fd514, %fd513, %fd35, %p120; + @%p14 bra $L__BB1_122; + + setp.eq.s32 %p223, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r328, %temp}, %fd309; + } + setp.eq.s32 %p224, %r328, 0; + and.pred %p225, %p223, %p224; + @%p225 bra $L__BB1_121; + bra.uni $L__BB1_119; + +$L__BB1_121: + mov.u32 %r332, 0; + mov.b64 %fd514, {%r332, %r59}; + bra.uni $L__BB1_122; + +$L__BB1_119: + and.b32 %r329, %r53, 2147483647; + setp.ne.s32 %p226, %r329, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r330, %temp}, %fd307; + } + setp.ne.s32 %p227, %r330, 0; + or.pred %p228, %p226, %p227; + mov.f64 %fd514, %fd513; + @%p228 bra $L__BB1_122; + + mov.u32 %r331, 0; + mov.b64 %fd514, {%r331, %r62}; + +$L__BB1_122: + cvt.rn.f32.s32 %f2764, %r782; + add.f32 %f2763, %f2764, 0f3F000000; + sub.f32 %f2762, %f2763, %f2867; + setp.eq.f32 %p677, %f2864, 0f3F800000; + selp.f64 %fd331, 0d3FF0000000000000, %fd514, %p677; + div.rn.f64 %fd332, %fd34, %fd331; + mul.f32 %f1166, %f119, %f167; + mul.f32 %f1167, %f2762, %f154; + sub.f32 %f1168, %f1167, %f1166; + cvt.f64.f32 %fd333, %f1168; + mul.f64 %fd334, %fd332, %fd333; + cvt.f64.f32 %fd335, %f111; + mul.f64 %fd336, %fd334, %fd335; + cvt.rn.f32.f64 %f169, %fd336; + setp.eq.f32 %p230, %f92, 0f7F800000; + mov.f32 %f2842, 0f7F800000; + @%p230 bra $L__BB1_124; + + fma.rn.f32 %f2842, %f92, %f91, %f92; + +$L__BB1_124: + setp.geu.f32 %p668, %f89, 0f00000000; + setp.lt.f32 %p667, %f89, 0f00000000; + and.pred %p666, %p667, %p106; + mov.b32 %r333, %f2842; + xor.b32 %r334, %r333, -2147483648; + mov.b32 %f1169, %r334; + selp.f32 %f172, %f1169, %f2842, %p666; + add.f32 %f1170, %f89, %f89; + selp.f32 %f1171, %f1170, 0f00000000, %p106; + setp.eq.f32 %p232, %f89, 0f00000000; + selp.f32 %f2843, %f1171, %f172, %p232; + @%p668 bra $L__BB1_127; + + cvt.rzi.f32.f32 %f1173, %f576; + setp.eq.f32 %p233, %f1173, 0f40000000; + mov.f32 %f2843, %f172; + @%p233 bra $L__BB1_127; + + mov.f32 %f2843, 0f7FFFFFFF; + +$L__BB1_127: + abs.f32 %f2708, %f89; + setp.lt.f32 %p670, %f89, 0f00000000; + and.pred %p669, %p670, %p106; + mov.f32 %f2707, 0f32A57060; + mov.f32 %f2706, 0f4B400001; + mov.f32 %f2705, 0f437C0000; + mov.f32 %f2704, 0f3BBB989D; + mov.f32 %f2703, 0f3FB8AA3B; + mov.f32 %f2702, 0f3F000000; + add.f32 %f1176, %f2708, 0f40000000; + mov.b32 %r335, %f1176; + setp.gt.s32 %p234, %r335, 2139095039; + add.f32 %f1177, %f89, 0f40000000; + setp.gtu.f32 %p235, %f2708, 0f7F800000; + mov.f32 %f2844, 0f7F800000; + selp.f32 %f1178, %f1177, %f2843, %p235; + selp.f32 %f1179, 0fFF800000, 0f7F800000, %p669; + setp.neu.f32 %p236, %f2708, 0f7F800000; + selp.f32 %f1180, %f1178, %f1179, %p236; + selp.f32 %f1181, %f1180, %f2843, %p234; + mul.f32 %f1182, %f1181, 0fBF000000; + setp.eq.f32 %p237, %f89, 0f3F800000; + selp.f32 %f1183, 0fBF000000, %f1182, %p237; + fma.rn.f32 %f1186, %f1183, %f2704, %f2702; + cvt.sat.f32.f32 %f1189, %f1186; + fma.rm.f32 %f1191, %f1189, %f2705, %f2706; + add.f32 %f1192, %f1191, 0fCB40007F; + neg.f32 %f1193, %f1192; + fma.rn.f32 %f1194, %f1183, %f2703, %f1193; + fma.rn.f32 %f1196, %f1183, %f2707, %f1194; + mov.b32 %r336, %f1191; + shl.b32 %r337, %r336, 23; + mov.b32 %f1197, %r337; + ex2.approx.ftz.f32 %f1198, %f1196; + mul.f32 %f175, %f1198, %f1197; + setp.eq.f32 %p238, %f96, 0f7F800000; + @%p238 bra $L__BB1_129; + + fma.rn.f32 %f2844, %f96, %f95, %f96; + +$L__BB1_129: + setp.geu.f32 %p673, %f93, 0f00000000; + setp.lt.f32 %p672, %f93, 0f00000000; + and.pred %p671, %p672, %p106; + mov.b32 %r338, %f2844; + xor.b32 %r339, %r338, -2147483648; + mov.b32 %f1199, %r339; + selp.f32 %f178, %f1199, %f2844, %p671; + add.f32 %f1200, %f93, %f93; + selp.f32 %f1201, %f1200, 0f00000000, %p106; + setp.eq.f32 %p240, %f93, 0f00000000; + selp.f32 %f2845, %f1201, %f178, %p240; + @%p673 bra $L__BB1_132; + + cvt.rzi.f32.f32 %f1203, %f576; + setp.eq.f32 %p241, %f1203, 0f40000000; + mov.f32 %f2845, %f178; + @%p241 bra $L__BB1_132; + + mov.f32 %f2845, 0f7FFFFFFF; + +$L__BB1_132: + cvt.rn.f32.s32 %f2717, %r781; + sub.f32 %f2716, %f2717, %f2868; + abs.f32 %f2715, %f93; + setp.lt.f32 %p675, %f93, 0f00000000; + and.pred %p674, %p675, %p106; + mov.f32 %f2714, 0f32A57060; + mov.f32 %f2713, 0f4B400001; + mov.f32 %f2712, 0f437C0000; + mov.f32 %f2711, 0f3BBB989D; + mov.f32 %f2710, 0f3FB8AA3B; + mov.f32 %f2709, 0f3F000000; + add.f32 %f1205, %f2715, 0f40000000; + mov.b32 %r340, %f1205; + setp.gt.s32 %p242, %r340, 2139095039; + add.f32 %f1206, %f93, 0f40000000; + setp.gtu.f32 %p243, %f2715, 0f7F800000; + selp.f32 %f1207, %f1206, %f2845, %p243; + selp.f32 %f1208, 0fFF800000, 0f7F800000, %p674; + setp.neu.f32 %p244, %f2715, 0f7F800000; + selp.f32 %f1209, %f1207, %f1208, %p244; + selp.f32 %f1210, %f1209, %f2845, %p242; + mul.f32 %f1211, %f1210, 0fBF000000; + setp.eq.f32 %p245, %f93, 0f3F800000; + selp.f32 %f1212, 0fBF000000, %f1211, %p245; + fma.rn.f32 %f1215, %f1212, %f2711, %f2709; + cvt.sat.f32.f32 %f1218, %f1215; + fma.rm.f32 %f1220, %f1218, %f2712, %f2713; + add.f32 %f1221, %f1220, 0fCB40007F; + neg.f32 %f1222, %f1221; + fma.rn.f32 %f1223, %f1212, %f2710, %f1222; + fma.rn.f32 %f1225, %f1212, %f2714, %f1223; + mov.b32 %r341, %f1220; + shl.b32 %r342, %r341, 23; + mov.b32 %f1226, %r342; + ex2.approx.ftz.f32 %f1227, %f1225; + mul.f32 %f181, %f1227, %f1226; + add.f32 %f1228, %f2716, 0f3F800000; + mul.f32 %f1229, %f1228, %f175; + mul.f32 %f1230, %f2716, %f181; + sub.f32 %f1231, %f1229, %f1230; + mul.f32 %f1232, %f59, %f1231; + mul.f32 %f182, %f124, %f1232; + not.pred %p246, %p9; + mov.f64 %fd516, %fd38; + @%p246 bra $L__BB1_134; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r343}, %fd38; + } + xor.b32 %r344, %r343, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r345, %temp}, %fd38; + } + mov.b64 %fd516, {%r345, %r344}; + +$L__BB1_134: + setp.eq.f32 %p678, %f2864, 0f00000000; + @%p678 bra $L__BB1_138; + bra.uni $L__BB1_135; + +$L__BB1_138: + mov.u32 %r346, 0; + mov.b64 %fd516, {%r346, %r63}; + bra.uni $L__BB1_139; + +$L__BB1_135: + setp.gt.s32 %p248, %r53, -1; + @%p248 bra $L__BB1_139; + + mov.f64 %fd483, 0d4014000000000000; + cvt.rzi.f64.f64 %fd338, %fd483; + setp.eq.f64 %p249, %fd338, 0d4014000000000000; + @%p249 bra $L__BB1_139; + + mov.f64 %fd516, 0dFFF8000000000000; + +$L__BB1_139: + cvt.f64.f32 %fd479, %f2864; + add.f64 %fd478, %fd477, 0d4014000000000000; + selp.f64 %fd517, %fd516, %fd478, %p139; + @%p17 bra $L__BB1_144; + + mov.f64 %fd480, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r767}, %fd480; + } + and.b32 %r766, %r767, 2147483647; + setp.eq.s32 %p251, %r766, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r347, %temp}, %fd480; + } + setp.eq.s32 %p252, %r347, 0; + and.pred %p253, %p251, %p252; + @%p253 bra $L__BB1_143; + bra.uni $L__BB1_141; + +$L__BB1_143: + mov.u32 %r351, 0; + mov.b64 %fd517, {%r351, %r67}; + bra.uni $L__BB1_144; + +$L__BB1_141: + and.b32 %r348, %r53, 2147483647; + setp.ne.s32 %p254, %r348, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r349, %temp}, %fd307; + } + setp.ne.s32 %p255, %r349, 0; + or.pred %p256, %p254, %p255; + mov.f64 %fd517, %fd516; + @%p256 bra $L__BB1_144; + + mov.u32 %r350, 0; + mov.b64 %fd517, {%r350, %r69}; + +$L__BB1_144: + setp.eq.f32 %p679, %f2864, 0f3F800000; + selp.f64 %fd344, 0d3FF0000000000000, %fd517, %p679; + div.rn.f64 %fd70, %fd36, %fd344; + not.pred %p258, %p10; + mov.f64 %fd519, %fd40; + @%p258 bra $L__BB1_146; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r352}, %fd40; + } + xor.b32 %r353, %r352, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r354, %temp}, %fd40; + } + mov.b64 %fd519, {%r354, %r353}; + +$L__BB1_146: + cvt.rn.f32.s32 %f2720, %r781; + sub.f32 %f2719, %f2720, %f2868; + add.f32 %f2718, %f2719, 0f3F000000; + setp.eq.f32 %p259, %f2718, 0f00000000; + @%p259 bra $L__BB1_150; + bra.uni $L__BB1_147; + +$L__BB1_150: + mov.u32 %r355, 0; + selp.b32 %r357, %r65, 0, %p117; + or.b32 %r358, %r357, 2146435072; + selp.b32 %r359, %r358, %r357, %p119; + mov.b64 %fd519, {%r355, %r359}; + bra.uni $L__BB1_151; + +$L__BB1_147: + setp.gt.s32 %p260, %r65, -1; + @%p260 bra $L__BB1_151; + + cvt.rzi.f64.f64 %fd346, %fd309; + setp.eq.f64 %p261, %fd346, 0d4008000000000000; + @%p261 bra $L__BB1_151; + + mov.f64 %fd519, 0dFFF8000000000000; + +$L__BB1_151: + selp.f64 %fd520, %fd519, %fd41, %p141; + @%p18 bra $L__BB1_156; + + setp.eq.s32 %p265, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r360, %temp}, %fd309; + } + setp.eq.s32 %p266, %r360, 0; + and.pred %p267, %p265, %p266; + @%p267 bra $L__BB1_155; + bra.uni $L__BB1_153; + +$L__BB1_155: + mov.u32 %r367, 0; + mov.b64 %fd520, {%r367, %r71}; + bra.uni $L__BB1_156; + +$L__BB1_153: + cvt.rn.f32.s32 %f2723, %r781; + sub.f32 %f2722, %f2723, %f2868; + add.f32 %f2721, %f2722, 0f3F000000; + cvt.f64.f32 %fd481, %f2721; + and.b32 %r361, %r65, 2147483647; + setp.ne.s32 %p268, %r361, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r362, %temp}, %fd481; + } + setp.ne.s32 %p269, %r362, 0; + or.pred %p270, %p268, %p269; + mov.f64 %fd520, %fd519; + @%p270 bra $L__BB1_156; + + and.pred %p272, %p125, %p10; + selp.b32 %r365, %r61, %r60, %p272; + mov.u32 %r366, 0; + mov.b64 %fd520, {%r366, %r365}; + +$L__BB1_156: + cvt.rn.f32.s32 %f2726, %r781; + sub.f32 %f2725, %f2726, %f2868; + add.f32 %f2724, %f2725, 0f3F000000; + setp.eq.f32 %p273, %f2724, 0f3F800000; + selp.f64 %fd349, 0d3FF0000000000000, %fd520, %p273; + cvt.f64.f32 %fd350, %f175; + mul.f64 %fd79, %fd349, %fd350; + not.pred %p274, %p11; + mov.f64 %fd522, %fd43; + @%p274 bra $L__BB1_158; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r368}, %fd43; + } + xor.b32 %r369, %r368, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r370, %temp}, %fd43; + } + mov.b64 %fd522, {%r370, %r369}; + +$L__BB1_158: + setp.eq.f32 %p275, %f76, 0f00000000; + @%p275 bra $L__BB1_162; + bra.uni $L__BB1_159; + +$L__BB1_162: + mov.u32 %r371, 0; + selp.b32 %r373, %r70, 0, %p117; + or.b32 %r374, %r373, 2146435072; + selp.b32 %r375, %r374, %r373, %p119; + mov.b64 %fd522, {%r371, %r375}; + bra.uni $L__BB1_163; + +$L__BB1_159: + setp.gt.s32 %p276, %r70, -1; + @%p276 bra $L__BB1_163; + + cvt.rzi.f64.f64 %fd352, %fd309; + setp.eq.f64 %p277, %fd352, 0d4008000000000000; + @%p277 bra $L__BB1_163; + + mov.f64 %fd522, 0dFFF8000000000000; + +$L__BB1_163: + selp.f64 %fd523, %fd522, %fd44, %p149; + @%p19 bra $L__BB1_168; + + setp.eq.s32 %p281, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r376, %temp}, %fd309; + } + setp.eq.s32 %p282, %r376, 0; + and.pred %p283, %p281, %p282; + @%p283 bra $L__BB1_167; + bra.uni $L__BB1_165; + +$L__BB1_167: + mov.u32 %r383, 0; + mov.b64 %fd523, {%r383, %r73}; + bra.uni $L__BB1_168; + +$L__BB1_165: + cvt.rn.f32.s32 %f2729, %r781; + sub.f32 %f2728, %f2729, %f2868; + add.f32 %f2727, %f2728, 0fBF000000; + cvt.f64.f32 %fd482, %f2727; + and.b32 %r377, %r70, 2147483647; + setp.ne.s32 %p284, %r377, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r378, %temp}, %fd482; + } + setp.ne.s32 %p285, %r378, 0; + or.pred %p286, %p284, %p285; + mov.f64 %fd523, %fd522; + @%p286 bra $L__BB1_168; + + and.pred %p288, %p125, %p11; + selp.b32 %r381, %r61, %r60, %p288; + mov.u32 %r382, 0; + mov.b64 %fd523, {%r382, %r381}; + +$L__BB1_168: + cvt.f64.f32 %fd484, %f124; + cvt.rn.f32.s32 %f2765, %r782; + mov.f32 %f2738, 0f00000000; + mov.f32 %f2737, 0f3102E308; + mov.f32 %f2736, 0fBF317218; + mov.f32 %f2735, 0f35BFBE8E; + mov.f32 %f2734, 0f3F317200; + mov.f32 %f2733, 0f3DAAAABD; + mov.f32 %f2732, 0f3C4CAF63; + mov.f32 %f2731, 0f3B18F0FE; + mov.f32 %f2730, 0f3FB8AA3B; + setp.eq.f32 %p289, %f76, 0f3F800000; + selp.f64 %fd355, 0d3FF0000000000000, %fd523, %p289; + cvt.f64.f32 %fd356, %f181; + mul.f64 %fd357, %fd355, %fd356; + sub.f64 %fd358, %fd79, %fd357; + mul.f64 %fd359, %fd70, %fd358; + mul.f64 %fd360, %fd359, %fd484; + mul.f32 %f1234, %f60, %f182; + cvt.f64.f32 %fd361, %f1234; + sub.f64 %fd362, %fd361, %fd360; + cvt.rn.f32.f64 %f183, %fd362; + add.f32 %f1235, %f2765, 0f3F800000; + sub.f32 %f1236, %f1235, %f2867; + div.rn.f32 %f184, %f1236, %f2864; + abs.f32 %f185, %f184; + setp.lt.f32 %p290, %f185, 0f00800000; + mul.f32 %f1237, %f185, 0f4B800000; + selp.f32 %f1238, %f1237, %f185, %p290; + selp.f32 %f1239, 0fC3170000, 0fC2FE0000, %p290; + mov.b32 %r384, %f1238; + and.b32 %r385, %r384, 8388607; + or.b32 %r386, %r385, 1065353216; + mov.b32 %f1240, %r386; + shr.u32 %r387, %r384, 23; + cvt.rn.f32.u32 %f1241, %r387; + add.f32 %f1242, %f1239, %f1241; + setp.gt.f32 %p291, %f1240, 0f3FB504F3; + mul.f32 %f1243, %f1240, 0f3F000000; + add.f32 %f1244, %f1242, 0f3F800000; + selp.f32 %f1245, %f1244, %f1242, %p291; + selp.f32 %f1246, %f1243, %f1240, %p291; + add.f32 %f1247, %f1246, 0fBF800000; + add.f32 %f1248, %f1246, 0f3F800000; + rcp.approx.ftz.f32 %f1249, %f1248; + add.f32 %f1250, %f1247, %f1247; + mul.f32 %f1252, %f1250, %f1249; + mul.f32 %f1253, %f1252, %f1252; + fma.rn.f32 %f1256, %f2731, %f1253, %f2732; + fma.rn.f32 %f1258, %f1256, %f1253, %f2733; + mul.rn.f32 %f1259, %f1258, %f1253; + mul.rn.f32 %f1260, %f1259, %f1252; + sub.f32 %f1261, %f1247, %f1252; + add.f32 %f1262, %f1261, %f1261; + neg.f32 %f1263, %f1252; + fma.rn.f32 %f1264, %f1263, %f1247, %f1262; + mul.rn.f32 %f1265, %f1249, %f1264; + add.f32 %f1266, %f1260, %f1252; + sub.f32 %f1267, %f1252, %f1266; + add.f32 %f1268, %f1260, %f1267; + add.f32 %f1269, %f1265, %f1268; + add.f32 %f1270, %f1266, %f1269; + sub.f32 %f1271, %f1266, %f1270; + add.f32 %f1272, %f1269, %f1271; + mul.rn.f32 %f1274, %f1245, %f2734; + mul.rn.f32 %f1276, %f1245, %f2735; + add.f32 %f1277, %f1274, %f1270; + sub.f32 %f1278, %f1274, %f1277; + add.f32 %f1279, %f1270, %f1278; + add.f32 %f1280, %f1272, %f1279; + add.f32 %f1281, %f1276, %f1280; + add.f32 %f1282, %f1277, %f1281; + sub.f32 %f1283, %f1277, %f1282; + add.f32 %f1284, %f1281, %f1283; + mul.rn.f32 %f1285, %f576, %f1282; + neg.f32 %f1286, %f1285; + fma.rn.f32 %f1287, %f576, %f1282, %f1286; + fma.rn.f32 %f1288, %f576, %f1284, %f1287; + fma.rn.f32 %f1290, %f2738, %f1282, %f1288; + add.rn.f32 %f1291, %f1285, %f1290; + neg.f32 %f1292, %f1291; + add.rn.f32 %f1293, %f1285, %f1292; + add.rn.f32 %f1294, %f1293, %f1290; + mov.b32 %r388, %f1291; + setp.eq.s32 %p292, %r388, 1118925336; + add.s32 %r389, %r388, -1; + mov.b32 %f1295, %r389; + add.f32 %f1296, %f1294, 0f37000000; + selp.f32 %f186, %f1296, %f1294, %p292; + selp.f32 %f1297, %f1295, %f1291, %p292; + mul.rn.f32 %f1299, %f1297, %f2730; + cvt.rzi.f32.f32 %f1300, %f1299; + abs.f32 %f1301, %f1300; + setp.gt.f32 %p293, %f1301, 0f42FC0000; + mov.b32 %r390, %f1300; + and.b32 %r391, %r390, -2147483648; + or.b32 %r392, %r391, 1123811328; + mov.b32 %f1302, %r392; + selp.f32 %f1303, %f1302, %f1300, %p293; + fma.rn.f32 %f1305, %f1303, %f2736, %f1297; + fma.rn.f32 %f1307, %f1303, %f2737, %f1305; + mul.f32 %f1308, %f1307, 0f3FB8AA3B; + add.f32 %f1309, %f1303, 0f4B40007F; + mov.b32 %r393, %f1309; + shl.b32 %r394, %r393, 23; + mov.b32 %f1310, %r394; + ex2.approx.ftz.f32 %f1311, %f1308; + mul.f32 %f187, %f1311, %f1310; + setp.eq.f32 %p294, %f187, 0f7F800000; + mov.f32 %f2846, 0f7F800000; + @%p294 bra $L__BB1_170; + + fma.rn.f32 %f2846, %f187, %f186, %f187; + +$L__BB1_170: + setp.lt.f32 %p295, %f184, 0f00000000; + and.pred %p22, %p295, %p106; + setp.eq.f32 %p297, %f184, 0f00000000; + @%p297 bra $L__BB1_174; + bra.uni $L__BB1_171; + +$L__BB1_174: + add.f32 %f1316, %f184, %f184; + selp.f32 %f2848, %f1316, 0f00000000, %p106; + bra.uni $L__BB1_175; + +$L__BB1_171: + mov.b32 %r395, %f2846; + xor.b32 %r396, %r395, -2147483648; + mov.b32 %f1312, %r396; + selp.f32 %f2848, %f1312, %f2846, %p22; + setp.geu.f32 %p298, %f184, 0f00000000; + @%p298 bra $L__BB1_175; + + cvt.rzi.f32.f32 %f1314, %f576; + setp.eq.f32 %p299, %f1314, 0f40000000; + @%p299 bra $L__BB1_175; + + mov.f32 %f2848, 0f7FFFFFFF; + +$L__BB1_175: + abs.f32 %f2771, %f184; + add.f32 %f1317, %f2771, 0f40000000; + mov.b32 %r397, %f1317; + setp.lt.s32 %p301, %r397, 2139095040; + @%p301 bra $L__BB1_180; + + abs.f32 %f2772, %f184; + setp.gtu.f32 %p302, %f2772, 0f7F800000; + @%p302 bra $L__BB1_179; + bra.uni $L__BB1_177; + +$L__BB1_179: + add.f32 %f2848, %f184, 0f40000000; + bra.uni $L__BB1_180; + +$L__BB1_177: + abs.f32 %f2773, %f184; + setp.neu.f32 %p303, %f2773, 0f7F800000; + @%p303 bra $L__BB1_180; + + selp.f32 %f2848, 0fFF800000, 0f7F800000, %p22; + +$L__BB1_180: + mov.f32 %f2754, 0f00000000; + mov.f32 %f2753, 0f3102E308; + mov.f32 %f2752, 0fBF317218; + mov.f32 %f2751, 0f35BFBE8E; + mov.f32 %f2750, 0f3F317200; + mov.f32 %f2749, 0f3DAAAABD; + mov.f32 %f2748, 0f3C4CAF63; + mov.f32 %f2747, 0f3B18F0FE; + mov.f32 %f2746, 0f32A57060; + mov.f32 %f2745, 0f4B400001; + mov.f32 %f2744, 0f437C0000; + mov.f32 %f2743, 0f3BBB989D; + mov.f32 %f2742, 0f3FB8AA3B; + mov.f32 %f2741, 0f3F000000; + cvt.rn.f32.s32 %f2740, %r782; + sub.f32 %f2739, %f2740, %f2867; + mul.f32 %f1319, %f2848, 0fBF000000; + setp.eq.f32 %p304, %f184, 0f3F800000; + selp.f32 %f1320, 0fBF000000, %f1319, %p304; + fma.rn.f32 %f1323, %f1320, %f2743, %f2741; + cvt.sat.f32.f32 %f1326, %f1323; + fma.rm.f32 %f1328, %f1326, %f2744, %f2745; + add.f32 %f1329, %f1328, 0fCB40007F; + neg.f32 %f1330, %f1329; + fma.rn.f32 %f1331, %f1320, %f2742, %f1330; + fma.rn.f32 %f1333, %f1320, %f2746, %f1331; + mov.b32 %r398, %f1328; + shl.b32 %r399, %r398, 23; + mov.b32 %f1334, %r399; + ex2.approx.ftz.f32 %f1335, %f1333; + mul.f32 %f196, %f1335, %f1334; + div.rn.f32 %f197, %f2739, %f2864; + abs.f32 %f198, %f197; + setp.lt.f32 %p305, %f198, 0f00800000; + mul.f32 %f1336, %f198, 0f4B800000; + selp.f32 %f1337, %f1336, %f198, %p305; + selp.f32 %f1338, 0fC3170000, 0fC2FE0000, %p305; + mov.b32 %r400, %f1337; + and.b32 %r401, %r400, 8388607; + or.b32 %r402, %r401, 1065353216; + mov.b32 %f1339, %r402; + shr.u32 %r403, %r400, 23; + cvt.rn.f32.u32 %f1340, %r403; + add.f32 %f1341, %f1338, %f1340; + setp.gt.f32 %p306, %f1339, 0f3FB504F3; + mul.f32 %f1342, %f1339, 0f3F000000; + add.f32 %f1343, %f1341, 0f3F800000; + selp.f32 %f1344, %f1343, %f1341, %p306; + selp.f32 %f1345, %f1342, %f1339, %p306; + add.f32 %f1346, %f1345, 0fBF800000; + add.f32 %f1347, %f1345, 0f3F800000; + rcp.approx.ftz.f32 %f1348, %f1347; + add.f32 %f1349, %f1346, %f1346; + mul.f32 %f1351, %f1349, %f1348; + mul.f32 %f1352, %f1351, %f1351; + fma.rn.f32 %f1355, %f2747, %f1352, %f2748; + fma.rn.f32 %f1357, %f1355, %f1352, %f2749; + mul.rn.f32 %f1358, %f1357, %f1352; + mul.rn.f32 %f1359, %f1358, %f1351; + sub.f32 %f1360, %f1346, %f1351; + add.f32 %f1361, %f1360, %f1360; + neg.f32 %f1362, %f1351; + fma.rn.f32 %f1363, %f1362, %f1346, %f1361; + mul.rn.f32 %f1364, %f1348, %f1363; + add.f32 %f1365, %f1359, %f1351; + sub.f32 %f1366, %f1351, %f1365; + add.f32 %f1367, %f1359, %f1366; + add.f32 %f1368, %f1364, %f1367; + add.f32 %f1369, %f1365, %f1368; + sub.f32 %f1370, %f1365, %f1369; + add.f32 %f1371, %f1368, %f1370; + mul.rn.f32 %f1373, %f1344, %f2750; + mul.rn.f32 %f1375, %f1344, %f2751; + add.f32 %f1376, %f1373, %f1369; + sub.f32 %f1377, %f1373, %f1376; + add.f32 %f1378, %f1369, %f1377; + add.f32 %f1379, %f1371, %f1378; + add.f32 %f1380, %f1375, %f1379; + add.f32 %f1381, %f1376, %f1380; + sub.f32 %f1382, %f1376, %f1381; + add.f32 %f1383, %f1380, %f1382; + mul.rn.f32 %f1384, %f576, %f1381; + neg.f32 %f1385, %f1384; + fma.rn.f32 %f1386, %f576, %f1381, %f1385; + fma.rn.f32 %f1387, %f576, %f1383, %f1386; + fma.rn.f32 %f1389, %f2754, %f1381, %f1387; + add.rn.f32 %f1390, %f1384, %f1389; + neg.f32 %f1391, %f1390; + add.rn.f32 %f1392, %f1384, %f1391; + add.rn.f32 %f1393, %f1392, %f1389; + mov.b32 %r404, %f1390; + setp.eq.s32 %p307, %r404, 1118925336; + add.s32 %r405, %r404, -1; + mov.b32 %f1394, %r405; + add.f32 %f1395, %f1393, 0f37000000; + selp.f32 %f199, %f1395, %f1393, %p307; + selp.f32 %f1396, %f1394, %f1390, %p307; + mul.rn.f32 %f1397, %f1396, %f2742; + cvt.rzi.f32.f32 %f1398, %f1397; + abs.f32 %f1399, %f1398; + setp.gt.f32 %p308, %f1399, 0f42FC0000; + mov.b32 %r406, %f1398; + and.b32 %r407, %r406, -2147483648; + or.b32 %r408, %r407, 1123811328; + mov.b32 %f1400, %r408; + selp.f32 %f1401, %f1400, %f1398, %p308; + fma.rn.f32 %f1403, %f1401, %f2752, %f1396; + fma.rn.f32 %f1405, %f1401, %f2753, %f1403; + mul.f32 %f1406, %f1405, 0f3FB8AA3B; + add.f32 %f1407, %f1401, 0f4B40007F; + mov.b32 %r409, %f1407; + shl.b32 %r410, %r409, 23; + mov.b32 %f1408, %r410; + ex2.approx.ftz.f32 %f1409, %f1406; + mul.f32 %f200, %f1409, %f1408; + setp.eq.f32 %p309, %f200, 0f7F800000; + mov.f32 %f2849, 0f7F800000; + @%p309 bra $L__BB1_182; + + fma.rn.f32 %f2849, %f200, %f199, %f200; + +$L__BB1_182: + setp.lt.f32 %p310, %f197, 0f00000000; + and.pred %p23, %p310, %p106; + setp.eq.f32 %p312, %f197, 0f00000000; + @%p312 bra $L__BB1_186; + bra.uni $L__BB1_183; + +$L__BB1_186: + add.f32 %f1414, %f197, %f197; + selp.f32 %f2851, %f1414, 0f00000000, %p106; + bra.uni $L__BB1_187; + +$L__BB1_183: + mov.b32 %r411, %f2849; + xor.b32 %r412, %r411, -2147483648; + mov.b32 %f1410, %r412; + selp.f32 %f2851, %f1410, %f2849, %p23; + setp.geu.f32 %p313, %f197, 0f00000000; + @%p313 bra $L__BB1_187; + + cvt.rzi.f32.f32 %f1412, %f576; + setp.eq.f32 %p314, %f1412, 0f40000000; + @%p314 bra $L__BB1_187; + + mov.f32 %f2851, 0f7FFFFFFF; + +$L__BB1_187: + abs.f32 %f2636, %f197; + add.f32 %f1415, %f2636, 0f40000000; + mov.b32 %r413, %f1415; + setp.lt.s32 %p316, %r413, 2139095040; + @%p316 bra $L__BB1_192; + + abs.f32 %f2759, %f197; + setp.gtu.f32 %p317, %f2759, 0f7F800000; + @%p317 bra $L__BB1_191; + bra.uni $L__BB1_189; + +$L__BB1_191: + add.f32 %f2851, %f197, 0f40000000; + bra.uni $L__BB1_192; + +$L__BB1_189: + abs.f32 %f2760, %f197; + setp.neu.f32 %p318, %f2760, 0f7F800000; + @%p318 bra $L__BB1_192; + + selp.f32 %f2851, 0fFF800000, 0f7F800000, %p23; + +$L__BB1_192: + mov.f32 %f2644, 0f32A57060; + mov.f32 %f2643, 0f4B400001; + mov.f32 %f2642, 0f437C0000; + mov.f32 %f2641, 0f3BBB989D; + mov.f32 %f2640, 0f3FB8AA3B; + mov.f32 %f2639, 0f3F000000; + cvt.rn.f32.s32 %f2638, %r782; + sub.f32 %f2637, %f2638, %f2867; + mul.f32 %f1416, %f2851, 0fBF000000; + setp.eq.f32 %p319, %f197, 0f3F800000; + selp.f32 %f1417, 0fBF000000, %f1416, %p319; + fma.rn.f32 %f1420, %f1417, %f2641, %f2639; + cvt.sat.f32.f32 %f1423, %f1420; + fma.rm.f32 %f1425, %f1423, %f2642, %f2643; + add.f32 %f1426, %f1425, 0fCB40007F; + neg.f32 %f1427, %f1426; + fma.rn.f32 %f1428, %f1417, %f2640, %f1427; + fma.rn.f32 %f1430, %f1417, %f2644, %f1428; + mov.b32 %r414, %f1425; + shl.b32 %r415, %r414, 23; + mov.b32 %f1431, %r415; + ex2.approx.ftz.f32 %f1432, %f1430; + mul.f32 %f209, %f1432, %f1431; + add.f32 %f1433, %f2637, 0f3F800000; + mul.f32 %f1434, %f1433, %f196; + mul.f32 %f1435, %f2637, %f209; + sub.f32 %f210, %f1434, %f1435; + cvt.f64.f32 %fd363, %f114; + { + .reg .b32 %temp; + mov.b64 {%temp, %r79}, %fd363; + } + abs.f64 %fd88, %fd363; + { // callseq 15, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd88; + .param .b64 param1; + st.param.f64 [param1+0], %fd309; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd525, [retval0+0]; + } // callseq 15 + setp.lt.s32 %p320, %r79, 0; + and.pred %p24, %p320, %p117; + not.pred %p322, %p24; + @%p322 bra $L__BB1_194; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r416}, %fd525; + } + xor.b32 %r417, %r416, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r418, %temp}, %fd525; + } + mov.b64 %fd525, {%r418, %r417}; + +$L__BB1_194: + setp.eq.f32 %p323, %f114, 0f00000000; + @%p323 bra $L__BB1_198; + bra.uni $L__BB1_195; + +$L__BB1_198: + mov.u32 %r419, 0; + selp.b32 %r420, %r79, 0, %p117; + or.b32 %r421, %r420, 2146435072; + selp.b32 %r422, %r421, %r420, %p119; + mov.b64 %fd525, {%r419, %r422}; + bra.uni $L__BB1_199; + +$L__BB1_195: + setp.gt.s32 %p324, %r79, -1; + @%p324 bra $L__BB1_199; + + cvt.rzi.f64.f64 %fd366, %fd309; + setp.eq.f64 %p325, %fd366, 0d4008000000000000; + @%p325 bra $L__BB1_199; + + mov.f64 %fd525, 0dFFF8000000000000; + +$L__BB1_199: + add.f64 %fd94, %fd363, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r423}, %fd94; + } + and.b32 %r424, %r423, 2146435072; + setp.ne.s32 %p328, %r424, 2146435072; + mov.f64 %fd526, %fd525; + @%p328 bra $L__BB1_205; + + setp.gtu.f64 %p329, %fd88, 0d7FF0000000000000; + mov.f64 %fd526, %fd94; + @%p329 bra $L__BB1_205; + + setp.eq.s32 %p330, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r425, %temp}, %fd309; + } + setp.eq.s32 %p331, %r425, 0; + and.pred %p332, %p330, %p331; + @%p332 bra $L__BB1_204; + bra.uni $L__BB1_202; + +$L__BB1_204: + mov.u32 %r430, 0; + setp.gt.f64 %p339, %fd88, 0d3FF0000000000000; + selp.b32 %r431, 2146435072, 0, %p339; + xor.b32 %r432, %r431, 2146435072; + selp.b32 %r433, %r432, %r431, %p119; + setp.eq.f32 %p340, %f114, 0fBF800000; + selp.b32 %r434, 1072693248, %r433, %p340; + mov.b64 %fd526, {%r430, %r434}; + bra.uni $L__BB1_205; + +$L__BB1_202: + { + .reg .b32 %temp; + mov.b64 {%r426, %temp}, %fd363; + } + and.b32 %r427, %r79, 2147483647; + setp.ne.s32 %p333, %r427, 2146435072; + setp.ne.s32 %p334, %r426, 0; + or.pred %p335, %p333, %p334; + mov.f64 %fd526, %fd525; + @%p335 bra $L__BB1_205; + + and.pred %p337, %p125, %p24; + selp.b32 %r428, %r61, %r60, %p337; + mov.u32 %r429, 0; + mov.b64 %fd526, {%r429, %r428}; + +$L__BB1_205: + mul.f32 %f1436, %f59, %f210; + mul.f32 %f211, %f111, %f1436; + setp.eq.f32 %p341, %f114, 0f3F800000; + selp.f64 %fd371, 0d3FF0000000000000, %fd526, %p341; + cvt.f64.f32 %fd372, %f196; + mul.f64 %fd98, %fd371, %fd372; + cvt.f64.f32 %fd99, %f119; + { + .reg .b32 %temp; + mov.b64 {%temp, %r80}, %fd99; + } + abs.f64 %fd100, %fd99; + { // callseq 16, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd100; + .param .b64 param1; + st.param.f64 [param1+0], %fd309; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd528, [retval0+0]; + } // callseq 16 + setp.lt.s32 %p342, %r80, 0; + and.pred %p25, %p342, %p117; + not.pred %p344, %p25; + @%p344 bra $L__BB1_207; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r435}, %fd528; + } + xor.b32 %r436, %r435, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r437, %temp}, %fd528; + } + mov.b64 %fd528, {%r437, %r436}; + +$L__BB1_207: + setp.eq.f32 %p345, %f119, 0f00000000; + @%p345 bra $L__BB1_211; + bra.uni $L__BB1_208; + +$L__BB1_211: + mov.u32 %r438, 0; + selp.b32 %r439, %r80, 0, %p117; + or.b32 %r440, %r439, 2146435072; + selp.b32 %r441, %r440, %r439, %p119; + mov.b64 %fd528, {%r438, %r441}; + bra.uni $L__BB1_212; + +$L__BB1_208: + setp.gt.s32 %p346, %r80, -1; + @%p346 bra $L__BB1_212; + + cvt.rzi.f64.f64 %fd375, %fd309; + setp.eq.f64 %p347, %fd375, 0d4008000000000000; + @%p347 bra $L__BB1_212; + + mov.f64 %fd528, 0dFFF8000000000000; + +$L__BB1_212: + add.f64 %fd106, %fd99, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r442}, %fd106; + } + and.b32 %r443, %r442, 2146435072; + setp.ne.s32 %p350, %r443, 2146435072; + mov.f64 %fd529, %fd528; + @%p350 bra $L__BB1_218; + + setp.gtu.f64 %p351, %fd100, 0d7FF0000000000000; + mov.f64 %fd529, %fd106; + @%p351 bra $L__BB1_218; + + setp.eq.s32 %p352, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r444, %temp}, %fd309; + } + setp.eq.s32 %p353, %r444, 0; + and.pred %p354, %p352, %p353; + @%p354 bra $L__BB1_217; + bra.uni $L__BB1_215; + +$L__BB1_217: + mov.u32 %r449, 0; + setp.gt.f64 %p361, %fd100, 0d3FF0000000000000; + selp.b32 %r450, 2146435072, 0, %p361; + xor.b32 %r451, %r450, 2146435072; + selp.b32 %r452, %r451, %r450, %p119; + setp.eq.f32 %p362, %f119, 0fBF800000; + selp.b32 %r453, 1072693248, %r452, %p362; + mov.b64 %fd529, {%r449, %r453}; + bra.uni $L__BB1_218; + +$L__BB1_215: + { + .reg .b32 %temp; + mov.b64 {%r445, %temp}, %fd99; + } + and.b32 %r446, %r80, 2147483647; + setp.ne.s32 %p355, %r446, 2146435072; + setp.ne.s32 %p356, %r445, 0; + or.pred %p357, %p355, %p356; + mov.f64 %fd529, %fd528; + @%p357 bra $L__BB1_218; + + and.pred %p359, %p125, %p25; + selp.b32 %r447, %r61, %r60, %p359; + mov.u32 %r448, 0; + mov.b64 %fd529, {%r448, %r447}; + +$L__BB1_218: + cvt.f64.f32 %fd476, %f111; + mov.f32 %f2852, 0f00000000; + setp.eq.f32 %p363, %f119, 0f3F800000; + selp.f64 %fd378, 0d3FF0000000000000, %fd529, %p363; + cvt.f64.f32 %fd379, %f209; + mul.f64 %fd380, %fd378, %fd379; + sub.f64 %fd381, %fd98, %fd380; + mul.f64 %fd382, %fd70, %fd381; + mul.f64 %fd384, %fd382, %fd476; + mul.f32 %f1438, %f60, %f211; + cvt.f64.f32 %fd385, %f1438; + sub.f64 %fd386, %fd385, %fd384; + cvt.rn.f32.f64 %f1439, %fd386; + add.f32 %f212, %f182, %f211; + add.f32 %f213, %f183, %f1439; + mul.f32 %f214, %f111, %f124; + setp.leu.f32 %p364, %f125, 0f3C23D70A; + @%p364 bra $L__BB1_220; + + div.rn.f32 %f1440, %f126, %f125; + add.f32 %f2852, %f1440, 0fBF800000; + +$L__BB1_220: + mov.f32 %f2853, 0f00000000; + @%p364 bra $L__BB1_235; + + and.b32 %r454, %r74, 2146435072; + setp.eq.s32 %p366, %r454, 1062207488; + cvt.f64.f32 %fd110, %f125; + { + .reg .b32 %temp; + mov.b64 {%temp, %r81}, %fd110; + } + abs.f64 %fd111, %fd110; + { // callseq 17, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd111; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd531, [retval0+0]; + } // callseq 17 + setp.lt.s32 %p367, %r81, 0; + and.pred %p26, %p367, %p366; + not.pred %p368, %p26; + @%p368 bra $L__BB1_223; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r455}, %fd531; + } + xor.b32 %r456, %r455, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r457, %temp}, %fd531; + } + mov.b64 %fd531, {%r457, %r456}; + +$L__BB1_223: + setp.eq.f32 %p369, %f125, 0f00000000; + @%p369 bra $L__BB1_227; + bra.uni $L__BB1_224; + +$L__BB1_227: + setp.lt.s32 %p372, %r74, 0; + mov.u32 %r458, 0; + selp.b32 %r460, %r81, 0, %p366; + or.b32 %r461, %r460, 2146435072; + selp.b32 %r462, %r461, %r460, %p372; + mov.b64 %fd531, {%r458, %r462}; + bra.uni $L__BB1_228; + +$L__BB1_224: + setp.gt.s32 %p370, %r81, -1; + @%p370 bra $L__BB1_228; + + cvt.rzi.f64.f64 %fd389, %fd315; + setp.eq.f64 %p371, %fd389, 0d4000000000000000; + @%p371 bra $L__BB1_228; + + mov.f64 %fd531, 0dFFF8000000000000; + +$L__BB1_228: + add.f64 %fd117, %fd110, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r463}, %fd117; + } + and.b32 %r464, %r463, 2146435072; + setp.ne.s32 %p374, %r464, 2146435072; + mov.f64 %fd532, %fd531; + @%p374 bra $L__BB1_234; + + setp.gtu.f64 %p375, %fd111, 0d7FF0000000000000; + mov.f64 %fd532, %fd117; + @%p375 bra $L__BB1_234; + + setp.eq.s32 %p376, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r465, %temp}, %fd315; + } + setp.eq.s32 %p377, %r465, 0; + and.pred %p378, %p376, %p377; + @%p378 bra $L__BB1_233; + bra.uni $L__BB1_231; + +$L__BB1_233: + setp.lt.s32 %p384, %r74, 0; + mov.u32 %r471, 0; + setp.gt.f64 %p385, %fd111, 0d3FF0000000000000; + selp.b32 %r472, 2146435072, 0, %p385; + xor.b32 %r473, %r472, 2146435072; + selp.b32 %r474, %r473, %r472, %p384; + setp.eq.f32 %p386, %f125, 0fBF800000; + selp.b32 %r475, 1072693248, %r474, %p386; + mov.b64 %fd532, {%r471, %r475}; + bra.uni $L__BB1_234; + +$L__BB1_231: + { + .reg .b32 %temp; + mov.b64 {%r466, %temp}, %fd110; + } + and.b32 %r467, %r81, 2147483647; + setp.ne.s32 %p379, %r467, 2146435072; + setp.ne.s32 %p380, %r466, 0; + or.pred %p381, %p379, %p380; + mov.f64 %fd532, %fd531; + @%p381 bra $L__BB1_234; + + setp.ne.s32 %p382, %r75, 1071644672; + and.pred %p383, %p382, %p26; + or.b32 %r468, %r76, -2147483648; + selp.b32 %r469, %r468, %r76, %p383; + mov.u32 %r470, 0; + mov.b64 %fd532, {%r470, %r469}; + +$L__BB1_234: + setp.eq.f32 %p387, %f125, 0f3F800000; + selp.f64 %fd392, 0d3FF0000000000000, %fd532, %p387; + cvt.f64.f32 %fd393, %f126; + div.rn.f64 %fd394, %fd393, %fd392; + cvt.rn.f32.f64 %f2853, %fd394; + +$L__BB1_235: + and.b32 %r476, %r74, 2146435072; + setp.eq.s32 %p388, %r476, 1062207488; + mov.f32 %f1442, 0f47C35000; + min.f32 %f1443, %f2853, %f1442; + cvt.f64.f32 %fd121, %f1443; + min.f32 %f219, %f2852, %f1442; + fma.rn.f32 %f2822, %f219, %f139, %f2822; + mul.f32 %f1444, %f219, %f140; + cvt.f64.f32 %fd122, %f1444; + cvt.f64.f32 %fd123, %f139; + { + .reg .b32 %temp; + mov.b64 {%temp, %r82}, %fd123; + } + abs.f64 %fd124, %fd123; + { // callseq 18, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd124; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd533, [retval0+0]; + } // callseq 18 + @%p388 bra $L__BB1_281; + bra.uni $L__BB1_236; + +$L__BB1_281: + setp.gt.s32 %p449, %r82, -1; + @%p449 bra $L__BB1_283; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r525}, %fd533; + } + xor.b32 %r526, %r525, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r527, %temp}, %fd533; + } + mov.b64 %fd533, {%r527, %r526}; + +$L__BB1_283: + setp.eq.f32 %p450, %f139, 0f00000000; + @%p450 bra $L__BB1_287; + bra.uni $L__BB1_284; + +$L__BB1_287: + setp.lt.s32 %p453, %r74, 0; + mov.u32 %r528, 0; + or.b32 %r529, %r82, 2146435072; + selp.b32 %r530, %r529, %r82, %p453; + mov.b64 %fd533, {%r528, %r530}; + bra.uni $L__BB1_288; + +$L__BB1_236: + setp.eq.f32 %p389, %f139, 0f00000000; + @%p389 bra $L__BB1_240; + bra.uni $L__BB1_237; + +$L__BB1_240: + mov.u32 %r477, 0; + mov.b64 %fd533, {%r477, %r77}; + bra.uni $L__BB1_241; + +$L__BB1_284: + @%p449 bra $L__BB1_288; + + cvt.rzi.f64.f64 %fd437, %fd315; + setp.eq.f64 %p452, %fd437, 0d4000000000000000; + @%p452 bra $L__BB1_288; + + mov.f64 %fd533, 0dFFF8000000000000; + +$L__BB1_288: + add.f64 %fd168, %fd123, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r531}, %fd168; + } + and.b32 %r532, %r531, 2146435072; + setp.ne.s32 %p454, %r532, 2146435072; + mov.f64 %fd543, %fd533; + @%p454 bra $L__BB1_294; + + setp.gtu.f64 %p455, %fd124, 0d7FF0000000000000; + mov.f64 %fd543, %fd168; + @%p455 bra $L__BB1_294; + + setp.eq.s32 %p456, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r533, %temp}, %fd315; + } + setp.eq.s32 %p457, %r533, 0; + and.pred %p458, %p456, %p457; + @%p458 bra $L__BB1_293; + bra.uni $L__BB1_291; + +$L__BB1_293: + setp.lt.s32 %p465, %r74, 0; + mov.u32 %r539, 0; + setp.gt.f64 %p466, %fd124, 0d3FF0000000000000; + selp.b32 %r540, 2146435072, 0, %p466; + xor.b32 %r541, %r540, 2146435072; + selp.b32 %r542, %r541, %r540, %p465; + setp.eq.f32 %p467, %f139, 0fBF800000; + selp.b32 %r543, 1072693248, %r542, %p467; + mov.b64 %fd543, {%r539, %r543}; + bra.uni $L__BB1_294; + +$L__BB1_237: + setp.gt.s32 %p390, %r82, -1; + @%p390 bra $L__BB1_241; + + cvt.rzi.f64.f64 %fd397, %fd315; + setp.eq.f64 %p391, %fd397, 0d4000000000000000; + @%p391 bra $L__BB1_241; + + mov.f64 %fd533, 0dFFF8000000000000; + +$L__BB1_241: + add.f64 %fd128, %fd123, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r478}, %fd128; + } + and.b32 %r479, %r478, 2146435072; + setp.ne.s32 %p392, %r479, 2146435072; + mov.f64 %fd534, %fd533; + @%p392 bra $L__BB1_247; + + setp.gtu.f64 %p393, %fd124, 0d7FF0000000000000; + mov.f64 %fd534, %fd128; + @%p393 bra $L__BB1_247; + + setp.eq.s32 %p394, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r480, %temp}, %fd315; + } + setp.eq.s32 %p395, %r480, 0; + and.pred %p396, %p394, %p395; + @%p396 bra $L__BB1_246; + bra.uni $L__BB1_244; + +$L__BB1_246: + setp.lt.s32 %p400, %r74, 0; + mov.u32 %r484, 0; + setp.gt.f64 %p401, %fd124, 0d3FF0000000000000; + selp.b32 %r485, 2146435072, 0, %p401; + xor.b32 %r486, %r485, 2146435072; + selp.b32 %r487, %r486, %r485, %p400; + setp.eq.f32 %p402, %f139, 0fBF800000; + selp.b32 %r488, 1072693248, %r487, %p402; + mov.b64 %fd534, {%r484, %r488}; + bra.uni $L__BB1_247; + +$L__BB1_291: + { + .reg .b32 %temp; + mov.b64 {%r534, %temp}, %fd123; + } + and.b32 %r535, %r82, 2147483647; + setp.ne.s32 %p459, %r535, 2146435072; + setp.ne.s32 %p460, %r534, 0; + or.pred %p461, %p459, %p460; + mov.f64 %fd543, %fd533; + @%p461 bra $L__BB1_294; + + setp.lt.s32 %p462, %r82, 0; + mov.u32 %r536, 0; + setp.ne.s32 %p463, %r75, 1071644672; + and.pred %p464, %p463, %p462; + or.b32 %r537, %r76, -2147483648; + selp.b32 %r538, %r537, %r76, %p464; + mov.b64 %fd543, {%r536, %r538}; + +$L__BB1_294: + setp.eq.f32 %p468, %f139, 0f3F800000; + selp.f64 %fd440, 0d3FF0000000000000, %fd543, %p468; + mul.f64 %fd441, %fd440, %fd121; + sub.f64 %fd442, %fd122, %fd441; + cvt.f64.f32 %fd443, %f2827; + add.f64 %fd557, %fd442, %fd443; + cvt.f64.f32 %fd173, %f168; + { + .reg .b32 %temp; + mov.b64 {%temp, %r86}, %fd173; + } + abs.f64 %fd174, %fd173; + { // callseq 22, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd174; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd545, [retval0+0]; + } // callseq 22 + setp.gt.s32 %p469, %r86, -1; + @%p469 bra $L__BB1_296; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r544}, %fd545; + } + xor.b32 %r545, %r544, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r546, %temp}, %fd545; + } + mov.b64 %fd545, {%r546, %r545}; + +$L__BB1_296: + setp.eq.f32 %p470, %f168, 0f00000000; + @%p470 bra $L__BB1_300; + bra.uni $L__BB1_297; + +$L__BB1_300: + setp.lt.s32 %p473, %r74, 0; + mov.u32 %r547, 0; + or.b32 %r548, %r86, 2146435072; + selp.b32 %r549, %r548, %r86, %p473; + mov.b64 %fd545, {%r547, %r549}; + bra.uni $L__BB1_301; + +$L__BB1_297: + @%p469 bra $L__BB1_301; + + cvt.rzi.f64.f64 %fd446, %fd315; + setp.eq.f64 %p472, %fd446, 0d4000000000000000; + @%p472 bra $L__BB1_301; + + mov.f64 %fd545, 0dFFF8000000000000; + +$L__BB1_301: + add.f64 %fd180, %fd173, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r550}, %fd180; + } + and.b32 %r551, %r550, 2146435072; + setp.ne.s32 %p474, %r551, 2146435072; + mov.f64 %fd546, %fd545; + @%p474 bra $L__BB1_307; + + setp.gtu.f64 %p475, %fd174, 0d7FF0000000000000; + mov.f64 %fd546, %fd180; + @%p475 bra $L__BB1_307; + + setp.eq.s32 %p476, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r552, %temp}, %fd315; + } + setp.eq.s32 %p477, %r552, 0; + and.pred %p478, %p476, %p477; + @%p478 bra $L__BB1_306; + bra.uni $L__BB1_304; + +$L__BB1_306: + setp.lt.s32 %p485, %r74, 0; + mov.u32 %r558, 0; + setp.gt.f64 %p486, %fd174, 0d3FF0000000000000; + selp.b32 %r559, 2146435072, 0, %p486; + xor.b32 %r560, %r559, 2146435072; + selp.b32 %r561, %r560, %r559, %p485; + setp.eq.f32 %p487, %f168, 0fBF800000; + selp.b32 %r562, 1072693248, %r561, %p487; + mov.b64 %fd546, {%r558, %r562}; + bra.uni $L__BB1_307; + +$L__BB1_244: + { + .reg .b32 %temp; + mov.b64 {%r481, %temp}, %fd123; + } + and.b32 %r482, %r82, 2147483647; + setp.ne.s32 %p397, %r482, 2146435072; + setp.ne.s32 %p398, %r481, 0; + or.pred %p399, %p397, %p398; + mov.f64 %fd534, %fd533; + @%p399 bra $L__BB1_247; + + mov.u32 %r483, 0; + mov.b64 %fd534, {%r483, %r76}; + +$L__BB1_247: + setp.eq.f32 %p403, %f139, 0f3F800000; + selp.f64 %fd400, 0d3FF0000000000000, %fd534, %p403; + mul.f64 %fd401, %fd400, %fd121; + sub.f64 %fd402, %fd122, %fd401; + cvt.f64.f32 %fd403, %f2827; + add.f64 %fd557, %fd402, %fd403; + cvt.f64.f32 %fd133, %f168; + { + .reg .b32 %temp; + mov.b64 {%temp, %r83}, %fd133; + } + abs.f64 %fd134, %fd133; + setp.eq.f32 %p404, %f168, 0f00000000; + @%p404 bra $L__BB1_251; + bra.uni $L__BB1_248; + +$L__BB1_251: + mov.u32 %r489, 0; + mov.b64 %fd535, {%r489, %r77}; + bra.uni $L__BB1_252; + +$L__BB1_248: + { // callseq 19, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd134; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd535, [retval0+0]; + } // callseq 19 + setp.gt.s32 %p405, %r83, -1; + @%p405 bra $L__BB1_252; + + cvt.rzi.f64.f64 %fd406, %fd315; + setp.eq.f64 %p406, %fd406, 0d4000000000000000; + @%p406 bra $L__BB1_252; + + mov.f64 %fd535, 0dFFF8000000000000; + +$L__BB1_252: + add.f64 %fd138, %fd133, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r490}, %fd138; + } + and.b32 %r491, %r490, 2146435072; + setp.ne.s32 %p407, %r491, 2146435072; + mov.f64 %fd536, %fd535; + @%p407 bra $L__BB1_258; + + setp.gtu.f64 %p408, %fd134, 0d7FF0000000000000; + mov.f64 %fd536, %fd138; + @%p408 bra $L__BB1_258; + + setp.eq.s32 %p409, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r492, %temp}, %fd315; + } + setp.eq.s32 %p410, %r492, 0; + and.pred %p411, %p409, %p410; + @%p411 bra $L__BB1_257; + bra.uni $L__BB1_255; + +$L__BB1_257: + setp.lt.s32 %p415, %r74, 0; + mov.u32 %r496, 0; + setp.gt.f64 %p416, %fd134, 0d3FF0000000000000; + selp.b32 %r497, 2146435072, 0, %p416; + xor.b32 %r498, %r497, 2146435072; + selp.b32 %r499, %r498, %r497, %p415; + setp.eq.f32 %p417, %f168, 0fBF800000; + selp.b32 %r500, 1072693248, %r499, %p417; + mov.b64 %fd536, {%r496, %r500}; + bra.uni $L__BB1_258; + +$L__BB1_304: + { + .reg .b32 %temp; + mov.b64 {%r553, %temp}, %fd173; + } + and.b32 %r554, %r86, 2147483647; + setp.ne.s32 %p479, %r554, 2146435072; + setp.ne.s32 %p480, %r553, 0; + or.pred %p481, %p479, %p480; + mov.f64 %fd546, %fd545; + @%p481 bra $L__BB1_307; + + setp.lt.s32 %p482, %r86, 0; + mov.u32 %r555, 0; + setp.ne.s32 %p483, %r75, 1071644672; + and.pred %p484, %p483, %p482; + or.b32 %r556, %r76, -2147483648; + selp.b32 %r557, %r556, %r76, %p484; + mov.b64 %fd546, {%r555, %r557}; + +$L__BB1_307: + setp.eq.f32 %p488, %f168, 0f3F800000; + selp.f64 %fd449, 0d3FF0000000000000, %fd546, %p488; + mul.f64 %fd450, %fd449, %fd121; + mul.f32 %f1448, %f219, %f169; + cvt.f64.f32 %fd451, %f1448; + sub.f64 %fd452, %fd451, %fd450; + cvt.f64.f32 %fd453, %f2826; + add.f64 %fd556, %fd452, %fd453; + cvt.f64.f32 %fd185, %f214; + { + .reg .b32 %temp; + mov.b64 {%temp, %r87}, %fd185; + } + abs.f64 %fd186, %fd185; + { // callseq 23, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd186; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd548, [retval0+0]; + } // callseq 23 + setp.gt.s32 %p489, %r87, -1; + @%p489 bra $L__BB1_309; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r563}, %fd548; + } + xor.b32 %r564, %r563, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r565, %temp}, %fd548; + } + mov.b64 %fd548, {%r565, %r564}; + +$L__BB1_309: + setp.eq.f32 %p490, %f214, 0f00000000; + @%p490 bra $L__BB1_313; + bra.uni $L__BB1_310; + +$L__BB1_313: + setp.lt.s32 %p493, %r74, 0; + mov.u32 %r566, 0; + or.b32 %r567, %r87, 2146435072; + selp.b32 %r568, %r567, %r87, %p493; + mov.b64 %fd548, {%r566, %r568}; + bra.uni $L__BB1_314; + +$L__BB1_310: + @%p489 bra $L__BB1_314; + + cvt.rzi.f64.f64 %fd456, %fd315; + setp.eq.f64 %p492, %fd456, 0d4000000000000000; + @%p492 bra $L__BB1_314; + + mov.f64 %fd548, 0dFFF8000000000000; + +$L__BB1_314: + add.f64 %fd192, %fd185, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r569}, %fd192; + } + and.b32 %r570, %r569, 2146435072; + setp.ne.s32 %p494, %r570, 2146435072; + mov.f64 %fd549, %fd548; + @%p494 bra $L__BB1_320; + + setp.gtu.f64 %p495, %fd186, 0d7FF0000000000000; + mov.f64 %fd549, %fd192; + @%p495 bra $L__BB1_320; + + setp.eq.s32 %p496, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r571, %temp}, %fd315; + } + setp.eq.s32 %p497, %r571, 0; + and.pred %p498, %p496, %p497; + @%p498 bra $L__BB1_319; + bra.uni $L__BB1_317; + +$L__BB1_319: + setp.lt.s32 %p505, %r74, 0; + mov.u32 %r577, 0; + setp.gt.f64 %p506, %fd186, 0d3FF0000000000000; + selp.b32 %r578, 2146435072, 0, %p506; + xor.b32 %r579, %r578, 2146435072; + selp.b32 %r580, %r579, %r578, %p505; + setp.eq.f32 %p507, %f214, 0fBF800000; + selp.b32 %r581, 1072693248, %r580, %p507; + mov.b64 %fd549, {%r577, %r581}; + bra.uni $L__BB1_320; + +$L__BB1_255: + { + .reg .b32 %temp; + mov.b64 {%r493, %temp}, %fd133; + } + and.b32 %r494, %r83, 2147483647; + setp.ne.s32 %p412, %r494, 2146435072; + setp.ne.s32 %p413, %r493, 0; + or.pred %p414, %p412, %p413; + mov.f64 %fd536, %fd535; + @%p414 bra $L__BB1_258; + + mov.u32 %r495, 0; + mov.b64 %fd536, {%r495, %r76}; + +$L__BB1_258: + setp.eq.f32 %p418, %f168, 0f3F800000; + selp.f64 %fd409, 0d3FF0000000000000, %fd536, %p418; + mul.f64 %fd410, %fd409, %fd121; + mul.f32 %f1445, %f219, %f169; + cvt.f64.f32 %fd411, %f1445; + sub.f64 %fd412, %fd411, %fd410; + cvt.f64.f32 %fd413, %f2826; + add.f64 %fd556, %fd412, %fd413; + cvt.f64.f32 %fd143, %f214; + { + .reg .b32 %temp; + mov.b64 {%temp, %r84}, %fd143; + } + abs.f64 %fd144, %fd143; + setp.eq.f32 %p419, %f214, 0f00000000; + @%p419 bra $L__BB1_262; + bra.uni $L__BB1_259; + +$L__BB1_262: + mov.u32 %r501, 0; + mov.b64 %fd537, {%r501, %r77}; + bra.uni $L__BB1_263; + +$L__BB1_259: + { // callseq 20, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd144; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd537, [retval0+0]; + } // callseq 20 + setp.gt.s32 %p420, %r84, -1; + @%p420 bra $L__BB1_263; + + cvt.rzi.f64.f64 %fd416, %fd315; + setp.eq.f64 %p421, %fd416, 0d4000000000000000; + @%p421 bra $L__BB1_263; + + mov.f64 %fd537, 0dFFF8000000000000; + +$L__BB1_263: + add.f64 %fd148, %fd143, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r502}, %fd148; + } + and.b32 %r503, %r502, 2146435072; + setp.ne.s32 %p422, %r503, 2146435072; + mov.f64 %fd538, %fd537; + @%p422 bra $L__BB1_269; + + setp.gtu.f64 %p423, %fd144, 0d7FF0000000000000; + mov.f64 %fd538, %fd148; + @%p423 bra $L__BB1_269; + + setp.eq.s32 %p424, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r504, %temp}, %fd315; + } + setp.eq.s32 %p425, %r504, 0; + and.pred %p426, %p424, %p425; + @%p426 bra $L__BB1_268; + bra.uni $L__BB1_266; + +$L__BB1_268: + setp.lt.s32 %p430, %r74, 0; + mov.u32 %r508, 0; + setp.gt.f64 %p431, %fd144, 0d3FF0000000000000; + selp.b32 %r509, 2146435072, 0, %p431; + xor.b32 %r510, %r509, 2146435072; + selp.b32 %r511, %r510, %r509, %p430; + setp.eq.f32 %p432, %f214, 0fBF800000; + selp.b32 %r512, 1072693248, %r511, %p432; + mov.b64 %fd538, {%r508, %r512}; + bra.uni $L__BB1_269; + +$L__BB1_317: + { + .reg .b32 %temp; + mov.b64 {%r572, %temp}, %fd185; + } + and.b32 %r573, %r87, 2147483647; + setp.ne.s32 %p499, %r573, 2146435072; + setp.ne.s32 %p500, %r572, 0; + or.pred %p501, %p499, %p500; + mov.f64 %fd549, %fd548; + @%p501 bra $L__BB1_320; + + setp.lt.s32 %p502, %r87, 0; + mov.u32 %r574, 0; + setp.ne.s32 %p503, %r75, 1071644672; + and.pred %p504, %p503, %p502; + or.b32 %r575, %r76, -2147483648; + selp.b32 %r576, %r575, %r76, %p504; + mov.b64 %fd549, {%r574, %r576}; + +$L__BB1_320: + mul.f32 %f1449, %f219, 0f00000000; + cvt.f64.f32 %fd459, %f1449; + setp.eq.f32 %p508, %f214, 0f3F800000; + selp.f64 %fd460, 0d3FF0000000000000, %fd549, %p508; + mul.f64 %fd461, %fd460, %fd121; + sub.f64 %fd462, %fd459, %fd461; + cvt.f64.f32 %fd463, %f2825; + add.f64 %fd555, %fd462, %fd463; + cvt.f64.f32 %fd464, %f2824; + sub.f64 %fd465, %fd459, %fd121; + add.f64 %fd554, %fd465, %fd464; + cvt.f64.f32 %fd198, %f212; + { + .reg .b32 %temp; + mov.b64 {%temp, %r88}, %fd198; + } + abs.f64 %fd199, %fd198; + { // callseq 24, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd199; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd551, [retval0+0]; + } // callseq 24 + setp.gt.s32 %p509, %r88, -1; + @%p509 bra $L__BB1_322; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r582}, %fd551; + } + xor.b32 %r583, %r582, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r584, %temp}, %fd551; + } + mov.b64 %fd551, {%r584, %r583}; + +$L__BB1_322: + setp.eq.f32 %p510, %f212, 0f00000000; + @%p510 bra $L__BB1_326; + bra.uni $L__BB1_323; + +$L__BB1_326: + setp.lt.s32 %p513, %r74, 0; + mov.u32 %r585, 0; + or.b32 %r586, %r88, 2146435072; + selp.b32 %r587, %r586, %r88, %p513; + mov.b64 %fd551, {%r585, %r587}; + bra.uni $L__BB1_327; + +$L__BB1_323: + @%p509 bra $L__BB1_327; + + cvt.rzi.f64.f64 %fd468, %fd315; + setp.eq.f64 %p512, %fd468, 0d4000000000000000; + @%p512 bra $L__BB1_327; + + mov.f64 %fd551, 0dFFF8000000000000; + +$L__BB1_327: + add.f64 %fd205, %fd198, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r588}, %fd205; + } + and.b32 %r589, %r588, 2146435072; + setp.ne.s32 %p514, %r589, 2146435072; + mov.f64 %fd552, %fd551; + @%p514 bra $L__BB1_333; + + setp.gtu.f64 %p515, %fd199, 0d7FF0000000000000; + mov.f64 %fd552, %fd205; + @%p515 bra $L__BB1_333; + + setp.eq.s32 %p516, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r590, %temp}, %fd315; + } + setp.eq.s32 %p517, %r590, 0; + and.pred %p518, %p516, %p517; + @%p518 bra $L__BB1_332; + bra.uni $L__BB1_330; + +$L__BB1_332: + setp.lt.s32 %p525, %r74, 0; + mov.u32 %r596, 0; + setp.gt.f64 %p526, %fd199, 0d3FF0000000000000; + selp.b32 %r597, 2146435072, 0, %p526; + xor.b32 %r598, %r597, 2146435072; + selp.b32 %r599, %r598, %r597, %p525; + setp.eq.f32 %p527, %f212, 0fBF800000; + selp.b32 %r600, 1072693248, %r599, %p527; + mov.b64 %fd552, {%r596, %r600}; + bra.uni $L__BB1_333; + +$L__BB1_266: + { + .reg .b32 %temp; + mov.b64 {%r505, %temp}, %fd143; + } + and.b32 %r506, %r84, 2147483647; + setp.ne.s32 %p427, %r506, 2146435072; + setp.ne.s32 %p428, %r505, 0; + or.pred %p429, %p427, %p428; + mov.f64 %fd538, %fd537; + @%p429 bra $L__BB1_269; + + mov.u32 %r507, 0; + mov.b64 %fd538, {%r507, %r76}; + +$L__BB1_269: + mul.f32 %f1446, %f219, 0f00000000; + cvt.f64.f32 %fd419, %f1446; + setp.eq.f32 %p433, %f214, 0f3F800000; + selp.f64 %fd420, 0d3FF0000000000000, %fd538, %p433; + mul.f64 %fd421, %fd420, %fd121; + sub.f64 %fd422, %fd419, %fd421; + cvt.f64.f32 %fd423, %f2825; + add.f64 %fd555, %fd422, %fd423; + cvt.f64.f32 %fd424, %f2824; + sub.f64 %fd425, %fd419, %fd121; + add.f64 %fd554, %fd425, %fd424; + cvt.f64.f32 %fd154, %f212; + { + .reg .b32 %temp; + mov.b64 {%temp, %r85}, %fd154; + } + abs.f64 %fd155, %fd154; + setp.eq.f32 %p434, %f212, 0f00000000; + @%p434 bra $L__BB1_273; + bra.uni $L__BB1_270; + +$L__BB1_273: + mov.u32 %r513, 0; + mov.b64 %fd539, {%r513, %r77}; + bra.uni $L__BB1_274; + +$L__BB1_270: + { // callseq 21, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd155; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd539, [retval0+0]; + } // callseq 21 + setp.gt.s32 %p435, %r85, -1; + @%p435 bra $L__BB1_274; + + cvt.rzi.f64.f64 %fd428, %fd315; + setp.eq.f64 %p436, %fd428, 0d4000000000000000; + @%p436 bra $L__BB1_274; + + mov.f64 %fd539, 0dFFF8000000000000; + +$L__BB1_274: + add.f64 %fd159, %fd154, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r514}, %fd159; + } + and.b32 %r515, %r514, 2146435072; + setp.ne.s32 %p437, %r515, 2146435072; + mov.f64 %fd540, %fd539; + @%p437 bra $L__BB1_280; + + setp.gtu.f64 %p438, %fd155, 0d7FF0000000000000; + mov.f64 %fd540, %fd159; + @%p438 bra $L__BB1_280; + + setp.eq.s32 %p439, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r516, %temp}, %fd315; + } + setp.eq.s32 %p440, %r516, 0; + and.pred %p441, %p439, %p440; + @%p441 bra $L__BB1_279; + bra.uni $L__BB1_277; + +$L__BB1_279: + setp.lt.s32 %p445, %r74, 0; + mov.u32 %r520, 0; + setp.gt.f64 %p446, %fd155, 0d3FF0000000000000; + selp.b32 %r521, 2146435072, 0, %p446; + xor.b32 %r522, %r521, 2146435072; + selp.b32 %r523, %r522, %r521, %p445; + setp.eq.f32 %p447, %f212, 0fBF800000; + selp.b32 %r524, 1072693248, %r523, %p447; + mov.b64 %fd540, {%r520, %r524}; + bra.uni $L__BB1_280; + +$L__BB1_330: + { + .reg .b32 %temp; + mov.b64 {%r591, %temp}, %fd198; + } + and.b32 %r592, %r88, 2147483647; + setp.ne.s32 %p519, %r592, 2146435072; + setp.ne.s32 %p520, %r591, 0; + or.pred %p521, %p519, %p520; + mov.f64 %fd552, %fd551; + @%p521 bra $L__BB1_333; + + setp.lt.s32 %p522, %r88, 0; + mov.u32 %r593, 0; + setp.ne.s32 %p523, %r75, 1071644672; + and.pred %p524, %p523, %p522; + or.b32 %r594, %r76, -2147483648; + selp.b32 %r595, %r594, %r76, %p524; + mov.b64 %fd552, {%r593, %r595}; + +$L__BB1_333: + setp.eq.f32 %p528, %f212, 0f3F800000; + selp.f64 %fd471, 0d3FF0000000000000, %fd552, %p528; + mul.f64 %fd472, %fd471, %fd121; + mul.f32 %f1450, %f219, %f213; + cvt.f64.f32 %fd473, %f1450; + sub.f64 %fd474, %fd473, %fd472; + cvt.f64.f32 %fd475, %f2823; + add.f64 %fd553, %fd474, %fd475; + bra.uni $L__BB1_334; + +$L__BB1_277: + { + .reg .b32 %temp; + mov.b64 {%r517, %temp}, %fd154; + } + and.b32 %r518, %r85, 2147483647; + setp.ne.s32 %p442, %r518, 2146435072; + setp.ne.s32 %p443, %r517, 0; + or.pred %p444, %p442, %p443; + mov.f64 %fd540, %fd539; + @%p444 bra $L__BB1_280; + + mov.u32 %r519, 0; + mov.b64 %fd540, {%r519, %r76}; + +$L__BB1_280: + setp.eq.f32 %p448, %f212, 0f3F800000; + selp.f64 %fd431, 0d3FF0000000000000, %fd540, %p448; + mul.f64 %fd432, %fd431, %fd121; + mul.f32 %f1447, %f219, %f213; + cvt.f64.f32 %fd433, %f1447; + sub.f64 %fd434, %fd433, %fd432; + cvt.f64.f32 %fd435, %f2823; + add.f64 %fd553, %fd434, %fd435; + +$L__BB1_334: + cvt.rn.f32.f64 %f2827, %fd557; + cvt.rn.f32.f64 %f2826, %fd556; + cvt.rn.f32.f64 %f2825, %fd555; + cvt.rn.f32.f64 %f2824, %fd554; + cvt.rn.f32.f64 %f2823, %fd553; + fma.rn.f32 %f2821, %f219, %f168, %f2821; + fma.rn.f32 %f2820, %f219, %f214, %f2820; + add.f32 %f2819, %f2819, %f219; + fma.rn.f32 %f2818, %f219, %f212, %f2818; + add.s32 %r782, %r782, 1; + setp.lt.s32 %p529, %r782, %r102; + @%p529 bra $L__BB1_56; + + add.s32 %r781, %r781, 1; + setp.lt.s32 %p530, %r781, %r102; + @%p530 bra $L__BB1_55; + +$L__BB1_336: + ld.param.u32 %r759, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_3]; + div.rn.f32 %f1451, %f2822, %f2827; + mov.f32 %f1452, 0fBF800000; + max.f32 %f1453, %f1451, %f1452; + mov.f32 %f1454, 0f3F800000; + min.f32 %f1455, %f1453, %f1454; + sub.f32 %f2868, %f2868, %f1455; + div.rn.f32 %f1456, %f2821, %f2826; + max.f32 %f1457, %f1456, %f1452; + min.f32 %f1458, %f1457, %f1454; + sub.f32 %f2867, %f2867, %f1458; + neg.f32 %f1459, %f2866; + div.rn.f32 %f1460, %f2820, %f2825; + max.f32 %f1461, %f1460, %f1459; + min.f32 %f1462, %f1461, %f2866; + sub.f32 %f1463, %f2866, %f1462; + neg.f32 %f1464, %f2865; + div.rn.f32 %f1465, %f2819, %f2824; + max.f32 %f1466, %f1465, %f1464; + min.f32 %f1467, %f1466, %f2865; + sub.f32 %f1468, %f2865, %f1467; + neg.f32 %f1469, %f2864; + div.rn.f32 %f1470, %f2818, %f2823; + max.f32 %f1471, %f1470, %f1469; + min.f32 %f1472, %f1471, %f2864; + sub.f32 %f1473, %f2864, %f1472; + max.f32 %f2866, %f1463, %f1454; + mov.f32 %f1474, 0f3C23D70A; + max.f32 %f2865, %f1468, %f1474; + mov.f32 %f1475, 0f3F000000; + max.f32 %f1476, %f1473, %f1475; + min.f32 %f2864, %f1476, %f51; + add.s32 %r780, %r780, 1; + setp.lt.s32 %p531, %r780, %r759; + @%p531 bra $L__BB1_53; + +$L__BB1_337: + mov.f32 %f1492, 0f00000000; + mov.f32 %f2885, %f1492; + mov.f32 %f2886, %f1492; + mov.f32 %f2887, %f1492; + mov.f32 %f2890, %f1492; + mov.f32 %f2894, %f1492; + mov.f32 %f2888, %f1492; + mov.f32 %f2889, %f1492; + mov.f32 %f2891, %f1492; + mov.f32 %f2895, %f1492; + mov.f32 %f2892, %f1492; + mov.f32 %f2893, %f1492; + mov.f32 %f2896, %f1492; + mov.f32 %f2897, %f1492; + mov.f32 %f2898, %f1492; + mov.f32 %f2899, %f1492; + mov.f32 %f2927, %f1492; + @%p40 bra $L__BB1_426; + + mov.f32 %f1509, 0f3F000000; + div.rn.f32 %f1510, %f1509, %f2864; + div.rn.f32 %f1511, %f1510, %f2864; + div.rn.f32 %f1512, %f2866, 0fC0206C98; + div.rn.f32 %f250, %f1512, %f2864; + div.rn.f32 %f251, %f250, %f2864; + sqrt.rn.f32 %f252, %f1511; + mov.f32 %f1513, 0f3F800000; + cvt.rzi.f32.f32 %f1514, %f1513; + add.f32 %f1515, %f1514, %f1514; + mov.f32 %f1516, 0f40000000; + sub.f32 %f1517, %f1516, %f1515; + abs.f32 %f253, %f1517; + mov.u32 %r601, 0; + setp.eq.f32 %p540, %f253, 0f3F800000; + mov.u32 %r783, %r601; + +$L__BB1_339: + cvt.rn.f32.s32 %f1518, %r783; + sub.f32 %f270, %f1518, %f2868; + add.f32 %f1519, %f270, 0f3F000000; + mul.f32 %f1520, %f1519, %f252; + abs.f32 %f271, %f1520; + setp.ge.f32 %p533, %f271, 0f3F8060FE; + mul.f32 %f1521, %f1520, %f1520; + selp.f32 %f1522, %f271, %f1521, %p533; + selp.f32 %f1523, 0f3789CA3C, 0f38B1E96A, %p533; + selp.f32 %f1524, 0fB9F560B9, 0fBA574D20, %p533; + fma.rn.f32 %f1525, %f1523, %f1522, %f1524; + selp.f32 %f1526, 0f3BAC840B, 0f3BAAD5EA, %p533; + fma.rn.f32 %f1527, %f1525, %f1522, %f1526; + selp.f32 %f1528, 0fBD0C8162, 0fBCDC1BE7, %p533; + fma.rn.f32 %f1529, %f1527, %f1522, %f1528; + selp.f32 %f1530, 0f3E1CF906, 0f3DE718AF, %p533; + fma.rn.f32 %f1531, %f1529, %f1522, %f1530; + selp.f32 %f1532, 0f3F6A937E, 0fBEC093AC, %p533; + fma.rn.f32 %f1533, %f1531, %f1522, %f1532; + selp.f32 %f1534, 0f3F20D842, 0f3E0375D3, %p533; + fma.rn.f32 %f1535, %f1533, %f1522, %f1534; + neg.f32 %f1536, %f271; + selp.f32 %f1537, %f1536, %f1520, %p533; + fma.rn.f32 %f272, %f1535, %f1537, %f1537; + mov.b32 %r603, %f1520; + and.b32 %r93, %r603, -2147483648; + add.f32 %f1538, %f270, 0fBF000000; + mul.f32 %f1539, %f1538, %f252; + abs.f32 %f273, %f1539; + setp.ge.f32 %p534, %f273, 0f3F8060FE; + mul.f32 %f1540, %f1539, %f1539; + selp.f32 %f1541, %f273, %f1540, %p534; + selp.f32 %f1542, 0f3789CA3C, 0f38B1E96A, %p534; + selp.f32 %f1543, 0fB9F560B9, 0fBA574D20, %p534; + fma.rn.f32 %f1544, %f1542, %f1541, %f1543; + selp.f32 %f1545, 0f3BAC840B, 0f3BAAD5EA, %p534; + fma.rn.f32 %f1546, %f1544, %f1541, %f1545; + selp.f32 %f1547, 0fBD0C8162, 0fBCDC1BE7, %p534; + fma.rn.f32 %f1548, %f1546, %f1541, %f1547; + selp.f32 %f1549, 0f3E1CF906, 0f3DE718AF, %p534; + fma.rn.f32 %f1550, %f1548, %f1541, %f1549; + selp.f32 %f1551, 0f3F6A937E, 0fBEC093AC, %p534; + fma.rn.f32 %f1552, %f1550, %f1541, %f1551; + selp.f32 %f1553, 0f3F20D842, 0f3E0375D3, %p534; + fma.rn.f32 %f1554, %f1552, %f1541, %f1553; + neg.f32 %f1555, %f273; + selp.f32 %f1556, %f1555, %f1539, %p534; + fma.rn.f32 %f274, %f1554, %f1556, %f1556; + mov.b32 %r604, %f1539; + and.b32 %r94, %r604, -2147483648; + add.f32 %f1557, %f1518, 0f3F000000; + sub.f32 %f1558, %f1557, %f2868; + div.rn.f32 %f275, %f1558, %f2864; + abs.f32 %f276, %f275; + setp.lt.f32 %p535, %f276, 0f00800000; + mul.f32 %f1559, %f276, 0f4B800000; + selp.f32 %f1560, %f1559, %f276, %p535; + selp.f32 %f1561, 0fC3170000, 0fC2FE0000, %p535; + mov.b32 %r605, %f1560; + and.b32 %r606, %r605, 8388607; + or.b32 %r607, %r606, 1065353216; + mov.b32 %f1562, %r607; + shr.u32 %r608, %r605, 23; + cvt.rn.f32.u32 %f1563, %r608; + add.f32 %f1564, %f1561, %f1563; + setp.gt.f32 %p536, %f1562, 0f3FB504F3; + mul.f32 %f1565, %f1562, 0f3F000000; + add.f32 %f1566, %f1564, 0f3F800000; + selp.f32 %f1567, %f1566, %f1564, %p536; + selp.f32 %f1568, %f1565, %f1562, %p536; + add.f32 %f1569, %f1568, 0fBF800000; + add.f32 %f1570, %f1568, 0f3F800000; + rcp.approx.ftz.f32 %f1571, %f1570; + add.f32 %f1572, %f1569, %f1569; + mul.f32 %f1574, %f1572, %f1571; + mul.f32 %f1575, %f1574, %f1574; + mov.f32 %f1576, 0f3C4CAF63; + mov.f32 %f1577, 0f3B18F0FE; + fma.rn.f32 %f1578, %f1577, %f1575, %f1576; + mov.f32 %f1579, 0f3DAAAABD; + fma.rn.f32 %f1580, %f1578, %f1575, %f1579; + mul.rn.f32 %f1581, %f1580, %f1575; + mul.rn.f32 %f1582, %f1581, %f1574; + sub.f32 %f1583, %f1569, %f1574; + add.f32 %f1584, %f1583, %f1583; + neg.f32 %f1585, %f1574; + fma.rn.f32 %f1586, %f1585, %f1569, %f1584; + mul.rn.f32 %f1587, %f1571, %f1586; + add.f32 %f1588, %f1582, %f1574; + sub.f32 %f1589, %f1574, %f1588; + add.f32 %f1590, %f1582, %f1589; + add.f32 %f1591, %f1587, %f1590; + add.f32 %f1592, %f1588, %f1591; + sub.f32 %f1593, %f1588, %f1592; + add.f32 %f1594, %f1591, %f1593; + mov.f32 %f1595, 0f3F317200; + mul.rn.f32 %f1596, %f1567, %f1595; + mov.f32 %f1597, 0f35BFBE8E; + mul.rn.f32 %f1598, %f1567, %f1597; + add.f32 %f1599, %f1596, %f1592; + sub.f32 %f1600, %f1596, %f1599; + add.f32 %f1601, %f1592, %f1600; + add.f32 %f1602, %f1594, %f1601; + add.f32 %f1603, %f1598, %f1602; + add.f32 %f1604, %f1599, %f1603; + sub.f32 %f1605, %f1599, %f1604; + add.f32 %f1606, %f1603, %f1605; + mul.rn.f32 %f1607, %f1516, %f1604; + neg.f32 %f1608, %f1607; + fma.rn.f32 %f1609, %f1516, %f1604, %f1608; + fma.rn.f32 %f1610, %f1516, %f1606, %f1609; + fma.rn.f32 %f1612, %f1492, %f1604, %f1610; + add.rn.f32 %f1613, %f1607, %f1612; + neg.f32 %f1614, %f1613; + add.rn.f32 %f1615, %f1607, %f1614; + add.rn.f32 %f1616, %f1615, %f1612; + mov.b32 %r609, %f1613; + setp.eq.s32 %p537, %r609, 1118925336; + add.s32 %r610, %r609, -1; + mov.b32 %f1617, %r610; + add.f32 %f1618, %f1616, 0f37000000; + selp.f32 %f277, %f1618, %f1616, %p537; + selp.f32 %f1619, %f1617, %f1613, %p537; + mov.f32 %f1620, 0f3FB8AA3B; + mul.rn.f32 %f1621, %f1619, %f1620; + cvt.rzi.f32.f32 %f1622, %f1621; + abs.f32 %f1623, %f1622; + setp.gt.f32 %p538, %f1623, 0f42FC0000; + mov.b32 %r611, %f1622; + and.b32 %r612, %r611, -2147483648; + or.b32 %r613, %r612, 1123811328; + mov.b32 %f1624, %r613; + selp.f32 %f1625, %f1624, %f1622, %p538; + mov.f32 %f1626, 0fBF317218; + fma.rn.f32 %f1627, %f1625, %f1626, %f1619; + mov.f32 %f1628, 0f3102E308; + fma.rn.f32 %f1629, %f1625, %f1628, %f1627; + mul.f32 %f1630, %f1629, 0f3FB8AA3B; + add.f32 %f1631, %f1625, 0f4B40007F; + mov.b32 %r614, %f1631; + shl.b32 %r615, %r614, 23; + mov.b32 %f1632, %r615; + ex2.approx.ftz.f32 %f1633, %f1630; + mul.f32 %f278, %f1633, %f1632; + setp.lt.f32 %p539, %f275, 0f00000000; + and.pred %p27, %p539, %p540; + add.f32 %f1634, %f275, %f275; + selp.f32 %f279, %f1634, 0f00000000, %p540; + add.f32 %f1635, %f276, 0f40000000; + mov.b32 %r95, %f1635; + div.rn.f32 %f280, %f1538, %f2864; + abs.f32 %f281, %f280; + setp.lt.f32 %p541, %f281, 0f00800000; + mul.f32 %f1636, %f281, 0f4B800000; + selp.f32 %f1637, %f1636, %f281, %p541; + selp.f32 %f1638, 0fC3170000, 0fC2FE0000, %p541; + mov.b32 %r616, %f1637; + and.b32 %r617, %r616, 8388607; + or.b32 %r618, %r617, 1065353216; + mov.b32 %f1639, %r618; + shr.u32 %r619, %r616, 23; + cvt.rn.f32.u32 %f1640, %r619; + add.f32 %f1641, %f1638, %f1640; + setp.gt.f32 %p542, %f1639, 0f3FB504F3; + mul.f32 %f1642, %f1639, 0f3F000000; + add.f32 %f1643, %f1641, 0f3F800000; + selp.f32 %f1644, %f1643, %f1641, %p542; + selp.f32 %f1645, %f1642, %f1639, %p542; + add.f32 %f1646, %f1645, 0fBF800000; + add.f32 %f1647, %f1645, 0f3F800000; + rcp.approx.ftz.f32 %f1648, %f1647; + add.f32 %f1649, %f1646, %f1646; + mul.f32 %f1650, %f1649, %f1648; + mul.f32 %f1651, %f1650, %f1650; + fma.rn.f32 %f1652, %f1577, %f1651, %f1576; + fma.rn.f32 %f1653, %f1652, %f1651, %f1579; + mul.rn.f32 %f1654, %f1653, %f1651; + mul.rn.f32 %f1655, %f1654, %f1650; + sub.f32 %f1656, %f1646, %f1650; + add.f32 %f1657, %f1656, %f1656; + neg.f32 %f1658, %f1650; + fma.rn.f32 %f1659, %f1658, %f1646, %f1657; + mul.rn.f32 %f1660, %f1648, %f1659; + add.f32 %f1661, %f1655, %f1650; + sub.f32 %f1662, %f1650, %f1661; + add.f32 %f1663, %f1655, %f1662; + add.f32 %f1664, %f1660, %f1663; + add.f32 %f1665, %f1661, %f1664; + sub.f32 %f1666, %f1661, %f1665; + add.f32 %f1667, %f1664, %f1666; + mul.rn.f32 %f1668, %f1644, %f1595; + mul.rn.f32 %f1669, %f1644, %f1597; + add.f32 %f1670, %f1668, %f1665; + sub.f32 %f1671, %f1668, %f1670; + add.f32 %f1672, %f1665, %f1671; + add.f32 %f1673, %f1667, %f1672; + add.f32 %f1674, %f1669, %f1673; + add.f32 %f1675, %f1670, %f1674; + sub.f32 %f1676, %f1670, %f1675; + add.f32 %f1677, %f1674, %f1676; + mul.rn.f32 %f1678, %f1516, %f1675; + neg.f32 %f1679, %f1678; + fma.rn.f32 %f1680, %f1516, %f1675, %f1679; + fma.rn.f32 %f1681, %f1516, %f1677, %f1680; + fma.rn.f32 %f1682, %f1492, %f1675, %f1681; + add.rn.f32 %f1683, %f1678, %f1682; + neg.f32 %f1684, %f1683; + add.rn.f32 %f1685, %f1678, %f1684; + add.rn.f32 %f1686, %f1685, %f1682; + mov.b32 %r620, %f1683; + setp.eq.s32 %p543, %r620, 1118925336; + add.s32 %r621, %r620, -1; + mov.b32 %f1687, %r621; + add.f32 %f1688, %f1686, 0f37000000; + selp.f32 %f282, %f1688, %f1686, %p543; + selp.f32 %f1689, %f1687, %f1683, %p543; + mul.rn.f32 %f1690, %f1689, %f1620; + cvt.rzi.f32.f32 %f1691, %f1690; + abs.f32 %f1692, %f1691; + setp.gt.f32 %p544, %f1692, 0f42FC0000; + mov.b32 %r622, %f1691; + and.b32 %r623, %r622, -2147483648; + or.b32 %r624, %r623, 1123811328; + mov.b32 %f1693, %r624; + selp.f32 %f1694, %f1693, %f1691, %p544; + fma.rn.f32 %f1695, %f1694, %f1626, %f1689; + fma.rn.f32 %f1696, %f1694, %f1628, %f1695; + mul.f32 %f1697, %f1696, 0f3FB8AA3B; + add.f32 %f1698, %f1694, 0f4B40007F; + mov.b32 %r625, %f1698; + shl.b32 %r626, %r625, 23; + mov.b32 %f1699, %r626; + ex2.approx.ftz.f32 %f1700, %f1697; + mul.f32 %f283, %f1700, %f1699; + add.f32 %f284, %f275, 0f40000000; + setp.lt.f32 %p545, %f280, 0f00000000; + and.pred %p28, %p545, %p540; + selp.f32 %f285, 0fFF800000, 0f7F800000, %p27; + add.f32 %f1701, %f280, %f280; + selp.f32 %f286, %f1701, 0f00000000, %p540; + add.f32 %f1702, %f281, 0f40000000; + mov.b32 %r96, %f1702; + add.f32 %f287, %f280, 0f40000000; + selp.f32 %f288, 0fFF800000, 0f7F800000, %p28; + add.f32 %f1703, %f1518, 0f3F800000; + sub.f32 %f1704, %f1703, %f2868; + div.rn.f32 %f289, %f1704, %f2864; + abs.f32 %f290, %f289; + setp.lt.f32 %p546, %f290, 0f00800000; + mul.f32 %f1705, %f290, 0f4B800000; + selp.f32 %f1706, %f1705, %f290, %p546; + selp.f32 %f1707, 0fC3170000, 0fC2FE0000, %p546; + mov.b32 %r627, %f1706; + and.b32 %r628, %r627, 8388607; + or.b32 %r629, %r628, 1065353216; + mov.b32 %f1708, %r629; + shr.u32 %r630, %r627, 23; + cvt.rn.f32.u32 %f1709, %r630; + add.f32 %f1710, %f1707, %f1709; + setp.gt.f32 %p547, %f1708, 0f3FB504F3; + mul.f32 %f1711, %f1708, 0f3F000000; + add.f32 %f1712, %f1710, 0f3F800000; + selp.f32 %f1713, %f1712, %f1710, %p547; + selp.f32 %f1714, %f1711, %f1708, %p547; + add.f32 %f1715, %f1714, 0fBF800000; + add.f32 %f1716, %f1714, 0f3F800000; + rcp.approx.ftz.f32 %f1717, %f1716; + add.f32 %f1718, %f1715, %f1715; + mul.f32 %f1719, %f1718, %f1717; + mul.f32 %f1720, %f1719, %f1719; + fma.rn.f32 %f1721, %f1577, %f1720, %f1576; + fma.rn.f32 %f1722, %f1721, %f1720, %f1579; + mul.rn.f32 %f1723, %f1722, %f1720; + mul.rn.f32 %f1724, %f1723, %f1719; + sub.f32 %f1725, %f1715, %f1719; + add.f32 %f1726, %f1725, %f1725; + neg.f32 %f1727, %f1719; + fma.rn.f32 %f1728, %f1727, %f1715, %f1726; + mul.rn.f32 %f1729, %f1717, %f1728; + add.f32 %f1730, %f1724, %f1719; + sub.f32 %f1731, %f1719, %f1730; + add.f32 %f1732, %f1724, %f1731; + add.f32 %f1733, %f1729, %f1732; + add.f32 %f1734, %f1730, %f1733; + sub.f32 %f1735, %f1730, %f1734; + add.f32 %f1736, %f1733, %f1735; + mul.rn.f32 %f1737, %f1713, %f1595; + mul.rn.f32 %f1738, %f1713, %f1597; + add.f32 %f1739, %f1737, %f1734; + sub.f32 %f1740, %f1737, %f1739; + add.f32 %f1741, %f1734, %f1740; + add.f32 %f1742, %f1736, %f1741; + add.f32 %f1743, %f1738, %f1742; + add.f32 %f1744, %f1739, %f1743; + sub.f32 %f1745, %f1739, %f1744; + add.f32 %f1746, %f1743, %f1745; + mul.rn.f32 %f1747, %f1516, %f1744; + neg.f32 %f1748, %f1747; + fma.rn.f32 %f1749, %f1516, %f1744, %f1748; + fma.rn.f32 %f1750, %f1516, %f1746, %f1749; + fma.rn.f32 %f1751, %f1492, %f1744, %f1750; + add.rn.f32 %f1752, %f1747, %f1751; + neg.f32 %f1753, %f1752; + add.rn.f32 %f1754, %f1747, %f1753; + add.rn.f32 %f1755, %f1754, %f1751; + mov.b32 %r631, %f1752; + setp.eq.s32 %p548, %r631, 1118925336; + add.s32 %r632, %r631, -1; + mov.b32 %f1756, %r632; + add.f32 %f1757, %f1755, 0f37000000; + selp.f32 %f291, %f1757, %f1755, %p548; + selp.f32 %f1758, %f1756, %f1752, %p548; + mul.rn.f32 %f1759, %f1758, %f1620; + cvt.rzi.f32.f32 %f1760, %f1759; + abs.f32 %f1761, %f1760; + setp.gt.f32 %p549, %f1761, 0f42FC0000; + mov.b32 %r633, %f1760; + and.b32 %r634, %r633, -2147483648; + or.b32 %r635, %r634, 1123811328; + mov.b32 %f1762, %r635; + selp.f32 %f1763, %f1762, %f1760, %p549; + fma.rn.f32 %f1764, %f1763, %f1626, %f1758; + fma.rn.f32 %f1765, %f1763, %f1628, %f1764; + mul.f32 %f1766, %f1765, 0f3FB8AA3B; + add.f32 %f1767, %f1763, 0f4B40007F; + mov.b32 %r636, %f1767; + shl.b32 %r637, %r636, 23; + mov.b32 %f1768, %r637; + ex2.approx.ftz.f32 %f1769, %f1766; + mul.f32 %f292, %f1769, %f1768; + setp.lt.f32 %p550, %f289, 0f00000000; + and.pred %p29, %p550, %p540; + add.f32 %f1770, %f289, %f289; + selp.f32 %f293, %f1770, 0f00000000, %p540; + add.f32 %f1771, %f290, 0f40000000; + mov.b32 %r97, %f1771; + div.rn.f32 %f294, %f270, %f2864; + abs.f32 %f295, %f294; + setp.lt.f32 %p551, %f295, 0f00800000; + mul.f32 %f1772, %f295, 0f4B800000; + selp.f32 %f1773, %f1772, %f295, %p551; + selp.f32 %f1774, 0fC3170000, 0fC2FE0000, %p551; + mov.b32 %r638, %f1773; + and.b32 %r639, %r638, 8388607; + or.b32 %r640, %r639, 1065353216; + mov.b32 %f1775, %r640; + shr.u32 %r641, %r638, 23; + cvt.rn.f32.u32 %f1776, %r641; + add.f32 %f1777, %f1774, %f1776; + setp.gt.f32 %p552, %f1775, 0f3FB504F3; + mul.f32 %f1778, %f1775, 0f3F000000; + add.f32 %f1779, %f1777, 0f3F800000; + selp.f32 %f1780, %f1779, %f1777, %p552; + selp.f32 %f1781, %f1778, %f1775, %p552; + add.f32 %f1782, %f1781, 0fBF800000; + add.f32 %f1783, %f1781, 0f3F800000; + rcp.approx.ftz.f32 %f1784, %f1783; + add.f32 %f1785, %f1782, %f1782; + mul.f32 %f1786, %f1785, %f1784; + mul.f32 %f1787, %f1786, %f1786; + fma.rn.f32 %f1788, %f1577, %f1787, %f1576; + fma.rn.f32 %f1789, %f1788, %f1787, %f1579; + mul.rn.f32 %f1790, %f1789, %f1787; + mul.rn.f32 %f1791, %f1790, %f1786; + sub.f32 %f1792, %f1782, %f1786; + add.f32 %f1793, %f1792, %f1792; + neg.f32 %f1794, %f1786; + fma.rn.f32 %f1795, %f1794, %f1782, %f1793; + mul.rn.f32 %f1796, %f1784, %f1795; + add.f32 %f1797, %f1791, %f1786; + sub.f32 %f1798, %f1786, %f1797; + add.f32 %f1799, %f1791, %f1798; + add.f32 %f1800, %f1796, %f1799; + add.f32 %f1801, %f1797, %f1800; + sub.f32 %f1802, %f1797, %f1801; + add.f32 %f1803, %f1800, %f1802; + mul.rn.f32 %f1804, %f1780, %f1595; + mul.rn.f32 %f1805, %f1780, %f1597; + add.f32 %f1806, %f1804, %f1801; + sub.f32 %f1807, %f1804, %f1806; + add.f32 %f1808, %f1801, %f1807; + add.f32 %f1809, %f1803, %f1808; + add.f32 %f1810, %f1805, %f1809; + add.f32 %f1811, %f1806, %f1810; + sub.f32 %f1812, %f1806, %f1811; + add.f32 %f1813, %f1810, %f1812; + mul.rn.f32 %f1814, %f1516, %f1811; + neg.f32 %f1815, %f1814; + fma.rn.f32 %f1816, %f1516, %f1811, %f1815; + fma.rn.f32 %f1817, %f1516, %f1813, %f1816; + fma.rn.f32 %f1818, %f1492, %f1811, %f1817; + add.rn.f32 %f1819, %f1814, %f1818; + neg.f32 %f1820, %f1819; + add.rn.f32 %f1821, %f1814, %f1820; + add.rn.f32 %f1822, %f1821, %f1818; + mov.b32 %r642, %f1819; + setp.eq.s32 %p553, %r642, 1118925336; + add.s32 %r643, %r642, -1; + mov.b32 %f1823, %r643; + add.f32 %f1824, %f1822, 0f37000000; + selp.f32 %f296, %f1824, %f1822, %p553; + selp.f32 %f1825, %f1823, %f1819, %p553; + mul.rn.f32 %f1826, %f1825, %f1620; + cvt.rzi.f32.f32 %f1827, %f1826; + abs.f32 %f1828, %f1827; + setp.gt.f32 %p554, %f1828, 0f42FC0000; + mov.b32 %r644, %f1827; + and.b32 %r645, %r644, -2147483648; + or.b32 %r646, %r645, 1123811328; + mov.b32 %f1829, %r646; + selp.f32 %f1830, %f1829, %f1827, %p554; + fma.rn.f32 %f1831, %f1830, %f1626, %f1825; + fma.rn.f32 %f1832, %f1830, %f1628, %f1831; + mul.f32 %f1833, %f1832, 0f3FB8AA3B; + add.f32 %f1834, %f1830, 0f4B40007F; + mov.b32 %r647, %f1834; + shl.b32 %r648, %r647, 23; + mov.b32 %f1835, %r648; + ex2.approx.ftz.f32 %f1836, %f1833; + mul.f32 %f297, %f1836, %f1835; + add.f32 %f298, %f289, 0f40000000; + setp.lt.f32 %p555, %f294, 0f00000000; + and.pred %p30, %p555, %p540; + selp.f32 %f299, 0fFF800000, 0f7F800000, %p29; + add.f32 %f1837, %f294, %f294; + selp.f32 %f300, %f1837, 0f00000000, %p540; + add.f32 %f1838, %f295, 0f40000000; + mov.b32 %r98, %f1838; + add.f32 %f301, %f270, 0f3F800000; + add.f32 %f302, %f294, 0f40000000; + selp.f32 %f303, 0fFF800000, 0f7F800000, %p30; + setp.geu.f32 %p31, %f275, 0f00000000; + setp.geu.f32 %p32, %f280, 0f00000000; + setp.geu.f32 %p33, %f289, 0f00000000; + setp.geu.f32 %p34, %f294, 0f00000000; + mov.u32 %r784, %r601; + +$L__BB1_340: + setp.ltu.f32 %p556, %f271, 0f3F8060FE; + mov.f32 %f2901, %f272; + @%p556 bra $L__BB1_342; + + ex2.approx.ftz.f32 %f1839, %f272; + sub.f32 %f1841, %f1513, %f1839; + mov.b32 %r649, %f1841; + or.b32 %r650, %r93, %r649; + mov.b32 %f2901, %r650; + +$L__BB1_342: + setp.ltu.f32 %p557, %f273, 0f3F8060FE; + mov.f32 %f2902, %f274; + @%p557 bra $L__BB1_344; + + ex2.approx.ftz.f32 %f1842, %f274; + sub.f32 %f1844, %f1513, %f1842; + mov.b32 %r651, %f1844; + or.b32 %r652, %r94, %r651; + mov.b32 %f2902, %r652; + +$L__BB1_344: + sub.f32 %f1845, %f2901, %f2902; + mul.f32 %f324, %f1845, 0f3F000000; + cvt.rn.f32.s32 %f325, %r784; + sub.f32 %f326, %f325, %f2867; + add.f32 %f1846, %f326, 0f3F000000; + mul.f32 %f327, %f252, %f1846; + abs.f32 %f1847, %f327; + setp.ltu.f32 %p558, %f1847, 0f3F8060FE; + setp.ge.f32 %p559, %f1847, 0f3F8060FE; + mul.f32 %f1848, %f327, %f327; + selp.f32 %f1849, %f1847, %f1848, %p559; + selp.f32 %f1850, 0f3789CA3C, 0f38B1E96A, %p559; + selp.f32 %f1851, 0fB9F560B9, 0fBA574D20, %p559; + fma.rn.f32 %f1852, %f1850, %f1849, %f1851; + selp.f32 %f1853, 0f3BAC840B, 0f3BAAD5EA, %p559; + fma.rn.f32 %f1854, %f1852, %f1849, %f1853; + selp.f32 %f1855, 0fBD0C8162, 0fBCDC1BE7, %p559; + fma.rn.f32 %f1856, %f1854, %f1849, %f1855; + selp.f32 %f1857, 0f3E1CF906, 0f3DE718AF, %p559; + fma.rn.f32 %f1858, %f1856, %f1849, %f1857; + selp.f32 %f1859, 0f3F6A937E, 0fBEC093AC, %p559; + fma.rn.f32 %f1860, %f1858, %f1849, %f1859; + selp.f32 %f1861, 0f3F20D842, 0f3E0375D3, %p559; + fma.rn.f32 %f1862, %f1860, %f1849, %f1861; + neg.f32 %f1863, %f1847; + selp.f32 %f1864, %f1863, %f327, %p559; + fma.rn.f32 %f2903, %f1862, %f1864, %f1864; + @%p558 bra $L__BB1_346; + + ex2.approx.ftz.f32 %f1865, %f2903; + sub.f32 %f1867, %f1513, %f1865; + mov.b32 %r653, %f1867; + mov.b32 %r654, %f327; + and.b32 %r655, %r654, -2147483648; + or.b32 %r656, %r655, %r653; + mov.b32 %f2903, %r656; + +$L__BB1_346: + add.f32 %f331, %f326, 0fBF000000; + mul.f32 %f332, %f252, %f331; + abs.f32 %f1868, %f332; + setp.ltu.f32 %p560, %f1868, 0f3F8060FE; + setp.ge.f32 %p561, %f1868, 0f3F8060FE; + mul.f32 %f1869, %f332, %f332; + selp.f32 %f1870, %f1868, %f1869, %p561; + selp.f32 %f1871, 0f3789CA3C, 0f38B1E96A, %p561; + selp.f32 %f1872, 0fB9F560B9, 0fBA574D20, %p561; + fma.rn.f32 %f1873, %f1871, %f1870, %f1872; + selp.f32 %f1874, 0f3BAC840B, 0f3BAAD5EA, %p561; + fma.rn.f32 %f1875, %f1873, %f1870, %f1874; + selp.f32 %f1876, 0fBD0C8162, 0fBCDC1BE7, %p561; + fma.rn.f32 %f1877, %f1875, %f1870, %f1876; + selp.f32 %f1878, 0f3E1CF906, 0f3DE718AF, %p561; + fma.rn.f32 %f1879, %f1877, %f1870, %f1878; + selp.f32 %f1880, 0f3F6A937E, 0fBEC093AC, %p561; + fma.rn.f32 %f1881, %f1879, %f1870, %f1880; + selp.f32 %f1882, 0f3F20D842, 0f3E0375D3, %p561; + fma.rn.f32 %f1883, %f1881, %f1870, %f1882; + neg.f32 %f1884, %f1868; + selp.f32 %f1885, %f1884, %f332, %p561; + fma.rn.f32 %f2904, %f1883, %f1885, %f1885; + @%p560 bra $L__BB1_348; + + ex2.approx.ftz.f32 %f1886, %f2904; + sub.f32 %f1888, %f1513, %f1886; + mov.b32 %r657, %f1888; + mov.b32 %r658, %f332; + and.b32 %r659, %r658, -2147483648; + or.b32 %r660, %r659, %r657; + mov.b32 %f2904, %r660; + +$L__BB1_348: + sub.f32 %f1890, %f2903, %f2904; + mul.f32 %f336, %f1890, 0f3F000000; + mul.f32 %f1891, %f324, %f2866; + fma.rn.f32 %f337, %f336, %f1891, %f2865; + mad.lo.s32 %r661, %r784, %r102, %r783; + add.s32 %r662, %r661, %r2; + mul.wide.s32 %rd28, %r662, 4; + add.s64 %rd29, %rd1, %rd28; + ld.global.f32 %f338, [%rd29]; + setp.eq.f32 %p562, %f278, 0f7F800000; + mov.f32 %f2905, 0f7F800000; + @%p562 bra $L__BB1_350; + + fma.rn.f32 %f2905, %f278, %f277, %f278; + +$L__BB1_350: + mov.b32 %r663, %f2905; + xor.b32 %r664, %r663, -2147483648; + mov.b32 %f1892, %r664; + selp.f32 %f341, %f1892, %f2905, %p27; + setp.eq.f32 %p563, %f275, 0f00000000; + selp.f32 %f2906, %f279, %f341, %p563; + @%p31 bra $L__BB1_353; + + cvt.rzi.f32.f32 %f1894, %f1516; + setp.eq.f32 %p564, %f1894, 0f40000000; + mov.f32 %f2906, %f341; + @%p564 bra $L__BB1_353; + + mov.f32 %f2906, 0f7FFFFFFF; + +$L__BB1_353: + setp.eq.f32 %p565, %f283, 0f7F800000; + mov.f32 %f2907, 0f7F800000; + @%p565 bra $L__BB1_355; + + fma.rn.f32 %f2907, %f283, %f282, %f283; + +$L__BB1_355: + mov.b32 %r665, %f2907; + xor.b32 %r666, %r665, -2147483648; + mov.b32 %f1897, %r666; + selp.f32 %f346, %f1897, %f2907, %p28; + setp.eq.f32 %p566, %f280, 0f00000000; + selp.f32 %f2908, %f286, %f346, %p566; + @%p32 bra $L__BB1_358; + + cvt.rzi.f32.f32 %f1899, %f1516; + setp.eq.f32 %p567, %f1899, 0f40000000; + mov.f32 %f2908, %f346; + @%p567 bra $L__BB1_358; + + mov.f32 %f2908, 0f7FFFFFFF; + +$L__BB1_358: + setp.gtu.f32 %p568, %f276, 0f7F800000; + mov.f32 %f2909, 0f7F800000; + selp.f32 %f1902, %f284, %f2906, %p568; + setp.neu.f32 %p569, %f276, 0f7F800000; + selp.f32 %f1903, %f1902, %f285, %p569; + setp.gt.s32 %p570, %r95, 2139095039; + selp.f32 %f1904, %f1903, %f2906, %p570; + mul.f32 %f1905, %f1904, 0fBF000000; + setp.eq.f32 %p571, %f275, 0f3F800000; + selp.f32 %f1906, 0fBF000000, %f1905, %p571; + mov.f32 %f1908, 0f3BBB989D; + fma.rn.f32 %f1909, %f1906, %f1908, %f1509; + mov.f32 %f1911, 0f437C0000; + cvt.sat.f32.f32 %f1912, %f1909; + mov.f32 %f1913, 0f4B400001; + fma.rm.f32 %f1914, %f1912, %f1911, %f1913; + setp.gtu.f32 %p572, %f281, 0f7F800000; + selp.f32 %f1915, %f287, %f2908, %p572; + setp.neu.f32 %p573, %f281, 0f7F800000; + selp.f32 %f1916, %f1915, %f288, %p573; + setp.gt.s32 %p574, %r96, 2139095039; + selp.f32 %f1917, %f1916, %f2908, %p574; + mul.f32 %f1918, %f1917, 0fBF000000; + setp.eq.f32 %p575, %f280, 0f3F800000; + selp.f32 %f1919, 0fBF000000, %f1918, %p575; + fma.rn.f32 %f1920, %f1919, %f1908, %f1509; + cvt.sat.f32.f32 %f1921, %f1920; + fma.rm.f32 %f1922, %f1921, %f1911, %f1913; + add.f32 %f1923, %f1922, 0fCB40007F; + neg.f32 %f1924, %f1923; + fma.rn.f32 %f1925, %f1919, %f1620, %f1924; + mov.f32 %f1926, 0f32A57060; + fma.rn.f32 %f1927, %f1919, %f1926, %f1925; + mov.b32 %r667, %f1922; + shl.b32 %r668, %r667, 23; + mov.b32 %f1928, %r668; + ex2.approx.ftz.f32 %f1929, %f1927; + mul.f32 %f1930, %f1929, %f1928; + mov.b32 %r669, %f1914; + shl.b32 %r670, %r669, 23; + mov.b32 %f1931, %r670; + add.f32 %f1932, %f1914, 0fCB40007F; + neg.f32 %f1933, %f1932; + fma.rn.f32 %f1934, %f1906, %f1620, %f1933; + fma.rn.f32 %f1935, %f1906, %f1926, %f1934; + ex2.approx.ftz.f32 %f1936, %f1935; + mul.f32 %f1937, %f1936, %f1931; + sub.f32 %f1938, %f1937, %f1930; + mul.f32 %f1939, %f250, %f1938; + mul.f32 %f349, %f336, %f1939; + add.f32 %f1940, %f325, 0f3F000000; + sub.f32 %f1941, %f1940, %f2867; + div.rn.f32 %f350, %f1941, %f2864; + abs.f32 %f351, %f350; + setp.lt.f32 %p576, %f351, 0f00800000; + mul.f32 %f1942, %f351, 0f4B800000; + selp.f32 %f1943, %f1942, %f351, %p576; + selp.f32 %f1944, 0fC3170000, 0fC2FE0000, %p576; + mov.b32 %r671, %f1943; + and.b32 %r672, %r671, 8388607; + or.b32 %r673, %r672, 1065353216; + mov.b32 %f1945, %r673; + shr.u32 %r674, %r671, 23; + cvt.rn.f32.u32 %f1946, %r674; + add.f32 %f1947, %f1944, %f1946; + setp.gt.f32 %p577, %f1945, 0f3FB504F3; + mul.f32 %f1948, %f1945, 0f3F000000; + add.f32 %f1949, %f1947, 0f3F800000; + selp.f32 %f1950, %f1949, %f1947, %p577; + selp.f32 %f1951, %f1948, %f1945, %p577; + add.f32 %f1952, %f1951, 0fBF800000; + add.f32 %f1953, %f1951, 0f3F800000; + rcp.approx.ftz.f32 %f1954, %f1953; + add.f32 %f1955, %f1952, %f1952; + mul.f32 %f1957, %f1955, %f1954; + mul.f32 %f1958, %f1957, %f1957; + fma.rn.f32 %f1961, %f1577, %f1958, %f1576; + fma.rn.f32 %f1963, %f1961, %f1958, %f1579; + mul.rn.f32 %f1964, %f1963, %f1958; + mul.rn.f32 %f1965, %f1964, %f1957; + sub.f32 %f1966, %f1952, %f1957; + add.f32 %f1967, %f1966, %f1966; + neg.f32 %f1968, %f1957; + fma.rn.f32 %f1969, %f1968, %f1952, %f1967; + mul.rn.f32 %f1970, %f1954, %f1969; + add.f32 %f1971, %f1965, %f1957; + sub.f32 %f1972, %f1957, %f1971; + add.f32 %f1973, %f1965, %f1972; + add.f32 %f1974, %f1970, %f1973; + add.f32 %f1975, %f1971, %f1974; + sub.f32 %f1976, %f1971, %f1975; + add.f32 %f1977, %f1974, %f1976; + mul.rn.f32 %f1979, %f1950, %f1595; + mul.rn.f32 %f1981, %f1950, %f1597; + add.f32 %f1982, %f1979, %f1975; + sub.f32 %f1983, %f1979, %f1982; + add.f32 %f1984, %f1975, %f1983; + add.f32 %f1985, %f1977, %f1984; + add.f32 %f1986, %f1981, %f1985; + add.f32 %f1987, %f1982, %f1986; + sub.f32 %f1988, %f1982, %f1987; + add.f32 %f1989, %f1986, %f1988; + mul.rn.f32 %f1990, %f1516, %f1987; + neg.f32 %f1991, %f1990; + fma.rn.f32 %f1992, %f1516, %f1987, %f1991; + fma.rn.f32 %f1993, %f1516, %f1989, %f1992; + mov.f32 %f1994, 0f00000000; + fma.rn.f32 %f1995, %f1994, %f1987, %f1993; + add.rn.f32 %f1996, %f1990, %f1995; + neg.f32 %f1997, %f1996; + add.rn.f32 %f1998, %f1990, %f1997; + add.rn.f32 %f1999, %f1998, %f1995; + mov.b32 %r675, %f1996; + setp.eq.s32 %p578, %r675, 1118925336; + add.s32 %r676, %r675, -1; + mov.b32 %f2000, %r676; + add.f32 %f2001, %f1999, 0f37000000; + selp.f32 %f352, %f2001, %f1999, %p578; + selp.f32 %f2002, %f2000, %f1996, %p578; + mul.rn.f32 %f2003, %f2002, %f1620; + cvt.rzi.f32.f32 %f2004, %f2003; + abs.f32 %f2005, %f2004; + setp.gt.f32 %p579, %f2005, 0f42FC0000; + mov.b32 %r677, %f2004; + and.b32 %r678, %r677, -2147483648; + or.b32 %r679, %r678, 1123811328; + mov.b32 %f2006, %r679; + selp.f32 %f2007, %f2006, %f2004, %p579; + fma.rn.f32 %f2009, %f2007, %f1626, %f2002; + fma.rn.f32 %f2011, %f2007, %f1628, %f2009; + mul.f32 %f2012, %f2011, 0f3FB8AA3B; + add.f32 %f2013, %f2007, 0f4B40007F; + mov.b32 %r680, %f2013; + shl.b32 %r681, %r680, 23; + mov.b32 %f2014, %r681; + ex2.approx.ftz.f32 %f2015, %f2012; + mul.f32 %f353, %f2015, %f2014; + setp.eq.f32 %p580, %f353, 0f7F800000; + @%p580 bra $L__BB1_360; + + fma.rn.f32 %f2909, %f353, %f352, %f353; + +$L__BB1_360: + setp.lt.f32 %p581, %f350, 0f00000000; + and.pred %p35, %p581, %p540; + setp.eq.f32 %p583, %f350, 0f00000000; + @%p583 bra $L__BB1_364; + bra.uni $L__BB1_361; + +$L__BB1_364: + add.f32 %f2020, %f350, %f350; + selp.f32 %f2911, %f2020, 0f00000000, %p540; + bra.uni $L__BB1_365; + +$L__BB1_361: + mov.b32 %r682, %f2909; + xor.b32 %r683, %r682, -2147483648; + mov.b32 %f2016, %r683; + selp.f32 %f2911, %f2016, %f2909, %p35; + setp.geu.f32 %p584, %f350, 0f00000000; + @%p584 bra $L__BB1_365; + + cvt.rzi.f32.f32 %f2018, %f1516; + setp.eq.f32 %p585, %f2018, 0f40000000; + @%p585 bra $L__BB1_365; + + mov.f32 %f2911, 0f7FFFFFFF; + +$L__BB1_365: + add.f32 %f2021, %f351, 0f40000000; + mov.b32 %r684, %f2021; + setp.lt.s32 %p587, %r684, 2139095040; + @%p587 bra $L__BB1_370; + + setp.gtu.f32 %p588, %f351, 0f7F800000; + @%p588 bra $L__BB1_369; + bra.uni $L__BB1_367; + +$L__BB1_369: + add.f32 %f2911, %f350, 0f40000000; + bra.uni $L__BB1_370; + +$L__BB1_367: + setp.neu.f32 %p589, %f351, 0f7F800000; + @%p589 bra $L__BB1_370; + + selp.f32 %f2911, 0fFF800000, 0f7F800000, %p35; + +$L__BB1_370: + mul.f32 %f2023, %f2911, 0fBF000000; + setp.eq.f32 %p590, %f350, 0f3F800000; + selp.f32 %f2024, 0fBF000000, %f2023, %p590; + fma.rn.f32 %f2027, %f2024, %f1908, %f1509; + cvt.sat.f32.f32 %f2030, %f2027; + fma.rm.f32 %f2032, %f2030, %f1911, %f1913; + add.f32 %f2033, %f2032, 0fCB40007F; + neg.f32 %f2034, %f2033; + fma.rn.f32 %f2035, %f2024, %f1620, %f2034; + fma.rn.f32 %f2037, %f2024, %f1926, %f2035; + mov.b32 %r685, %f2032; + shl.b32 %r686, %r685, 23; + mov.b32 %f2038, %r686; + ex2.approx.ftz.f32 %f2039, %f2037; + mul.f32 %f362, %f2039, %f2038; + div.rn.f32 %f363, %f331, %f2864; + abs.f32 %f364, %f363; + setp.lt.f32 %p591, %f364, 0f00800000; + mul.f32 %f2040, %f364, 0f4B800000; + selp.f32 %f2041, %f2040, %f364, %p591; + selp.f32 %f2042, 0fC3170000, 0fC2FE0000, %p591; + mov.b32 %r687, %f2041; + and.b32 %r688, %r687, 8388607; + or.b32 %r689, %r688, 1065353216; + mov.b32 %f2043, %r689; + shr.u32 %r690, %r687, 23; + cvt.rn.f32.u32 %f2044, %r690; + add.f32 %f2045, %f2042, %f2044; + setp.gt.f32 %p592, %f2043, 0f3FB504F3; + mul.f32 %f2046, %f2043, 0f3F000000; + add.f32 %f2047, %f2045, 0f3F800000; + selp.f32 %f2048, %f2047, %f2045, %p592; + selp.f32 %f2049, %f2046, %f2043, %p592; + add.f32 %f2050, %f2049, 0fBF800000; + add.f32 %f2051, %f2049, 0f3F800000; + rcp.approx.ftz.f32 %f2052, %f2051; + add.f32 %f2053, %f2050, %f2050; + mul.f32 %f2055, %f2053, %f2052; + mul.f32 %f2056, %f2055, %f2055; + fma.rn.f32 %f2059, %f1577, %f2056, %f1576; + fma.rn.f32 %f2061, %f2059, %f2056, %f1579; + mul.rn.f32 %f2062, %f2061, %f2056; + mul.rn.f32 %f2063, %f2062, %f2055; + sub.f32 %f2064, %f2050, %f2055; + add.f32 %f2065, %f2064, %f2064; + neg.f32 %f2066, %f2055; + fma.rn.f32 %f2067, %f2066, %f2050, %f2065; + mul.rn.f32 %f2068, %f2052, %f2067; + add.f32 %f2069, %f2063, %f2055; + sub.f32 %f2070, %f2055, %f2069; + add.f32 %f2071, %f2063, %f2070; + add.f32 %f2072, %f2068, %f2071; + add.f32 %f2073, %f2069, %f2072; + sub.f32 %f2074, %f2069, %f2073; + add.f32 %f2075, %f2072, %f2074; + mul.rn.f32 %f2077, %f2048, %f1595; + mul.rn.f32 %f2079, %f2048, %f1597; + add.f32 %f2080, %f2077, %f2073; + sub.f32 %f2081, %f2077, %f2080; + add.f32 %f2082, %f2073, %f2081; + add.f32 %f2083, %f2075, %f2082; + add.f32 %f2084, %f2079, %f2083; + add.f32 %f2085, %f2080, %f2084; + sub.f32 %f2086, %f2080, %f2085; + add.f32 %f2087, %f2084, %f2086; + mul.rn.f32 %f2088, %f1516, %f2085; + neg.f32 %f2089, %f2088; + fma.rn.f32 %f2090, %f1516, %f2085, %f2089; + fma.rn.f32 %f2091, %f1516, %f2087, %f2090; + fma.rn.f32 %f2093, %f1994, %f2085, %f2091; + add.rn.f32 %f2094, %f2088, %f2093; + neg.f32 %f2095, %f2094; + add.rn.f32 %f2096, %f2088, %f2095; + add.rn.f32 %f2097, %f2096, %f2093; + mov.b32 %r691, %f2094; + setp.eq.s32 %p593, %r691, 1118925336; + add.s32 %r692, %r691, -1; + mov.b32 %f2098, %r692; + add.f32 %f2099, %f2097, 0f37000000; + selp.f32 %f365, %f2099, %f2097, %p593; + selp.f32 %f2100, %f2098, %f2094, %p593; + mul.rn.f32 %f2101, %f2100, %f1620; + cvt.rzi.f32.f32 %f2102, %f2101; + abs.f32 %f2103, %f2102; + setp.gt.f32 %p594, %f2103, 0f42FC0000; + mov.b32 %r693, %f2102; + and.b32 %r694, %r693, -2147483648; + or.b32 %r695, %r694, 1123811328; + mov.b32 %f2104, %r695; + selp.f32 %f2105, %f2104, %f2102, %p594; + fma.rn.f32 %f2107, %f2105, %f1626, %f2100; + fma.rn.f32 %f2109, %f2105, %f1628, %f2107; + mul.f32 %f2110, %f2109, 0f3FB8AA3B; + add.f32 %f2111, %f2105, 0f4B40007F; + mov.b32 %r696, %f2111; + shl.b32 %r697, %r696, 23; + mov.b32 %f2112, %r697; + ex2.approx.ftz.f32 %f2113, %f2110; + mul.f32 %f366, %f2113, %f2112; + setp.eq.f32 %p595, %f366, 0f7F800000; + mov.f32 %f2912, 0f7F800000; + @%p595 bra $L__BB1_372; + + fma.rn.f32 %f2912, %f366, %f365, %f366; + +$L__BB1_372: + setp.lt.f32 %p596, %f363, 0f00000000; + and.pred %p36, %p596, %p540; + setp.eq.f32 %p598, %f363, 0f00000000; + @%p598 bra $L__BB1_376; + bra.uni $L__BB1_373; + +$L__BB1_376: + add.f32 %f2118, %f363, %f363; + selp.f32 %f2914, %f2118, 0f00000000, %p540; + bra.uni $L__BB1_377; + +$L__BB1_373: + mov.b32 %r698, %f2912; + xor.b32 %r699, %r698, -2147483648; + mov.b32 %f2114, %r699; + selp.f32 %f2914, %f2114, %f2912, %p36; + setp.geu.f32 %p599, %f363, 0f00000000; + @%p599 bra $L__BB1_377; + + cvt.rzi.f32.f32 %f2116, %f1516; + setp.eq.f32 %p600, %f2116, 0f40000000; + @%p600 bra $L__BB1_377; + + mov.f32 %f2914, 0f7FFFFFFF; + +$L__BB1_377: + add.f32 %f2119, %f364, 0f40000000; + mov.b32 %r700, %f2119; + setp.lt.s32 %p602, %r700, 2139095040; + @%p602 bra $L__BB1_382; + + setp.gtu.f32 %p603, %f364, 0f7F800000; + @%p603 bra $L__BB1_381; + bra.uni $L__BB1_379; + +$L__BB1_381: + add.f32 %f2914, %f363, 0f40000000; + bra.uni $L__BB1_382; + +$L__BB1_379: + setp.neu.f32 %p604, %f364, 0f7F800000; + @%p604 bra $L__BB1_382; + + selp.f32 %f2914, 0fFF800000, 0f7F800000, %p36; + +$L__BB1_382: + mul.f32 %f2121, %f2914, 0fBF000000; + setp.eq.f32 %p605, %f363, 0f3F800000; + selp.f32 %f2122, 0fBF000000, %f2121, %p605; + fma.rn.f32 %f2125, %f2122, %f1908, %f1509; + cvt.sat.f32.f32 %f2128, %f2125; + fma.rm.f32 %f2130, %f2128, %f1911, %f1913; + add.f32 %f2131, %f2130, 0fCB40007F; + neg.f32 %f2132, %f2131; + fma.rn.f32 %f2133, %f2122, %f1620, %f2132; + fma.rn.f32 %f2135, %f2122, %f1926, %f2133; + mov.b32 %r701, %f2130; + shl.b32 %r702, %r701, 23; + mov.b32 %f2136, %r702; + ex2.approx.ftz.f32 %f2137, %f2135; + mul.f32 %f2138, %f2137, %f2136; + sub.f32 %f375, %f362, %f2138; + setp.eq.f32 %p606, %f292, 0f7F800000; + mov.f32 %f2915, 0f7F800000; + @%p606 bra $L__BB1_384; + + fma.rn.f32 %f2915, %f292, %f291, %f292; + +$L__BB1_384: + mov.b32 %r703, %f2915; + xor.b32 %r704, %r703, -2147483648; + mov.b32 %f2139, %r704; + selp.f32 %f378, %f2139, %f2915, %p29; + setp.eq.f32 %p607, %f289, 0f00000000; + selp.f32 %f2916, %f293, %f378, %p607; + @%p33 bra $L__BB1_387; + + cvt.rzi.f32.f32 %f2141, %f1516; + setp.eq.f32 %p608, %f2141, 0f40000000; + mov.f32 %f2916, %f378; + @%p608 bra $L__BB1_387; + + mov.f32 %f2916, 0f7FFFFFFF; + +$L__BB1_387: + setp.eq.f32 %p609, %f297, 0f7F800000; + mov.f32 %f2917, 0f7F800000; + @%p609 bra $L__BB1_389; + + fma.rn.f32 %f2917, %f297, %f296, %f297; + +$L__BB1_389: + mov.b32 %r705, %f2917; + xor.b32 %r706, %r705, -2147483648; + mov.b32 %f2144, %r706; + selp.f32 %f383, %f2144, %f2917, %p30; + setp.eq.f32 %p610, %f294, 0f00000000; + selp.f32 %f2918, %f300, %f383, %p610; + @%p34 bra $L__BB1_392; + + cvt.rzi.f32.f32 %f2146, %f1516; + setp.eq.f32 %p611, %f2146, 0f40000000; + mov.f32 %f2918, %f383; + @%p611 bra $L__BB1_392; + + mov.f32 %f2918, 0f7FFFFFFF; + +$L__BB1_392: + mul.f32 %f2149, %f250, %f375; + mul.f32 %f386, %f324, %f2149; + setp.gtu.f32 %p612, %f290, 0f7F800000; + mov.f32 %f2919, 0f7F800000; + selp.f32 %f2150, %f298, %f2916, %p612; + setp.neu.f32 %p613, %f290, 0f7F800000; + selp.f32 %f2151, %f2150, %f299, %p613; + setp.gt.s32 %p614, %r97, 2139095039; + selp.f32 %f2152, %f2151, %f2916, %p614; + mul.f32 %f2153, %f2152, 0fBF000000; + setp.eq.f32 %p615, %f289, 0f3F800000; + selp.f32 %f2154, 0fBF000000, %f2153, %p615; + fma.rn.f32 %f2157, %f2154, %f1908, %f1509; + cvt.sat.f32.f32 %f2160, %f2157; + fma.rm.f32 %f2162, %f2160, %f1911, %f1913; + setp.gtu.f32 %p616, %f295, 0f7F800000; + selp.f32 %f2163, %f302, %f2918, %p616; + setp.neu.f32 %p617, %f295, 0f7F800000; + selp.f32 %f2164, %f2163, %f303, %p617; + setp.gt.s32 %p618, %r98, 2139095039; + selp.f32 %f2165, %f2164, %f2918, %p618; + mul.f32 %f2166, %f2165, 0fBF000000; + setp.eq.f32 %p619, %f294, 0f3F800000; + selp.f32 %f2167, 0fBF000000, %f2166, %p619; + fma.rn.f32 %f2168, %f2167, %f1908, %f1509; + cvt.sat.f32.f32 %f2169, %f2168; + fma.rm.f32 %f2170, %f2169, %f1911, %f1913; + add.f32 %f2171, %f2170, 0fCB40007F; + neg.f32 %f2172, %f2171; + fma.rn.f32 %f2173, %f2167, %f1620, %f2172; + fma.rn.f32 %f2175, %f2167, %f1926, %f2173; + mov.b32 %r707, %f2170; + shl.b32 %r708, %r707, 23; + mov.b32 %f2176, %r708; + ex2.approx.ftz.f32 %f2177, %f2175; + mul.f32 %f2178, %f2177, %f2176; + mul.f32 %f2179, %f270, %f2178; + mov.b32 %r709, %f2162; + shl.b32 %r710, %r709, 23; + mov.b32 %f2180, %r710; + add.f32 %f2181, %f2162, 0fCB40007F; + neg.f32 %f2182, %f2181; + fma.rn.f32 %f2183, %f2154, %f1620, %f2182; + fma.rn.f32 %f2184, %f2154, %f1926, %f2183; + ex2.approx.ftz.f32 %f2185, %f2184; + mul.f32 %f2186, %f2185, %f2180; + mul.f32 %f2187, %f301, %f2186; + sub.f32 %f2188, %f2187, %f2179; + mul.f32 %f2189, %f251, %f2188; + mul.f32 %f387, %f336, %f2189; + add.f32 %f2190, %f325, 0f3F800000; + sub.f32 %f2191, %f2190, %f2867; + div.rn.f32 %f388, %f2191, %f2864; + abs.f32 %f389, %f388; + setp.lt.f32 %p620, %f389, 0f00800000; + mul.f32 %f2192, %f389, 0f4B800000; + selp.f32 %f2193, %f2192, %f389, %p620; + selp.f32 %f2194, 0fC3170000, 0fC2FE0000, %p620; + mov.b32 %r711, %f2193; + and.b32 %r712, %r711, 8388607; + or.b32 %r713, %r712, 1065353216; + mov.b32 %f2195, %r713; + shr.u32 %r714, %r711, 23; + cvt.rn.f32.u32 %f2196, %r714; + add.f32 %f2197, %f2194, %f2196; + setp.gt.f32 %p621, %f2195, 0f3FB504F3; + mul.f32 %f2198, %f2195, 0f3F000000; + add.f32 %f2199, %f2197, 0f3F800000; + selp.f32 %f2200, %f2199, %f2197, %p621; + selp.f32 %f2201, %f2198, %f2195, %p621; + add.f32 %f2202, %f2201, 0fBF800000; + add.f32 %f2203, %f2201, 0f3F800000; + rcp.approx.ftz.f32 %f2204, %f2203; + add.f32 %f2205, %f2202, %f2202; + mul.f32 %f2207, %f2205, %f2204; + mul.f32 %f2208, %f2207, %f2207; + fma.rn.f32 %f2211, %f1577, %f2208, %f1576; + fma.rn.f32 %f2213, %f2211, %f2208, %f1579; + mul.rn.f32 %f2214, %f2213, %f2208; + mul.rn.f32 %f2215, %f2214, %f2207; + sub.f32 %f2216, %f2202, %f2207; + add.f32 %f2217, %f2216, %f2216; + neg.f32 %f2218, %f2207; + fma.rn.f32 %f2219, %f2218, %f2202, %f2217; + mul.rn.f32 %f2220, %f2204, %f2219; + add.f32 %f2221, %f2215, %f2207; + sub.f32 %f2222, %f2207, %f2221; + add.f32 %f2223, %f2215, %f2222; + add.f32 %f2224, %f2220, %f2223; + add.f32 %f2225, %f2221, %f2224; + sub.f32 %f2226, %f2221, %f2225; + add.f32 %f2227, %f2224, %f2226; + mul.rn.f32 %f2229, %f2200, %f1595; + mul.rn.f32 %f2231, %f2200, %f1597; + add.f32 %f2232, %f2229, %f2225; + sub.f32 %f2233, %f2229, %f2232; + add.f32 %f2234, %f2225, %f2233; + add.f32 %f2235, %f2227, %f2234; + add.f32 %f2236, %f2231, %f2235; + add.f32 %f2237, %f2232, %f2236; + sub.f32 %f2238, %f2232, %f2237; + add.f32 %f2239, %f2236, %f2238; + mul.rn.f32 %f2240, %f1516, %f2237; + neg.f32 %f2241, %f2240; + fma.rn.f32 %f2242, %f1516, %f2237, %f2241; + fma.rn.f32 %f2243, %f1516, %f2239, %f2242; + fma.rn.f32 %f2245, %f1994, %f2237, %f2243; + add.rn.f32 %f2246, %f2240, %f2245; + neg.f32 %f2247, %f2246; + add.rn.f32 %f2248, %f2240, %f2247; + add.rn.f32 %f2249, %f2248, %f2245; + mov.b32 %r715, %f2246; + setp.eq.s32 %p622, %r715, 1118925336; + add.s32 %r716, %r715, -1; + mov.b32 %f2250, %r716; + add.f32 %f2251, %f2249, 0f37000000; + selp.f32 %f390, %f2251, %f2249, %p622; + selp.f32 %f2252, %f2250, %f2246, %p622; + mul.rn.f32 %f2253, %f2252, %f1620; + cvt.rzi.f32.f32 %f2254, %f2253; + abs.f32 %f2255, %f2254; + setp.gt.f32 %p623, %f2255, 0f42FC0000; + mov.b32 %r717, %f2254; + and.b32 %r718, %r717, -2147483648; + or.b32 %r719, %r718, 1123811328; + mov.b32 %f2256, %r719; + selp.f32 %f2257, %f2256, %f2254, %p623; + fma.rn.f32 %f2259, %f2257, %f1626, %f2252; + fma.rn.f32 %f2261, %f2257, %f1628, %f2259; + mul.f32 %f2262, %f2261, 0f3FB8AA3B; + add.f32 %f2263, %f2257, 0f4B40007F; + mov.b32 %r720, %f2263; + shl.b32 %r721, %r720, 23; + mov.b32 %f2264, %r721; + ex2.approx.ftz.f32 %f2265, %f2262; + mul.f32 %f391, %f2265, %f2264; + setp.eq.f32 %p624, %f391, 0f7F800000; + @%p624 bra $L__BB1_394; + + fma.rn.f32 %f2919, %f391, %f390, %f391; + +$L__BB1_394: + setp.lt.f32 %p625, %f388, 0f00000000; + and.pred %p37, %p625, %p540; + setp.eq.f32 %p627, %f388, 0f00000000; + @%p627 bra $L__BB1_398; + bra.uni $L__BB1_395; + +$L__BB1_398: + add.f32 %f2270, %f388, %f388; + selp.f32 %f2921, %f2270, 0f00000000, %p540; + bra.uni $L__BB1_399; + +$L__BB1_395: + mov.b32 %r722, %f2919; + xor.b32 %r723, %r722, -2147483648; + mov.b32 %f2266, %r723; + selp.f32 %f2921, %f2266, %f2919, %p37; + setp.geu.f32 %p628, %f388, 0f00000000; + @%p628 bra $L__BB1_399; + + cvt.rzi.f32.f32 %f2268, %f1516; + setp.eq.f32 %p629, %f2268, 0f40000000; + @%p629 bra $L__BB1_399; + + mov.f32 %f2921, 0f7FFFFFFF; + +$L__BB1_399: + add.f32 %f2271, %f389, 0f40000000; + mov.b32 %r724, %f2271; + setp.lt.s32 %p631, %r724, 2139095040; + @%p631 bra $L__BB1_404; + + setp.gtu.f32 %p632, %f389, 0f7F800000; + @%p632 bra $L__BB1_403; + bra.uni $L__BB1_401; + +$L__BB1_403: + add.f32 %f2921, %f388, 0f40000000; + bra.uni $L__BB1_404; + +$L__BB1_401: + setp.neu.f32 %p633, %f389, 0f7F800000; + @%p633 bra $L__BB1_404; + + selp.f32 %f2921, 0fFF800000, 0f7F800000, %p37; + +$L__BB1_404: + mul.f32 %f2273, %f2921, 0fBF000000; + setp.eq.f32 %p634, %f388, 0f3F800000; + selp.f32 %f2274, 0fBF000000, %f2273, %p634; + fma.rn.f32 %f2277, %f2274, %f1908, %f1509; + cvt.sat.f32.f32 %f2280, %f2277; + fma.rm.f32 %f2282, %f2280, %f1911, %f1913; + add.f32 %f2283, %f2282, 0fCB40007F; + neg.f32 %f2284, %f2283; + fma.rn.f32 %f2285, %f2274, %f1620, %f2284; + fma.rn.f32 %f2287, %f2274, %f1926, %f2285; + mov.b32 %r725, %f2282; + shl.b32 %r726, %r725, 23; + mov.b32 %f2288, %r726; + ex2.approx.ftz.f32 %f2289, %f2287; + mul.f32 %f400, %f2289, %f2288; + div.rn.f32 %f401, %f326, %f2864; + abs.f32 %f402, %f401; + setp.lt.f32 %p635, %f402, 0f00800000; + mul.f32 %f2290, %f402, 0f4B800000; + selp.f32 %f2291, %f2290, %f402, %p635; + selp.f32 %f2292, 0fC3170000, 0fC2FE0000, %p635; + mov.b32 %r727, %f2291; + and.b32 %r728, %r727, 8388607; + or.b32 %r729, %r728, 1065353216; + mov.b32 %f2293, %r729; + shr.u32 %r730, %r727, 23; + cvt.rn.f32.u32 %f2294, %r730; + add.f32 %f2295, %f2292, %f2294; + setp.gt.f32 %p636, %f2293, 0f3FB504F3; + mul.f32 %f2296, %f2293, 0f3F000000; + add.f32 %f2297, %f2295, 0f3F800000; + selp.f32 %f2298, %f2297, %f2295, %p636; + selp.f32 %f2299, %f2296, %f2293, %p636; + add.f32 %f2300, %f2299, 0fBF800000; + add.f32 %f2301, %f2299, 0f3F800000; + rcp.approx.ftz.f32 %f2302, %f2301; + add.f32 %f2303, %f2300, %f2300; + mul.f32 %f2305, %f2303, %f2302; + mul.f32 %f2306, %f2305, %f2305; + fma.rn.f32 %f2309, %f1577, %f2306, %f1576; + fma.rn.f32 %f2311, %f2309, %f2306, %f1579; + mul.rn.f32 %f2312, %f2311, %f2306; + mul.rn.f32 %f2313, %f2312, %f2305; + sub.f32 %f2314, %f2300, %f2305; + add.f32 %f2315, %f2314, %f2314; + neg.f32 %f2316, %f2305; + fma.rn.f32 %f2317, %f2316, %f2300, %f2315; + mul.rn.f32 %f2318, %f2302, %f2317; + add.f32 %f2319, %f2313, %f2305; + sub.f32 %f2320, %f2305, %f2319; + add.f32 %f2321, %f2313, %f2320; + add.f32 %f2322, %f2318, %f2321; + add.f32 %f2323, %f2319, %f2322; + sub.f32 %f2324, %f2319, %f2323; + add.f32 %f2325, %f2322, %f2324; + mul.rn.f32 %f2327, %f2298, %f1595; + mul.rn.f32 %f2329, %f2298, %f1597; + add.f32 %f2330, %f2327, %f2323; + sub.f32 %f2331, %f2327, %f2330; + add.f32 %f2332, %f2323, %f2331; + add.f32 %f2333, %f2325, %f2332; + add.f32 %f2334, %f2329, %f2333; + add.f32 %f2335, %f2330, %f2334; + sub.f32 %f2336, %f2330, %f2335; + add.f32 %f2337, %f2334, %f2336; + mul.rn.f32 %f2338, %f1516, %f2335; + neg.f32 %f2339, %f2338; + fma.rn.f32 %f2340, %f1516, %f2335, %f2339; + fma.rn.f32 %f2341, %f1516, %f2337, %f2340; + fma.rn.f32 %f2343, %f1994, %f2335, %f2341; + add.rn.f32 %f2344, %f2338, %f2343; + neg.f32 %f2345, %f2344; + add.rn.f32 %f2346, %f2338, %f2345; + add.rn.f32 %f2347, %f2346, %f2343; + mov.b32 %r731, %f2344; + setp.eq.s32 %p637, %r731, 1118925336; + add.s32 %r732, %r731, -1; + mov.b32 %f2348, %r732; + add.f32 %f2349, %f2347, 0f37000000; + selp.f32 %f403, %f2349, %f2347, %p637; + selp.f32 %f2350, %f2348, %f2344, %p637; + mul.rn.f32 %f2351, %f2350, %f1620; + cvt.rzi.f32.f32 %f2352, %f2351; + abs.f32 %f2353, %f2352; + setp.gt.f32 %p638, %f2353, 0f42FC0000; + mov.b32 %r733, %f2352; + and.b32 %r734, %r733, -2147483648; + or.b32 %r735, %r734, 1123811328; + mov.b32 %f2354, %r735; + selp.f32 %f2355, %f2354, %f2352, %p638; + fma.rn.f32 %f2357, %f2355, %f1626, %f2350; + fma.rn.f32 %f2359, %f2355, %f1628, %f2357; + mul.f32 %f2360, %f2359, 0f3FB8AA3B; + add.f32 %f2361, %f2355, 0f4B40007F; + mov.b32 %r736, %f2361; + shl.b32 %r737, %r736, 23; + mov.b32 %f2362, %r737; + ex2.approx.ftz.f32 %f2363, %f2360; + mul.f32 %f404, %f2363, %f2362; + setp.eq.f32 %p639, %f404, 0f7F800000; + mov.f32 %f2922, 0f7F800000; + @%p639 bra $L__BB1_406; + + fma.rn.f32 %f2922, %f404, %f403, %f404; + +$L__BB1_406: + setp.lt.f32 %p640, %f401, 0f00000000; + and.pred %p38, %p640, %p540; + setp.eq.f32 %p642, %f401, 0f00000000; + @%p642 bra $L__BB1_410; + bra.uni $L__BB1_407; + +$L__BB1_410: + add.f32 %f2368, %f401, %f401; + selp.f32 %f2924, %f2368, 0f00000000, %p540; + bra.uni $L__BB1_411; + +$L__BB1_407: + mov.b32 %r738, %f2922; + xor.b32 %r739, %r738, -2147483648; + mov.b32 %f2364, %r739; + selp.f32 %f2924, %f2364, %f2922, %p38; + setp.geu.f32 %p643, %f401, 0f00000000; + @%p643 bra $L__BB1_411; + + cvt.rzi.f32.f32 %f2366, %f1516; + setp.eq.f32 %p644, %f2366, 0f40000000; + @%p644 bra $L__BB1_411; + + mov.f32 %f2924, 0f7FFFFFFF; + +$L__BB1_411: + add.f32 %f2369, %f402, 0f40000000; + mov.b32 %r740, %f2369; + setp.lt.s32 %p646, %r740, 2139095040; + @%p646 bra $L__BB1_416; + + setp.gtu.f32 %p647, %f402, 0f7F800000; + @%p647 bra $L__BB1_415; + bra.uni $L__BB1_413; + +$L__BB1_415: + add.f32 %f2924, %f401, 0f40000000; + bra.uni $L__BB1_416; + +$L__BB1_413: + setp.neu.f32 %p648, %f402, 0f7F800000; + @%p648 bra $L__BB1_416; + + selp.f32 %f2924, 0fFF800000, 0f7F800000, %p38; + +$L__BB1_416: + mul.f32 %f2370, %f2924, 0fBF000000; + setp.eq.f32 %p649, %f401, 0f3F800000; + selp.f32 %f2371, 0fBF000000, %f2370, %p649; + fma.rn.f32 %f2374, %f2371, %f1908, %f1509; + cvt.sat.f32.f32 %f2377, %f2374; + fma.rm.f32 %f2379, %f2377, %f1911, %f1913; + add.f32 %f2380, %f2379, 0fCB40007F; + neg.f32 %f2381, %f2380; + fma.rn.f32 %f2382, %f2371, %f1620, %f2381; + fma.rn.f32 %f2384, %f2371, %f1926, %f2382; + mov.b32 %r741, %f2379; + shl.b32 %r742, %r741, 23; + mov.b32 %f2385, %r742; + ex2.approx.ftz.f32 %f2386, %f2384; + mul.f32 %f2387, %f2386, %f2385; + add.f32 %f2388, %f326, 0f3F800000; + mul.f32 %f2389, %f2388, %f400; + mul.f32 %f2390, %f326, %f2387; + sub.f32 %f2391, %f2389, %f2390; + mul.f32 %f2392, %f251, %f2391; + fma.rn.f32 %f2393, %f324, %f2392, %f387; + mul.f32 %f2394, %f349, %f349; + div.rn.f32 %f2395, %f2394, %f337; + add.f32 %f2896, %f2896, %f2395; + mul.f32 %f2396, %f386, %f349; + div.rn.f32 %f2397, %f2396, %f337; + add.f32 %f2895, %f2895, %f2397; + mul.f32 %f2398, %f324, %f336; + mul.f32 %f2399, %f2398, %f349; + div.rn.f32 %f2400, %f2399, %f337; + add.f32 %f2894, %f2894, %f2400; + div.rn.f32 %f2401, %f349, %f337; + add.f32 %f2893, %f2893, %f2401; + mul.f32 %f2402, %f2393, %f349; + div.rn.f32 %f2403, %f2402, %f337; + add.f32 %f2892, %f2892, %f2403; + mul.f32 %f2404, %f386, %f386; + div.rn.f32 %f2405, %f2404, %f337; + add.f32 %f2891, %f2891, %f2405; + mul.f32 %f2406, %f2398, %f386; + div.rn.f32 %f2407, %f2406, %f337; + add.f32 %f2890, %f2890, %f2407; + div.rn.f32 %f2408, %f386, %f337; + add.f32 %f2889, %f2889, %f2408; + mul.f32 %f2409, %f2393, %f386; + div.rn.f32 %f2410, %f2409, %f337; + add.f32 %f2888, %f2888, %f2410; + mul.f32 %f2411, %f2398, %f2398; + div.rn.f32 %f2412, %f2411, %f337; + add.f32 %f2887, %f2887, %f2412; + div.rn.f32 %f2413, %f2398, %f337; + add.f32 %f2886, %f2886, %f2413; + mul.f32 %f2414, %f2393, %f2398; + div.rn.f32 %f2415, %f2414, %f337; + add.f32 %f2885, %f2885, %f2415; + rcp.rn.f32 %f2416, %f337; + add.f32 %f2897, %f2897, %f2416; + div.rn.f32 %f2417, %f2393, %f337; + add.f32 %f2898, %f2898, %f2417; + mul.f32 %f2418, %f2393, %f2393; + div.rn.f32 %f2419, %f2418, %f337; + add.f32 %f2899, %f2899, %f2419; + setp.leu.f32 %p650, %f337, 0f00000000; + @%p650 bra $L__BB1_424; + + setp.gt.f32 %p651, %f338, 0f00000000; + @%p651 bra $L__BB1_419; + bra.uni $L__BB1_418; + +$L__BB1_419: + setp.lt.f32 %p652, %f337, 0f00800000; + mul.f32 %f2420, %f337, 0f4B000000; + selp.f32 %f429, %f2420, %f337, %p652; + selp.f32 %f2421, 0fC1B80000, 0f00000000, %p652; + mov.b32 %r743, %f429; + add.s32 %r744, %r743, -1059760811; + and.b32 %r745, %r744, -8388608; + sub.s32 %r746, %r743, %r745; + mov.b32 %f2422, %r746; + cvt.rn.f32.s32 %f2423, %r745; + mov.f32 %f2424, 0f34000000; + fma.rn.f32 %f2425, %f2423, %f2424, %f2421; + add.f32 %f2426, %f2422, 0fBF800000; + mov.f32 %f2427, 0f3E1039F6; + mov.f32 %f2428, 0fBE055027; + fma.rn.f32 %f2429, %f2428, %f2426, %f2427; + mov.f32 %f2430, 0fBDF8CDCC; + fma.rn.f32 %f2431, %f2429, %f2426, %f2430; + mov.f32 %f2432, 0f3E0F2955; + fma.rn.f32 %f2433, %f2431, %f2426, %f2432; + mov.f32 %f2434, 0fBE2AD8B9; + fma.rn.f32 %f2435, %f2433, %f2426, %f2434; + mov.f32 %f2436, 0f3E4CED0B; + fma.rn.f32 %f2437, %f2435, %f2426, %f2436; + mov.f32 %f2438, 0fBE7FFF22; + fma.rn.f32 %f2439, %f2437, %f2426, %f2438; + mov.f32 %f2440, 0f3EAAAA78; + fma.rn.f32 %f2441, %f2439, %f2426, %f2440; + mov.f32 %f2442, 0fBF000000; + fma.rn.f32 %f2443, %f2441, %f2426, %f2442; + mul.f32 %f2444, %f2426, %f2443; + fma.rn.f32 %f2445, %f2444, %f2426, %f2426; + mov.f32 %f2446, 0f3F317218; + fma.rn.f32 %f2925, %f2425, %f2446, %f2445; + setp.lt.u32 %p653, %r743, 2139095040; + @%p653 bra $L__BB1_421; + + mov.f32 %f2447, 0f7F800000; + fma.rn.f32 %f2925, %f429, %f2447, %f2447; + +$L__BB1_421: + setp.eq.f32 %p654, %f429, 0f00000000; + selp.f32 %f2448, 0fFF800000, %f2925, %p654; + mul.f32 %f2449, %f338, %f2448; + sub.f32 %f433, %f2449, %f337; + mul.f32 %f2450, %f338, 0f4B000000; + setp.lt.f32 %p655, %f338, 0f00800000; + selp.f32 %f434, %f2450, %f338, %p655; + selp.f32 %f2451, 0fC1B80000, 0f00000000, %p655; + mov.b32 %r747, %f434; + add.s32 %r748, %r747, -1059760811; + and.b32 %r749, %r748, -8388608; + sub.s32 %r750, %r747, %r749; + mov.b32 %f2452, %r750; + cvt.rn.f32.s32 %f2453, %r749; + fma.rn.f32 %f2455, %f2453, %f2424, %f2451; + add.f32 %f2456, %f2452, 0fBF800000; + fma.rn.f32 %f2459, %f2428, %f2456, %f2427; + fma.rn.f32 %f2461, %f2459, %f2456, %f2430; + fma.rn.f32 %f2463, %f2461, %f2456, %f2432; + fma.rn.f32 %f2465, %f2463, %f2456, %f2434; + fma.rn.f32 %f2467, %f2465, %f2456, %f2436; + fma.rn.f32 %f2469, %f2467, %f2456, %f2438; + fma.rn.f32 %f2471, %f2469, %f2456, %f2440; + fma.rn.f32 %f2473, %f2471, %f2456, %f2442; + mul.f32 %f2474, %f2456, %f2473; + fma.rn.f32 %f2475, %f2474, %f2456, %f2456; + fma.rn.f32 %f2926, %f2455, %f2446, %f2475; + setp.lt.u32 %p656, %r747, 2139095040; + @%p656 bra $L__BB1_423; + + mov.f32 %f2477, 0f7F800000; + fma.rn.f32 %f2926, %f434, %f2477, %f2477; + +$L__BB1_423: + setp.eq.f32 %p657, %f434, 0f00000000; + selp.f32 %f2478, 0fFF800000, %f2926, %p657; + mul.f32 %f2479, %f338, %f2478; + sub.f32 %f2480, %f433, %f2479; + add.f32 %f2481, %f338, %f2480; + add.f32 %f2927, %f2927, %f2481; + bra.uni $L__BB1_424; + +$L__BB1_418: + sub.f32 %f2927, %f2927, %f337; + +$L__BB1_424: + add.s32 %r784, %r784, 1; + setp.lt.s32 %p658, %r784, %r102; + @%p658 bra $L__BB1_340; + + add.s32 %r783, %r783, 1; + setp.lt.s32 %p659, %r783, %r102; + @%p659 bra $L__BB1_339; + +$L__BB1_426: + ld.param.u64 %rd50, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_6]; + ld.param.u64 %rd49, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_5]; + ld.param.u32 %r764, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_7]; + ld.param.u64 %rd48, [_Z20kernel_MLEFit_XYNBS_PKffiiPfS1_S1_i_param_4]; + mov.u32 %r763, %tid.x; + mov.u32 %r762, %ntid.x; + mov.u32 %r761, %ctaid.x; + mad.lo.s32 %r760, %r761, %r762, %r763; + rcp.rn.f32 %f2482, %f2896; + mov.f32 %f2483, 0f3F800000; + mul.f32 %f2484, %f2482, %f2895; + mul.f32 %f2485, %f2482, %f2894; + mul.f32 %f2486, %f2482, %f2893; + mul.f32 %f2487, %f2482, %f2892; + fma.rn.f32 %f2488, %f2484, %f2895, 0f00000000; + sub.f32 %f2490, %f2891, %f2488; + fma.rn.f32 %f2491, %f2485, %f2895, 0f00000000; + rcp.rn.f32 %f2492, %f2490; + sub.f32 %f2493, %f2890, %f2491; + mul.f32 %f2494, %f2492, %f2493; + fma.rn.f32 %f2495, %f2486, %f2895, 0f00000000; + sub.f32 %f2496, %f2889, %f2495; + mul.f32 %f2497, %f2492, %f2496; + fma.rn.f32 %f2498, %f2487, %f2895, 0f00000000; + sub.f32 %f2499, %f2888, %f2498; + mul.f32 %f2500, %f2492, %f2499; + fma.rn.f32 %f2501, %f2484, %f2894, 0f00000000; + sub.f32 %f2502, %f2890, %f2501; + fma.rn.f32 %f2503, %f2485, %f2894, 0f00000000; + fma.rn.f32 %f2504, %f2494, %f2502, %f2503; + sub.f32 %f2505, %f2887, %f2504; + fma.rn.f32 %f2506, %f2486, %f2894, 0f00000000; + fma.rn.f32 %f2507, %f2497, %f2502, %f2506; + rcp.rn.f32 %f2508, %f2505; + sub.f32 %f2509, %f2886, %f2507; + mul.f32 %f2510, %f2508, %f2509; + fma.rn.f32 %f2511, %f2487, %f2894, 0f00000000; + fma.rn.f32 %f2512, %f2500, %f2502, %f2511; + sub.f32 %f2513, %f2885, %f2512; + mul.f32 %f2514, %f2508, %f2513; + fma.rn.f32 %f2515, %f2484, %f2893, 0f00000000; + sub.f32 %f2516, %f2889, %f2515; + fma.rn.f32 %f2517, %f2485, %f2893, 0f00000000; + fma.rn.f32 %f2518, %f2494, %f2516, %f2517; + sub.f32 %f2519, %f2886, %f2518; + fma.rn.f32 %f2520, %f2486, %f2893, 0f00000000; + fma.rn.f32 %f2521, %f2497, %f2516, %f2520; + fma.rn.f32 %f2522, %f2510, %f2519, %f2521; + sub.f32 %f2523, %f2897, %f2522; + fma.rn.f32 %f2524, %f2487, %f2893, 0f00000000; + fma.rn.f32 %f2525, %f2500, %f2516, %f2524; + fma.rn.f32 %f2526, %f2514, %f2519, %f2525; + rcp.rn.f32 %f2527, %f2523; + sub.f32 %f2528, %f2898, %f2526; + mul.f32 %f2529, %f2527, %f2528; + fma.rn.f32 %f2530, %f2484, %f2892, 0f00000000; + sub.f32 %f2531, %f2888, %f2530; + fma.rn.f32 %f2532, %f2485, %f2892, 0f00000000; + fma.rn.f32 %f2533, %f2494, %f2531, %f2532; + sub.f32 %f2534, %f2885, %f2533; + fma.rn.f32 %f2535, %f2486, %f2892, 0f00000000; + fma.rn.f32 %f2536, %f2497, %f2531, %f2535; + fma.rn.f32 %f2537, %f2510, %f2534, %f2536; + sub.f32 %f2538, %f2898, %f2537; + fma.rn.f32 %f2539, %f2487, %f2892, 0f00000000; + fma.rn.f32 %f2540, %f2500, %f2531, %f2539; + fma.rn.f32 %f2541, %f2514, %f2534, %f2540; + fma.rn.f32 %f2542, %f2529, %f2538, %f2541; + sub.f32 %f2543, %f2899, %f2542; + add.f32 %f2544, %f2484, 0f00000000; + sub.f32 %f2545, %f1492, %f2544; + add.f32 %f2546, %f2485, 0f00000000; + fma.rn.f32 %f2547, %f2494, %f2545, %f2546; + sub.f32 %f2548, %f1492, %f2547; + add.f32 %f2549, %f2486, 0f00000000; + fma.rn.f32 %f2550, %f2497, %f2545, %f2549; + fma.rn.f32 %f2551, %f2510, %f2548, %f2550; + sub.f32 %f2552, %f1492, %f2551; + add.f32 %f2553, %f2487, 0f00000000; + fma.rn.f32 %f2554, %f2500, %f2545, %f2553; + fma.rn.f32 %f2555, %f2514, %f2548, %f2554; + fma.rn.f32 %f2556, %f2529, %f2552, %f2555; + sub.f32 %f2557, %f1492, %f2556; + div.rn.f32 %f2558, %f2557, %f2543; + fma.rn.f32 %f2559, %f2538, %f2558, 0f00000000; + sub.f32 %f2560, %f2552, %f2559; + mul.f32 %f2561, %f2527, %f2560; + fma.rn.f32 %f2562, %f2519, %f2561, 0f00000000; + fma.rn.f32 %f2563, %f2534, %f2558, %f2562; + sub.f32 %f2564, %f2548, %f2563; + mul.f32 %f2565, %f2508, %f2564; + fma.rn.f32 %f2566, %f2502, %f2565, 0f00000000; + fma.rn.f32 %f2567, %f2516, %f2561, %f2566; + fma.rn.f32 %f2568, %f2531, %f2558, %f2567; + sub.f32 %f2569, %f2545, %f2568; + mul.f32 %f2570, %f2492, %f2569; + fma.rn.f32 %f2571, %f2895, %f2570, 0f00000000; + fma.rn.f32 %f2572, %f2894, %f2565, %f2571; + fma.rn.f32 %f2573, %f2893, %f2561, %f2572; + fma.rn.f32 %f2574, %f2892, %f2558, %f2573; + sub.f32 %f2575, %f2483, %f2574; + mul.f32 %f2576, %f2482, %f2575; + fma.rn.f32 %f2577, %f2484, 0f00000000, 0f00000000; + sub.f32 %f2578, %f2483, %f2577; + fma.rn.f32 %f2579, %f2485, 0f00000000, 0f00000000; + fma.rn.f32 %f2580, %f2494, %f2578, %f2579; + sub.f32 %f2581, %f1492, %f2580; + fma.rn.f32 %f2582, %f2486, 0f00000000, 0f00000000; + fma.rn.f32 %f2583, %f2497, %f2578, %f2582; + fma.rn.f32 %f2584, %f2510, %f2581, %f2583; + sub.f32 %f2585, %f1492, %f2584; + fma.rn.f32 %f2586, %f2487, 0f00000000, 0f00000000; + fma.rn.f32 %f2587, %f2500, %f2578, %f2586; + fma.rn.f32 %f2588, %f2514, %f2581, %f2587; + fma.rn.f32 %f2589, %f2529, %f2585, %f2588; + sub.f32 %f2590, %f1492, %f2589; + div.rn.f32 %f2591, %f2590, %f2543; + fma.rn.f32 %f2592, %f2538, %f2591, 0f00000000; + sub.f32 %f2593, %f2585, %f2592; + mul.f32 %f2594, %f2527, %f2593; + fma.rn.f32 %f2595, %f2519, %f2594, 0f00000000; + fma.rn.f32 %f2596, %f2534, %f2591, %f2595; + sub.f32 %f2597, %f2581, %f2596; + mul.f32 %f2598, %f2508, %f2597; + fma.rn.f32 %f2599, %f2502, %f2598, 0f00000000; + fma.rn.f32 %f2600, %f2516, %f2594, %f2599; + fma.rn.f32 %f2601, %f2531, %f2591, %f2600; + sub.f32 %f2602, %f2578, %f2601; + mul.f32 %f2603, %f2492, %f2602; + sub.f32 %f2604, %f1492, %f2577; + fma.rn.f32 %f2605, %f2494, %f2604, %f2579; + sub.f32 %f2606, %f2483, %f2605; + fma.rn.f32 %f2607, %f2497, %f2604, %f2582; + fma.rn.f32 %f2608, %f2510, %f2606, %f2607; + sub.f32 %f2609, %f1492, %f2608; + fma.rn.f32 %f2610, %f2500, %f2604, %f2586; + fma.rn.f32 %f2611, %f2514, %f2606, %f2610; + fma.rn.f32 %f2612, %f2529, %f2609, %f2611; + sub.f32 %f2613, %f1492, %f2612; + div.rn.f32 %f2614, %f2613, %f2543; + fma.rn.f32 %f2615, %f2538, %f2614, 0f00000000; + sub.f32 %f2616, %f2609, %f2615; + mul.f32 %f2617, %f2527, %f2616; + fma.rn.f32 %f2618, %f2519, %f2617, 0f00000000; + fma.rn.f32 %f2619, %f2534, %f2614, %f2618; + sub.f32 %f2620, %f2606, %f2619; + mul.f32 %f2621, %f2508, %f2620; + sub.f32 %f2622, %f1492, %f2605; + fma.rn.f32 %f2623, %f2510, %f2622, %f2607; + sub.f32 %f2624, %f2483, %f2623; + fma.rn.f32 %f2625, %f2514, %f2622, %f2610; + fma.rn.f32 %f2626, %f2529, %f2624, %f2625; + sub.f32 %f2627, %f1492, %f2626; + div.rn.f32 %f2628, %f2627, %f2543; + fma.rn.f32 %f2629, %f2538, %f2628, 0f00000000; + sub.f32 %f2630, %f2624, %f2629; + mul.f32 %f2631, %f2527, %f2630; + sub.f32 %f2632, %f1492, %f2623; + fma.rn.f32 %f2633, %f2529, %f2632, %f2625; + sub.f32 %f2634, %f2483, %f2633; + div.rn.f32 %f2635, %f2634, %f2543; + cvta.to.global.u64 %rd30, %rd48; + mul.wide.s32 %rd31, %r760, 4; + add.s64 %rd32, %rd30, %rd31; + st.global.f32 [%rd32], %f2868; + add.s32 %r755, %r760, %r764; + mul.wide.s32 %rd33, %r764, 4; + add.s64 %rd34, %rd32, %rd33; + st.global.f32 [%rd34], %f2867; + add.s32 %r756, %r755, %r764; + shl.b32 %r757, %r764, 3; + cvt.s64.s32 %rd35, %r757; + add.s64 %rd36, %rd32, %rd35; + st.global.f32 [%rd36], %f2866; + add.s32 %r758, %r756, %r764; + mul.wide.s32 %rd37, %r758, 4; + add.s64 %rd38, %rd30, %rd37; + st.global.f32 [%rd38], %f2865; + add.s64 %rd39, %rd36, %rd35; + st.global.f32 [%rd39], %f2864; + cvta.to.global.u64 %rd40, %rd49; + add.s64 %rd41, %rd40, %rd31; + st.global.f32 [%rd41], %f2576; + add.s64 %rd42, %rd41, %rd33; + st.global.f32 [%rd42], %f2603; + add.s64 %rd43, %rd41, %rd35; + st.global.f32 [%rd43], %f2621; + add.s64 %rd44, %rd40, %rd37; + st.global.f32 [%rd44], %f2631; + add.s64 %rd45, %rd43, %rd35; + st.global.f32 [%rd45], %f2635; + cvta.to.global.u64 %rd46, %rd50; + add.s64 %rd47, %rd46, %rd31; + st.global.f32 [%rd47], %f2927; + +$L__BB1_427: ret; -} +} // .globl _Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i .visible .entry _Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i( .param .u64 _Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_0, @@ -6655,4212 +9462,9688 @@ BB1_218: .param .u32 _Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_14 ) { - .local .align 4 .b8 __local_depot2[100]; - .reg .b64 %SP; - .reg .b64 %SPL; - .reg .pred %p<384>; - .reg .f32 %f<3401>; - .reg .b32 %r<329>; - .reg .b64 %rd<107>; - - - mov.u64 %SPL, __local_depot2; - ld.param.u64 %rd44, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_0]; - ld.param.f32 %f643, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_1]; - ld.param.f32 %f644, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_2]; - ld.param.f32 %f645, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_3]; - ld.param.f32 %f646, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_4]; - ld.param.f32 %f647, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_5]; - ld.param.f32 %f648, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_6]; - ld.param.f32 %f649, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_7]; - ld.param.f32 %f650, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_8]; - ld.param.u32 %r63, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_9]; - ld.param.u32 %r64, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_10]; - ld.param.u32 %r65, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_14]; - cvta.to.global.u64 %rd1, %rd44; - add.u64 %rd2, %SPL, 0; - mov.u32 %r66, %ntid.x; - mov.u32 %r67, %ctaid.x; - mov.u32 %r68, %tid.x; - mad.lo.s32 %r1, %r66, %r67, %r68; - setp.ge.s32 %p17, %r1, %r65; - @%p17 bra BB2_226; - - mov.u32 %r69, 0; - mov.u64 %rd94, %rd2; - mov.u32 %r305, %r69; - -BB2_2: - st.local.u32 [%rd94], %r69; - add.s64 %rd94, %rd94, 4; - add.s32 %r305, %r305, 1; - setp.lt.u32 %p18, %r305, 25; - @%p18 bra BB2_2; - - mul.lo.s32 %r71, %r63, %r63; - mul.lo.s32 %r4, %r71, %r1; - mov.f32 %f3345, 0f00000000; - setp.lt.s32 %p19, %r63, 1; - mov.f32 %f1, %f3345; - mov.f32 %f2, %f3345; - mov.f32 %f3, %f3345; - @%p19 bra BB2_17; - - and.b32 %r5, %r63, 3; - shl.b32 %r6, %r63, 2; - mov.f32 %f656, 0f00000000; - mov.u32 %r72, 0; - mov.u32 %r306, %r72; - mov.f32 %f1, %f656; - mov.f32 %f2, %f656; - mov.f32 %f3, %f656; - -BB2_5: - cvt.rn.f32.s32 %f4, %r306; - setp.eq.s32 %p20, %r5, 0; - @%p20 bra BB2_6; - - setp.eq.s32 %p21, %r5, 1; - @%p21 bra BB2_8; - bra.uni BB2_9; - -BB2_8: - mov.u32 %r308, %r72; - bra.uni BB2_13; - -BB2_6: - mov.u32 %r310, %r72; - mov.f32 %f3247, %f1; - mov.f32 %f3248, %f2; - mov.f32 %f3249, %f3; - mov.f32 %f1, %f656; - mov.f32 %f2, %f656; - mov.f32 %f3, %f656; - bra.uni BB2_14; - -BB2_9: - setp.eq.s32 %p22, %r5, 2; - @%p22 bra BB2_10; - bra.uni BB2_11; - -BB2_10: - mov.u32 %r307, %r72; - bra.uni BB2_12; - -BB2_11: - add.s32 %r77, %r306, %r4; - mul.wide.s32 %rd50, %r77, 4; - add.s64 %rd51, %rd1, %rd50; - ld.global.f32 %f660, [%rd51]; - fma.rn.f32 %f3, %f4, %f660, %f3; - fma.rn.f32 %f2, %f660, 0f00000000, %f2; - add.f32 %f1, %f1, %f660; - mov.u32 %r307, 1; - -BB2_12: - neg.s32 %r78, %r307; - and.b32 %r79, %r78, %r63; - add.s32 %r80, %r79, %r306; - add.s32 %r81, %r80, %r4; - mul.wide.s32 %rd52, %r81, 4; - add.s64 %rd53, %rd1, %rd52; - ld.global.f32 %f661, [%rd53]; - fma.rn.f32 %f3, %f4, %f661, %f3; - cvt.rn.f32.s32 %f662, %r307; - fma.rn.f32 %f2, %f662, %f661, %f2; - add.f32 %f1, %f1, %f661; - add.s32 %r308, %r307, 1; - -BB2_13: - mad.lo.s32 %r82, %r308, %r63, %r306; - add.s32 %r83, %r82, %r4; - mul.wide.s32 %rd54, %r83, 4; - add.s64 %rd55, %rd1, %rd54; - ld.global.f32 %f663, [%rd55]; - fma.rn.f32 %f3249, %f4, %f663, %f3; - cvt.rn.f32.s32 %f664, %r308; - fma.rn.f32 %f3248, %f664, %f663, %f2; - add.f32 %f3247, %f1, %f663; - add.s32 %r310, %r308, 1; - mov.f32 %f1, %f3247; - mov.f32 %f2, %f3248; - mov.f32 %f3, %f3249; - -BB2_14: - setp.lt.u32 %p23, %r63, 4; - @%p23 bra BB2_16; - -BB2_15: - mad.lo.s32 %r84, %r310, %r63, %r306; - add.s32 %r85, %r84, %r4; - mul.wide.s32 %rd56, %r85, 4; - add.s64 %rd57, %rd1, %rd56; - ld.global.f32 %f665, [%rd57]; - fma.rn.f32 %f666, %f4, %f665, %f3249; - cvt.rn.f32.s32 %f667, %r310; - fma.rn.f32 %f668, %f667, %f665, %f3248; - add.f32 %f669, %f3247, %f665; - cvt.s64.s32 %rd58, %r6; - add.s64 %rd59, %rd57, %rd58; - ld.global.f32 %f670, [%rd59]; - fma.rn.f32 %f671, %f4, %f670, %f666; - add.s32 %r86, %r310, 1; - cvt.rn.f32.s32 %f672, %r86; - fma.rn.f32 %f673, %f672, %f670, %f668; - add.f32 %f674, %f669, %f670; - add.s64 %rd60, %rd59, %rd58; - ld.global.f32 %f675, [%rd60]; - fma.rn.f32 %f676, %f4, %f675, %f671; - add.s32 %r87, %r310, 2; - cvt.rn.f32.s32 %f677, %r87; - fma.rn.f32 %f678, %f677, %f675, %f673; - add.f32 %f679, %f674, %f675; - add.s64 %rd61, %rd60, %rd58; - ld.global.f32 %f680, [%rd61]; - fma.rn.f32 %f3249, %f4, %f680, %f676; - add.s32 %r88, %r310, 3; - cvt.rn.f32.s32 %f681, %r88; - fma.rn.f32 %f3248, %f681, %f680, %f678; - add.f32 %f3247, %f679, %f680; - add.s32 %r310, %r310, 4; - setp.lt.s32 %p24, %r310, %r63; - mov.f32 %f1, %f3247; - mov.f32 %f2, %f3248; - mov.f32 %f3, %f3249; - @%p24 bra BB2_15; - -BB2_16: - add.s32 %r306, %r306, 1; - setp.lt.s32 %p25, %r306, %r63; - @%p25 bra BB2_5; - -BB2_17: - div.rn.f32 %f3349, %f3, %f1; - div.rn.f32 %f3348, %f2, %f1; - mov.f32 %f684, 0f3F000000; - div.rn.f32 %f685, %f684, %f643; - div.rn.f32 %f40, %f685, %f643; - mov.f32 %f3258, 0f51BA43B7; - mov.f32 %f3259, %f3345; - @%p19 bra BB2_36; - - and.b32 %r16, %r63, 3; - mov.f32 %f3259, 0f00000000; - mov.u32 %r89, 0; - mov.f32 %f3258, 0f51BA43B7; - mov.u32 %r311, %r89; - -BB2_19: - mov.u32 %r312, %r89; - -BB2_20: - cvt.rn.f32.s32 %f690, %r312; - mul.f32 %f691, %f690, %f690; - mul.f32 %f45, %f40, %f691; - neg.f32 %f46, %f45; - mul.f32 %f47, %f45, 0fBFB8AA3B; - mov.f32 %f689, 0f00000000; - mov.f32 %f3276, %f689; - mov.f32 %f3277, %f689; - mov.u32 %r313, %r89; - -BB2_21: - sub.s32 %r93, %r313, %r311; - cvt.rn.f32.s32 %f50, %r93; - mul.lo.s32 %r20, %r313, %r63; - setp.eq.s32 %p27, %r16, 0; - @%p27 bra BB2_22; - - setp.eq.s32 %p28, %r16, 1; - @%p28 bra BB2_26; - bra.uni BB2_24; - -BB2_26: - mul.f32 %f706, %f50, %f50; - mul.f32 %f3267, %f40, %f706; - neg.f32 %f707, %f3267; - mul.f32 %f708, %f3267, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f709, %f708; - mov.f32 %f710, 0fBF317200; - fma.rn.f32 %f711, %f709, %f710, %f707; - mov.f32 %f712, 0fB5BFBE8E; - fma.rn.f32 %f713, %f709, %f712, %f711; - mul.f32 %f714, %f713, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f715, %f714; - add.f32 %f716, %f709, 0f00000000; - ex2.approx.f32 %f717, %f716; - mul.f32 %f3266, %f715, %f717; - mov.u32 %r315, 0; - bra.uni BB2_29; - -BB2_22: - mov.f32 %f3270, %f3276; - mov.f32 %f3271, %f3277; - mov.u32 %r317, %r89; - mov.f32 %f3276, %f689; - mov.f32 %f3277, %f689; - bra.uni BB2_30; - -BB2_24: - setp.ne.s32 %p29, %r16, 2; - @%p29 bra BB2_27; - - mul.f32 %f694, %f50, %f50; - mul.f32 %f3267, %f40, %f694; - neg.f32 %f695, %f3267; - mul.f32 %f696, %f3267, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f697, %f696; - mov.f32 %f698, 0fBF317200; - fma.rn.f32 %f699, %f697, %f698, %f695; - mov.f32 %f700, 0fB5BFBE8E; - fma.rn.f32 %f701, %f697, %f700, %f699; - mul.f32 %f702, %f701, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f703, %f702; - add.f32 %f704, %f697, 0f00000000; - ex2.approx.f32 %f705, %f704; - mul.f32 %f3266, %f703, %f705; - mov.u32 %r314, 0; - bra.uni BB2_28; - -BB2_27: - setp.lt.f32 %p30, %f45, 0fC2D20000; - mul.f32 %f718, %f50, %f50; - mul.f32 %f3267, %f40, %f718; - neg.f32 %f719, %f3267; - mul.f32 %f720, %f3267, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f721, %f720; - mov.f32 %f722, 0fBF317200; - fma.rn.f32 %f723, %f721, %f722, %f719; - mov.f32 %f724, 0fB5BFBE8E; - fma.rn.f32 %f725, %f721, %f724, %f723; - mul.f32 %f726, %f725, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f727, %f726; - add.f32 %f728, %f721, 0f00000000; - ex2.approx.f32 %f729, %f728; - mul.f32 %f3266, %f727, %f729; - setp.gt.f32 %p31, %f3267, 0f42D20000; - selp.f32 %f730, 0f00000000, %f3266, %p31; - setp.lt.f32 %p32, %f3267, 0fC2D20000; - selp.f32 %f731, 0f7F800000, %f730, %p32; - cvt.rzi.f32.f32 %f732, %f47; - fma.rn.f32 %f733, %f732, %f722, %f46; - fma.rn.f32 %f734, %f732, %f724, %f733; - mul.f32 %f735, %f734, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f736, %f735; - add.f32 %f737, %f732, 0f00000000; - ex2.approx.f32 %f738, %f737; - mul.f32 %f739, %f736, %f738; - setp.gt.f32 %p33, %f45, 0f42D20000; - selp.f32 %f740, 0f00000000, %f739, %p33; - selp.f32 %f741, 0f7F800000, %f740, %p30; - mul.f32 %f742, %f731, %f741; - add.s32 %r97, %r20, %r4; - mul.wide.s32 %rd62, %r97, 4; - add.s64 %rd63, %rd1, %rd62; - ld.global.f32 %f743, [%rd63]; - fma.rn.f32 %f3277, %f743, %f742, %f3277; - add.f32 %f3276, %f3276, %f742; - mov.u32 %r314, 1; - -BB2_28: - sub.s32 %r98, %r312, %r314; - cvt.rn.f32.s32 %f744, %r98; - mul.f32 %f745, %f744, %f744; - setp.gt.f32 %p34, %f3267, 0f42D20000; - selp.f32 %f746, 0f00000000, %f3266, %p34; - setp.lt.f32 %p35, %f3267, 0fC2D20000; - selp.f32 %f747, 0f7F800000, %f746, %p35; - mul.f32 %f748, %f40, %f745; - neg.f32 %f749, %f748; - mul.f32 %f750, %f748, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f751, %f750; - mov.f32 %f752, 0fBF317200; - fma.rn.f32 %f753, %f751, %f752, %f749; - mov.f32 %f754, 0fB5BFBE8E; - fma.rn.f32 %f755, %f751, %f754, %f753; - mul.f32 %f756, %f755, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f757, %f756; - add.f32 %f758, %f751, 0f00000000; - ex2.approx.f32 %f759, %f758; - mul.f32 %f760, %f757, %f759; - setp.gt.f32 %p36, %f748, 0f42D20000; - selp.f32 %f761, 0f00000000, %f760, %p36; - setp.lt.f32 %p37, %f748, 0fC2D20000; - selp.f32 %f762, 0f7F800000, %f761, %p37; - mul.f32 %f763, %f747, %f762; - add.s32 %r99, %r314, %r20; - add.s32 %r100, %r99, %r4; - mul.wide.s32 %rd64, %r100, 4; - add.s64 %rd65, %rd1, %rd64; - ld.global.f32 %f764, [%rd65]; - fma.rn.f32 %f3277, %f764, %f763, %f3277; - add.f32 %f3276, %f3276, %f763; - add.s32 %r315, %r314, 1; - -BB2_29: - sub.s32 %r101, %r312, %r315; - cvt.rn.f32.s32 %f765, %r101; - mul.f32 %f766, %f765, %f765; - setp.gt.f32 %p38, %f3267, 0f42D20000; - selp.f32 %f767, 0f00000000, %f3266, %p38; - setp.lt.f32 %p39, %f3267, 0fC2D20000; - selp.f32 %f768, 0f7F800000, %f767, %p39; - mul.f32 %f769, %f40, %f766; - neg.f32 %f770, %f769; - mul.f32 %f771, %f769, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f772, %f771; - mov.f32 %f773, 0fBF317200; - fma.rn.f32 %f774, %f772, %f773, %f770; - mov.f32 %f775, 0fB5BFBE8E; - fma.rn.f32 %f776, %f772, %f775, %f774; - mul.f32 %f777, %f776, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f778, %f777; - add.f32 %f779, %f772, 0f00000000; - ex2.approx.f32 %f780, %f779; - mul.f32 %f781, %f778, %f780; - setp.gt.f32 %p40, %f769, 0f42D20000; - selp.f32 %f782, 0f00000000, %f781, %p40; - setp.lt.f32 %p41, %f769, 0fC2D20000; - selp.f32 %f783, 0f7F800000, %f782, %p41; - mul.f32 %f784, %f768, %f783; - add.s32 %r102, %r315, %r20; - add.s32 %r103, %r102, %r4; - mul.wide.s32 %rd66, %r103, 4; - add.s64 %rd67, %rd1, %rd66; - ld.global.f32 %f785, [%rd67]; - fma.rn.f32 %f3271, %f785, %f784, %f3277; - add.f32 %f3270, %f3276, %f784; - add.s32 %r317, %r315, 1; - mov.f32 %f3276, %f3270; - mov.f32 %f3277, %f3271; - -BB2_30: - setp.lt.u32 %p42, %r63, 4; - @%p42 bra BB2_33; - - mul.f32 %f786, %f50, %f50; - mul.f32 %f787, %f40, %f786; + .reg .pred %p<1292>; + .reg .f32 %f<3358>; + .reg .b32 %r<1374>; + .reg .f64 %fd<1222>; + .reg .b64 %rd<47>; + + + ld.param.u64 %rd3, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_0]; + ld.param.f32 %f535, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_1]; + ld.param.f32 %f536, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_2]; + ld.param.f32 %f537, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_3]; + ld.param.f32 %f538, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_4]; + ld.param.f32 %f539, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_5]; + ld.param.f32 %f540, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_6]; + ld.param.f32 %f541, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_7]; + ld.param.f32 %f542, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_8]; + ld.param.u32 %r182, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_9]; + ld.param.u32 %r183, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_10]; + ld.param.u32 %r184, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_14]; + cvta.to.global.u64 %rd1, %rd3; + mov.u32 %r185, %ntid.x; + mov.u32 %r186, %ctaid.x; + mov.u32 %r187, %tid.x; + mad.lo.s32 %r1, %r186, %r185, %r187; + setp.ge.s32 %p79, %r1, %r184; + @%p79 bra $L__BB2_885; + + mul.lo.s32 %r188, %r182, %r182; + mul.lo.s32 %r2, %r188, %r1; + setp.lt.s32 %p80, %r182, 1; + mov.f32 %f545, 0f00000000; + mov.f32 %f3187, %f545; + mov.f32 %f3188, %f545; + mov.f32 %f3189, %f545; + @%p80 bra $L__BB2_11; + + add.s32 %r3, %r182, -1; + and.b32 %r4, %r182, 3; + sub.s32 %r5, %r182, %r4; + shl.b32 %r6, %r182, 2; + mov.u32 %r189, 0; + setp.lt.u32 %p81, %r3, 3; + setp.eq.s32 %p83, %r4, 0; + setp.eq.s32 %p84, %r4, 1; + setp.eq.s32 %p85, %r4, 2; + cvt.s64.s32 %rd9, %r6; + mov.u32 %r1361, %r189; + +$L__BB2_3: + cvt.rn.f32.s32 %f4, %r1361; + mov.u32 %r1364, %r189; + @%p81 bra $L__BB2_6; + + mov.u32 %r1364, %r189; + mov.u32 %r1363, %r5; + +$L__BB2_5: + mad.lo.s32 %r192, %r1364, %r182, %r1361; + add.s32 %r193, %r192, %r2; + mul.wide.s32 %rd7, %r193, 4; + add.s64 %rd8, %rd1, %rd7; + ld.global.f32 %f550, [%rd8]; + fma.rn.f32 %f551, %f550, %f4, %f3187; + cvt.rn.f32.s32 %f552, %r1364; + fma.rn.f32 %f553, %f550, %f552, %f3188; + add.f32 %f554, %f3189, %f550; + add.s64 %rd10, %rd8, %rd9; + ld.global.f32 %f555, [%rd10]; + fma.rn.f32 %f556, %f555, %f4, %f551; + add.s32 %r194, %r1364, 1; + cvt.rn.f32.s32 %f557, %r194; + fma.rn.f32 %f558, %f555, %f557, %f553; + add.f32 %f559, %f554, %f555; + add.s64 %rd11, %rd10, %rd9; + ld.global.f32 %f560, [%rd11]; + fma.rn.f32 %f561, %f560, %f4, %f556; + add.s32 %r195, %r1364, 2; + cvt.rn.f32.s32 %f562, %r195; + fma.rn.f32 %f563, %f560, %f562, %f558; + add.f32 %f564, %f559, %f560; + add.s64 %rd12, %rd11, %rd9; + ld.global.f32 %f565, [%rd12]; + fma.rn.f32 %f3187, %f565, %f4, %f561; + add.s32 %r196, %r1364, 3; + cvt.rn.f32.s32 %f566, %r196; + fma.rn.f32 %f3188, %f565, %f566, %f563; + add.f32 %f3189, %f564, %f565; + add.s32 %r1364, %r1364, 4; + add.s32 %r1363, %r1363, -4; + setp.ne.s32 %p82, %r1363, 0; + @%p82 bra $L__BB2_5; + +$L__BB2_6: + @%p83 bra $L__BB2_10; + + mad.lo.s32 %r13, %r1364, %r182, %r1361; + add.s32 %r197, %r13, %r2; + mul.wide.s32 %rd13, %r197, 4; + add.s64 %rd14, %rd1, %rd13; + ld.global.f32 %f567, [%rd14]; + fma.rn.f32 %f3187, %f567, %f4, %f3187; + cvt.rn.f32.s32 %f568, %r1364; + fma.rn.f32 %f3188, %f567, %f568, %f3188; + add.f32 %f3189, %f3189, %f567; + @%p84 bra $L__BB2_10; + + add.s32 %r14, %r13, %r182; + add.s32 %r198, %r14, %r2; + mul.wide.s32 %rd15, %r198, 4; + add.s64 %rd16, %rd1, %rd15; + ld.global.f32 %f569, [%rd16]; + fma.rn.f32 %f3187, %f569, %f4, %f3187; + add.s32 %r199, %r1364, 1; + cvt.rn.f32.s32 %f570, %r199; + fma.rn.f32 %f3188, %f569, %f570, %f3188; + add.f32 %f3189, %f3189, %f569; + @%p85 bra $L__BB2_10; + + add.s32 %r200, %r1364, 2; + add.s32 %r201, %r14, %r182; + add.s32 %r202, %r201, %r2; + mul.wide.s32 %rd17, %r202, 4; + add.s64 %rd18, %rd1, %rd17; + ld.global.f32 %f571, [%rd18]; + fma.rn.f32 %f3187, %f571, %f4, %f3187; + cvt.rn.f32.s32 %f572, %r200; + fma.rn.f32 %f3188, %f571, %f572, %f3188; + add.f32 %f3189, %f3189, %f571; + +$L__BB2_10: + add.s32 %r1361, %r1361, 1; + setp.lt.s32 %p86, %r1361, %r182; + @%p86 bra $L__BB2_3; + +$L__BB2_11: + div.rn.f32 %f3278, %f3187, %f3189; + div.rn.f32 %f3277, %f3188, %f3189; + mov.f32 %f3275, 0f51BA43B7; + mov.f32 %f3196, %f545; + @%p80 bra $L__BB2_51; + + mov.f32 %f577, 0f3F000000; + div.rn.f32 %f578, %f577, %f535; + div.rn.f32 %f579, %f578, %f535; + cvt.f64.f32 %fd1, %f579; + mov.f64 %fd551, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r16}, %fd551; + } + and.b32 %r17, %r16, 2146435072; + and.b32 %r18, %r16, 2147483647; + setp.gt.s32 %p88, %r16, -1; + selp.b32 %r19, 2146435072, 0, %p88; + mov.u32 %r203, 0; + or.b32 %r20, %r19, -2147483648; + setp.eq.s32 %p90, %r17, 1062207488; + setp.lt.s32 %p91, %r16, 0; + setp.ne.s32 %p96, %r18, 1071644672; + setp.eq.s32 %p123, %r18, 2146435072; + mov.u32 %r1365, %r203; + mov.f32 %f3196, %f545; + +$L__BB2_13: + mov.u32 %r1366, %r203; + +$L__BB2_14: + mov.f32 %f3199, 0f00000000; + mov.f32 %f3200, %f3199; + mov.u32 %r1367, %r203; + +$L__BB2_15: + sub.s32 %r24, %r1367, %r1365; + cvt.rn.f32.s32 %f582, %r24; + cvt.f64.f32 %fd2, %f582; + { + .reg .b32 %temp; + mov.b64 {%temp, %r25}, %fd2; + } + abs.f64 %fd552, %fd2; + { // callseq 25, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd552; + .param .b64 param1; + st.param.f64 [param1+0], %fd551; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd3, [retval0+0]; + } // callseq 25 + setp.lt.s32 %p89, %r25, 0; + and.pred %p1, %p89, %p90; + selp.b32 %r207, %r25, 0, %p90; + or.b32 %r208, %r207, 2146435072; + selp.b32 %r26, %r208, %r207, %p91; + add.f64 %fd4, %fd2, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r209}, %fd4; + } + and.b32 %r27, %r209, 2146435072; + setp.ne.s32 %p92, %r27, 2146435072; + setp.gtu.f64 %p93, %fd552, 0d7FF0000000000000; + setp.gt.f64 %p94, %fd552, 0d3FF0000000000000; + selp.b32 %r210, 2146435072, 0, %p94; + xor.b32 %r211, %r210, 2146435072; + selp.b32 %r212, %r211, %r210, %p91; + setp.eq.s32 %p95, %r24, -1; + selp.b32 %r28, 1072693248, %r212, %p95; + and.b32 %r29, %r25, 2147483647; + and.pred %p97, %p96, %p1; + selp.b32 %r30, %r20, %r19, %p97; + mul.lo.s32 %r31, %r1367, %r182; + or.pred %p2, %p92, %p93; + mov.u32 %r1368, %r203; + +$L__BB2_16: + not.pred %p98, %p1; + mov.f64 %fd1073, %fd3; + @%p98 bra $L__BB2_18; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r213}, %fd3; + } + xor.b32 %r214, %r213, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r215, %temp}, %fd3; + } + mov.b64 %fd1073, {%r215, %r214}; + +$L__BB2_18: + setp.eq.s32 %p99, %r24, 0; + @%p99 bra $L__BB2_22; + + setp.gt.s32 %p100, %r25, -1; + @%p100 bra $L__BB2_23; + + cvt.rzi.f64.f64 %fd555, %fd551; + setp.eq.f64 %p101, %fd555, 0d4000000000000000; + @%p101 bra $L__BB2_23; + + mov.f64 %fd1073, 0dFFF8000000000000; + bra.uni $L__BB2_23; + +$L__BB2_22: + mov.u32 %r216, 0; + mov.b64 %fd1073, {%r216, %r26}; + +$L__BB2_23: + selp.f64 %fd1074, %fd1073, %fd4, %p92; + @%p2 bra $L__BB2_28; + + { + .reg .b32 %temp; + mov.b64 {%r217, %temp}, %fd551; + } + setp.eq.s32 %p104, %r217, 0; + and.pred %p105, %p123, %p104; + @%p105 bra $L__BB2_27; + bra.uni $L__BB2_25; + +$L__BB2_27: + mov.u32 %r220, 0; + mov.b64 %fd1074, {%r220, %r28}; + bra.uni $L__BB2_28; + +$L__BB2_25: + setp.ne.s32 %p106, %r29, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r218, %temp}, %fd2; + } + setp.ne.s32 %p107, %r218, 0; + or.pred %p108, %p106, %p107; + mov.f64 %fd1074, %fd1073; + @%p108 bra $L__BB2_28; + + mov.u32 %r219, 0; + mov.b64 %fd1074, {%r219, %r30}; + +$L__BB2_28: + setp.eq.s32 %p109, %r24, 1; + selp.f64 %fd558, 0d3FF0000000000000, %fd1074, %p109; + mov.f64 %fd559, 0d3FF0000000000000; + mul.f64 %fd13, %fd558, %fd1; + neg.f64 %fd560, %fd13; + mov.f64 %fd561, 0d4338000000000000; + mov.f64 %fd562, 0d3FF71547652B82FE; + fma.rn.f64 %fd563, %fd560, %fd562, %fd561; + { + .reg .b32 %temp; + mov.b64 {%r33, %temp}, %fd563; + } + mov.f64 %fd564, 0dC338000000000000; + add.rn.f64 %fd565, %fd563, %fd564; + mov.f64 %fd566, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd567, %fd565, %fd566, %fd560; + mov.f64 %fd568, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd569, %fd565, %fd568, %fd567; + mov.f64 %fd570, 0d3E928AF3FCA213EA; + mov.f64 %fd571, 0d3E5ADE1569CE2BDF; + fma.rn.f64 %fd572, %fd571, %fd569, %fd570; + mov.f64 %fd573, 0d3EC71DEE62401315; + fma.rn.f64 %fd574, %fd572, %fd569, %fd573; + mov.f64 %fd575, 0d3EFA01997C89EB71; + fma.rn.f64 %fd576, %fd574, %fd569, %fd575; + mov.f64 %fd577, 0d3F2A01A014761F65; + fma.rn.f64 %fd578, %fd576, %fd569, %fd577; + mov.f64 %fd579, 0d3F56C16C1852B7AF; + fma.rn.f64 %fd580, %fd578, %fd569, %fd579; + mov.f64 %fd581, 0d3F81111111122322; + fma.rn.f64 %fd582, %fd580, %fd569, %fd581; + mov.f64 %fd583, 0d3FA55555555502A1; + fma.rn.f64 %fd584, %fd582, %fd569, %fd583; + mov.f64 %fd585, 0d3FC5555555555511; + fma.rn.f64 %fd586, %fd584, %fd569, %fd585; + mov.f64 %fd587, 0d3FE000000000000B; + fma.rn.f64 %fd588, %fd586, %fd569, %fd587; + fma.rn.f64 %fd589, %fd588, %fd569, %fd559; + fma.rn.f64 %fd590, %fd589, %fd569, %fd559; + { + .reg .b32 %temp; + mov.b64 {%r34, %temp}, %fd590; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r35}, %fd590; + } + shl.b32 %r221, %r33, 20; + add.s32 %r222, %r35, %r221; + mov.b64 %fd1075, {%r34, %r222}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r223}, %fd560; + } + mov.b32 %f583, %r223; + abs.f32 %f42, %f583; + setp.lt.f32 %p110, %f42, 0f4086232B; + @%p110 bra $L__BB2_31; + + setp.gt.f64 %p111, %fd13, 0d8000000000000000; + mov.f64 %fd591, 0d7FF0000000000000; + sub.f64 %fd592, %fd591, %fd13; + selp.f64 %fd1075, 0d0000000000000000, %fd592, %p111; + setp.geu.f32 %p112, %f42, 0f40874800; + @%p112 bra $L__BB2_31; + + shr.u32 %r224, %r33, 31; + add.s32 %r225, %r33, %r224; + shr.s32 %r226, %r225, 1; + shl.b32 %r227, %r226, 20; + add.s32 %r228, %r35, %r227; + mov.b64 %fd593, {%r34, %r228}; + sub.s32 %r229, %r33, %r226; + shl.b32 %r230, %r229, 20; + add.s32 %r231, %r230, 1072693248; + mov.u32 %r232, 0; + mov.b64 %fd594, {%r232, %r231}; + mul.f64 %fd1075, %fd593, %fd594; + +$L__BB2_31: + sub.s32 %r36, %r1366, %r1368; + cvt.rn.f32.s32 %f584, %r36; + cvt.f64.f32 %fd18, %f584; + { + .reg .b32 %temp; + mov.b64 {%temp, %r37}, %fd18; + } + abs.f64 %fd19, %fd18; + { // callseq 26, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd19; + .param .b64 param1; + st.param.f64 [param1+0], %fd551; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1077, [retval0+0]; + } // callseq 26 + setp.lt.s32 %p113, %r37, 0; + and.pred %p3, %p113, %p90; + not.pred %p115, %p3; + @%p115 bra $L__BB2_33; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r233}, %fd1077; + } + xor.b32 %r234, %r233, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r235, %temp}, %fd1077; + } + mov.b64 %fd1077, {%r235, %r234}; + +$L__BB2_33: + setp.eq.s32 %p116, %r36, 0; + @%p116 bra $L__BB2_37; + + setp.gt.s32 %p117, %r37, -1; + @%p117 bra $L__BB2_38; + + cvt.rzi.f64.f64 %fd597, %fd551; + setp.eq.f64 %p118, %fd597, 0d4000000000000000; + @%p118 bra $L__BB2_38; + + mov.f64 %fd1077, 0dFFF8000000000000; + bra.uni $L__BB2_38; + +$L__BB2_37: + mov.u32 %r236, 0; + selp.b32 %r237, %r37, 0, %p90; + or.b32 %r238, %r237, 2146435072; + selp.b32 %r239, %r238, %r237, %p91; + mov.b64 %fd1077, {%r236, %r239}; + +$L__BB2_38: + add.f64 %fd25, %fd18, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r240}, %fd25; + } + and.b32 %r241, %r240, 2146435072; + setp.ne.s32 %p121, %r241, 2146435072; + mov.f64 %fd1078, %fd1077; + @%p121 bra $L__BB2_44; + + setp.gtu.f64 %p122, %fd19, 0d7FF0000000000000; + mov.f64 %fd1078, %fd25; + @%p122 bra $L__BB2_44; + + { + .reg .b32 %temp; + mov.b64 {%r242, %temp}, %fd551; + } + setp.eq.s32 %p124, %r242, 0; + and.pred %p125, %p123, %p124; + @%p125 bra $L__BB2_43; + bra.uni $L__BB2_41; + +$L__BB2_43: + mov.u32 %r247, 0; + setp.gt.f64 %p132, %fd19, 0d3FF0000000000000; + selp.b32 %r248, 2146435072, 0, %p132; + xor.b32 %r249, %r248, 2146435072; + selp.b32 %r250, %r249, %r248, %p91; + setp.eq.s32 %p133, %r36, -1; + selp.b32 %r251, 1072693248, %r250, %p133; + mov.b64 %fd1078, {%r247, %r251}; + bra.uni $L__BB2_44; + +$L__BB2_41: + { + .reg .b32 %temp; + mov.b64 {%r243, %temp}, %fd18; + } + and.b32 %r244, %r37, 2147483647; + setp.ne.s32 %p126, %r244, 2146435072; + setp.ne.s32 %p127, %r243, 0; + or.pred %p128, %p126, %p127; + mov.f64 %fd1078, %fd1077; + @%p128 bra $L__BB2_44; + + and.pred %p130, %p96, %p3; + selp.b32 %r245, %r20, %r19, %p130; + mov.u32 %r246, 0; + mov.b64 %fd1078, {%r246, %r245}; + +$L__BB2_44: + setp.eq.s32 %p134, %r36, 1; + selp.f64 %fd600, 0d3FF0000000000000, %fd1078, %p134; + mul.f64 %fd29, %fd600, %fd1; + neg.f64 %fd602, %fd29; + fma.rn.f64 %fd605, %fd602, %fd562, %fd561; + { + .reg .b32 %temp; + mov.b64 {%r38, %temp}, %fd605; + } + add.rn.f64 %fd607, %fd605, %fd564; + fma.rn.f64 %fd609, %fd607, %fd566, %fd602; + fma.rn.f64 %fd611, %fd607, %fd568, %fd609; + fma.rn.f64 %fd614, %fd571, %fd611, %fd570; + fma.rn.f64 %fd616, %fd614, %fd611, %fd573; + fma.rn.f64 %fd618, %fd616, %fd611, %fd575; + fma.rn.f64 %fd620, %fd618, %fd611, %fd577; + fma.rn.f64 %fd622, %fd620, %fd611, %fd579; + fma.rn.f64 %fd624, %fd622, %fd611, %fd581; + fma.rn.f64 %fd626, %fd624, %fd611, %fd583; + fma.rn.f64 %fd628, %fd626, %fd611, %fd585; + fma.rn.f64 %fd630, %fd628, %fd611, %fd587; + fma.rn.f64 %fd631, %fd630, %fd611, %fd559; + fma.rn.f64 %fd632, %fd631, %fd611, %fd559; + { + .reg .b32 %temp; + mov.b64 {%r39, %temp}, %fd632; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r40}, %fd632; + } + shl.b32 %r252, %r38, 20; + add.s32 %r253, %r40, %r252; + mov.b64 %fd1079, {%r39, %r253}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r254}, %fd602; + } + mov.b32 %f585, %r254; + abs.f32 %f43, %f585; + setp.lt.f32 %p135, %f43, 0f4086232B; + @%p135 bra $L__BB2_47; + + setp.gt.f64 %p136, %fd29, 0d8000000000000000; + mov.f64 %fd633, 0d7FF0000000000000; + sub.f64 %fd634, %fd633, %fd29; + selp.f64 %fd1079, 0d0000000000000000, %fd634, %p136; + setp.geu.f32 %p137, %f43, 0f40874800; + @%p137 bra $L__BB2_47; + + shr.u32 %r255, %r38, 31; + add.s32 %r256, %r38, %r255; + shr.s32 %r257, %r256, 1; + shl.b32 %r258, %r257, 20; + add.s32 %r259, %r40, %r258; + mov.b64 %fd635, {%r39, %r259}; + sub.s32 %r260, %r38, %r257; + shl.b32 %r261, %r260, 20; + add.s32 %r262, %r261, 1072693248; + mov.u32 %r263, 0; + mov.b64 %fd636, {%r263, %r262}; + mul.f64 %fd1079, %fd635, %fd636; + +$L__BB2_47: + add.s32 %r264, %r1368, %r31; + add.s32 %r265, %r264, %r2; + mul.wide.s32 %rd19, %r265, 4; + add.s64 %rd20, %rd1, %rd19; + ld.global.f32 %f586, [%rd20]; + cvt.f64.f32 %fd637, %f586; + mul.f64 %fd638, %fd1075, %fd1079; + cvt.f64.f32 %fd639, %f3200; + fma.rn.f64 %fd640, %fd638, %fd637, %fd639; + cvt.rn.f32.f64 %f3200, %fd640; + cvt.f64.f32 %fd641, %f3199; + add.f64 %fd642, %fd638, %fd641; + cvt.rn.f32.f64 %f3199, %fd642; + add.s32 %r1368, %r1368, 1; + setp.lt.s32 %p138, %r1368, %r182; + @%p138 bra $L__BB2_16; + + add.s32 %r1367, %r1367, 1; + setp.lt.s32 %p139, %r1367, %r182; + @%p139 bra $L__BB2_15; + + div.rn.f32 %f587, %f3200, %f3199; + max.f32 %f3196, %f3196, %f587; + min.f32 %f3275, %f3275, %f587; + add.s32 %r1366, %r1366, 1; + setp.lt.s32 %p140, %r1366, %r182; + @%p140 bra $L__BB2_14; + + add.s32 %r1365, %r1365, 1; + setp.lt.s32 %p141, %r1365, %r182; + @%p141 bra $L__BB2_13; + +$L__BB2_51: + sub.f32 %f589, %f3196, %f3275; + add.f32 %f590, %f589, %f589; + mul.f32 %f591, %f590, 0f40490FD8; + mul.f32 %f592, %f591, %f535; + mul.f32 %f593, %f592, %f542; + mul.f32 %f594, %f593, 0f3FB504F3; + max.f32 %f3276, %f545, %f594; + setp.lt.s32 %p142, %r183, 1; + mov.f32 %f3274, %f545; + @%p142 bra $L__BB2_623; + + mul.f32 %f51, %f535, 0f3F000000; + mul.f32 %f52, %f542, 0f3F000000; + mul.f32 %f596, %f536, 0f40400000; + cvt.f64.f32 %fd34, %f596; + mul.f32 %f53, %f541, %f541; + mul.f32 %f54, %f53, %f541; + mul.f32 %f597, %f538, 0f40800000; + cvt.f64.f32 %fd35, %f597; + cvt.f64.f32 %fd643, %f541; + add.f64 %fd36, %fd643, 0d4010000000000000; + mul.f32 %f598, %f537, 0f40400000; + cvt.f64.f32 %fd37, %f598; + mul.f32 %f599, %f539, 0f40800000; + cvt.f64.f32 %fd38, %f599; + mul.f32 %f55, %f535, 0fBE800000; + mul.f32 %f56, %f542, 0fBE800000; + mov.f32 %f600, 0f40000000; + div.rn.f32 %f57, %f600, %f53; + mul.f32 %f58, %f536, 0f40C00000; + mul.f32 %f601, %f538, 0f41400000; + cvt.f64.f32 %fd39, %f601; + mul.f32 %f59, %f537, 0f40C00000; + mul.f32 %f602, %f539, 0f41400000; + cvt.f64.f32 %fd40, %f602; + mov.u32 %r266, 0; + { + .reg .b32 %temp; + mov.b64 {%temp, %r311}, %fd36; + } + and.b32 %r92, %r311, 2146435072; + setp.ne.s32 %p189, %r92, 2146435072; + setp.eq.f32 %p193, %f541, 0fBF800000; + mov.f32 %f3274, %f545; + mov.u32 %r1369, %r266; + +$L__BB2_53: + mov.f32 %f3218, %f545; + mov.f32 %f3219, %f545; + mov.f32 %f3220, %f545; + mov.f32 %f3221, %f545; + mov.f32 %f3222, %f545; + mov.f32 %f3223, %f545; + mov.f32 %f3224, %f545; + mov.f32 %f3225, %f545; + mov.f32 %f3226, %f545; + mov.f32 %f3227, %f545; + @%p80 bra $L__BB2_622; + + sub.f32 %f65, %f3274, %f540; + div.rn.f32 %f66, %f65, %f541; + cvt.f64.f32 %fd41, %f66; + add.f32 %f67, %f3274, %f540; + div.rn.f32 %f68, %f67, %f541; + cvt.f64.f32 %fd42, %f68; + div.rn.f32 %f69, %f3276, 0fC0206C98; + div.rn.f32 %f623, %f3276, 0f40206C98; + cvt.f64.f32 %fd43, %f623; + add.f32 %f624, %f65, %f65; + div.rn.f32 %f625, %f624, %f53; + cvt.f64.f32 %fd44, %f625; + cvt.f64.f32 %fd45, %f65; + add.f64 %fd46, %fd45, 0d4000000000000000; + add.f64 %fd47, %fd45, 0d4008000000000000; + add.f32 %f626, %f67, %f67; + div.rn.f32 %f627, %f626, %f53; + cvt.f64.f32 %fd48, %f627; + cvt.f64.f32 %fd49, %f67; + add.f64 %fd50, %fd49, 0d4000000000000000; + add.f64 %fd51, %fd49, 0d4008000000000000; + mul.f32 %f628, %f58, %f65; + div.rn.f32 %f629, %f628, %f54; + add.f32 %f630, %f57, %f629; + cvt.f64.f32 %fd52, %f630; + mul.f32 %f631, %f59, %f67; + div.rn.f32 %f632, %f631, %f54; + add.f32 %f633, %f57, %f632; + cvt.f64.f32 %fd53, %f633; + mov.f32 %f3218, %f545; + mov.f32 %f3219, %f545; + mov.f32 %f3220, %f545; + mov.f32 %f3221, %f545; + mov.f32 %f3222, %f545; + mov.f32 %f3223, %f545; + mov.f32 %f3224, %f545; + mov.f32 %f3225, %f545; + mov.f32 %f3226, %f545; + mov.f32 %f3227, %f545; + mov.u32 %r1370, %r266; + +$L__BB2_55: + cvt.f64.f32 %fd1054, %f66; + cvt.f64.f32 %fd1053, %f68; + add.f32 %f3054, %f3274, %f540; + sub.f32 %f3053, %f3274, %f540; + mov.f64 %fd644, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r47}, %fd644; + } + and.b32 %r48, %r47, 2146435072; + setp.eq.s32 %p144, %r48, 1062207488; + abs.f64 %fd645, %fd1054; + { // callseq 27, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd645; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd54, [retval0+0]; + } // callseq 27 + { + .reg .b32 %temp; + mov.b64 {%temp, %r49}, %fd1054; + } + setp.lt.s32 %p145, %r49, 0; + and.pred %p4, %p145, %p144; + setp.lt.s32 %p146, %r47, 0; + add.f64 %fd646, %fd1054, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r269}, %fd646; + } + and.b32 %r50, %r269, 2146435072; + setp.ne.s32 %p147, %r50, 2146435072; + setp.gtu.f64 %p148, %fd645, 0d7FF0000000000000; + mov.f64 %fd647, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r51}, %fd647; + } + and.b32 %r52, %r51, 2146435072; + setp.eq.s32 %p149, %r52, 1073741824; + { // callseq 28, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd645; + .param .b64 param1; + st.param.f64 [param1+0], %fd647; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd55, [retval0+0]; + } // callseq 28 + and.pred %p5, %p145, %p149; + and.b32 %r53, %r47, 2147483647; + setp.gt.f64 %p150, %fd645, 0d3FF0000000000000; + selp.b32 %r270, 2146435072, 0, %p150; + xor.b32 %r271, %r270, 2146435072; + selp.b32 %r272, %r271, %r270, %p146; + setp.eq.f32 %p151, %f66, 0fBF800000; + selp.b32 %r54, 1072693248, %r272, %p151; + setp.lt.s32 %p152, %r51, 0; + add.f64 %fd648, %fd1054, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r273}, %fd648; + } + and.b32 %r55, %r273, 2146435072; + setp.ne.s32 %p153, %r55, 2146435072; + setp.gt.s32 %p154, %r47, -1; + selp.b32 %r56, 2146435072, 0, %p154; + setp.ne.s32 %p155, %r53, 1071644672; + or.b32 %r57, %r56, -2147483648; + mov.f64 %fd649, 0d4010000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r274}, %fd649; + } + and.b32 %r275, %r274, 2146435072; + setp.eq.s32 %p156, %r275, 1072693248; + { // callseq 29, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd645; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd56, [retval0+0]; + } // callseq 29 + and.pred %p6, %p145, %p156; + and.b32 %r58, %r51, 2147483647; + selp.b32 %r276, %r271, %r270, %p152; + selp.b32 %r59, 1072693248, %r276, %p151; + selp.b32 %r277, %r49, 0, %p156; + setp.lt.s32 %p157, %r274, 0; + or.b32 %r278, %r277, 2146435072; + selp.b32 %r60, %r278, %r277, %p157; + add.f64 %fd650, %fd1054, 0d4010000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r279}, %fd650; + } + and.b32 %r61, %r279, 2146435072; + setp.ne.s32 %p158, %r61, 2146435072; + setp.gt.s32 %p159, %r51, -1; + selp.b32 %r62, 2146435072, 0, %p159; + or.b32 %r63, %r62, -2147483648; + abs.f64 %fd651, %fd1053; + { // callseq 30, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd651; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd57, [retval0+0]; + } // callseq 30 + { + .reg .b32 %temp; + mov.b64 {%temp, %r64}, %fd1053; + } + setp.lt.s32 %p160, %r64, 0; + and.pred %p7, %p160, %p144; + and.b32 %r65, %r274, 2147483647; + selp.b32 %r280, %r271, %r270, %p157; + selp.b32 %r66, 1072693248, %r280, %p151; + add.f64 %fd652, %fd1053, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r281}, %fd652; + } + and.b32 %r67, %r281, 2146435072; + setp.ne.s32 %p161, %r67, 2146435072; + setp.gt.s32 %p162, %r274, -1; + selp.b32 %r282, 2146435072, 0, %p162; + setp.ne.s32 %p163, %r65, 1071644672; + and.pred %p164, %p163, %p6; + or.b32 %r283, %r282, -2147483648; + selp.b32 %r68, %r283, %r282, %p164; + setp.gtu.f64 %p165, %fd651, 0d7FF0000000000000; + { // callseq 31, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd651; + .param .b64 param1; + st.param.f64 [param1+0], %fd647; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd58, [retval0+0]; + } // callseq 31 + and.pred %p8, %p160, %p149; + setp.gt.f64 %p166, %fd651, 0d3FF0000000000000; + selp.b32 %r284, 2146435072, 0, %p166; + xor.b32 %r285, %r284, 2146435072; + selp.b32 %r286, %r285, %r284, %p146; + setp.eq.f32 %p167, %f68, 0fBF800000; + selp.b32 %r69, 1072693248, %r286, %p167; + add.f64 %fd653, %fd1053, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r287}, %fd653; + } + and.b32 %r70, %r287, 2146435072; + setp.ne.s32 %p168, %r70, 2146435072; + { // callseq 32, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd651; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd59, [retval0+0]; + } // callseq 32 + and.pred %p9, %p160, %p156; + selp.b32 %r288, %r285, %r284, %p152; + selp.b32 %r71, 1072693248, %r288, %p167; + selp.b32 %r289, %r64, 0, %p156; + or.b32 %r290, %r289, 2146435072; + selp.b32 %r72, %r290, %r289, %p157; + add.f64 %fd654, %fd1053, 0d4010000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r291}, %fd654; + } + and.b32 %r73, %r291, 2146435072; + setp.ne.s32 %p169, %r73, 2146435072; + cvt.rn.f32.s32 %f80, %r1370; + sub.f32 %f634, %f80, %f3278; + add.f32 %f81, %f634, 0f3F000000; + add.f32 %f82, %f634, 0fBF000000; + selp.b32 %r292, %r285, %r284, %p157; + selp.b32 %r74, 1072693248, %r292, %p167; + and.pred %p170, %p163, %p9; + selp.b32 %r75, %r283, %r282, %p170; + cvt.f64.f32 %fd655, %f81; + { + .reg .b32 %temp; + mov.b64 {%temp, %r76}, %fd655; + } + abs.f64 %fd656, %fd655; + { // callseq 33, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd656; + .param .b64 param1; + st.param.f64 [param1+0], %fd647; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd60, [retval0+0]; + } // callseq 33 + setp.lt.s32 %p171, %r76, 0; + and.pred %p10, %p171, %p149; + add.f64 %fd61, %fd655, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r293}, %fd61; + } + and.b32 %r77, %r293, 2146435072; + setp.ne.s32 %p172, %r77, 2146435072; + mov.f64 %fd657, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r78}, %fd657; + } + setp.gtu.f64 %p173, %fd656, 0d7FF0000000000000; + cvt.f64.f32 %fd658, %f82; + { + .reg .b32 %temp; + mov.b64 {%temp, %r79}, %fd658; + } + abs.f64 %fd659, %fd658; + { // callseq 34, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd659; + .param .b64 param1; + st.param.f64 [param1+0], %fd647; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd62, [retval0+0]; + } // callseq 34 + setp.lt.s32 %p174, %r79, 0; + and.pred %p11, %p174, %p149; + setp.gt.f64 %p175, %fd656, 0d3FF0000000000000; + selp.b32 %r294, 2146435072, 0, %p175; + xor.b32 %r295, %r294, 2146435072; + selp.b32 %r296, %r295, %r294, %p152; + setp.eq.f32 %p176, %f81, 0fBF800000; + selp.b32 %r80, 1072693248, %r296, %p176; + add.f64 %fd63, %fd658, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r297}, %fd63; + } + and.b32 %r81, %r297, 2146435072; + setp.ne.s32 %p177, %r81, 2146435072; + setp.gtu.f64 %p178, %fd659, 0d7FF0000000000000; + setp.gt.f64 %p179, %fd659, 0d3FF0000000000000; + selp.b32 %r298, 2146435072, 0, %p179; + xor.b32 %r299, %r298, 2146435072; + selp.b32 %r300, %r299, %r298, %p152; + setp.eq.f32 %p180, %f82, 0fBF800000; + selp.b32 %r82, 1072693248, %r300, %p180; + abs.f64 %fd660, %fd45; + { // callseq 35, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd660; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd64, [retval0+0]; + } // callseq 35 + { + .reg .b32 %temp; + mov.b64 {%temp, %r83}, %fd45; + } + setp.lt.s32 %p181, %r83, 0; + and.pred %p12, %p181, %p144; + selp.b32 %r301, %r83, 0, %p144; + or.b32 %r302, %r301, 2146435072; + selp.b32 %r84, %r302, %r301, %p146; + { + .reg .b32 %temp; + mov.b64 {%temp, %r303}, %fd46; + } + and.b32 %r85, %r303, 2146435072; + setp.ne.s32 %p182, %r85, 2146435072; + setp.gtu.f64 %p183, %fd660, 0d7FF0000000000000; + { // callseq 36, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd660; + .param .b64 param1; + st.param.f64 [param1+0], %fd647; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd65, [retval0+0]; + } // callseq 36 + and.pred %p13, %p181, %p149; + setp.gt.f64 %p184, %fd660, 0d3FF0000000000000; + selp.b32 %r304, 2146435072, 0, %p184; + xor.b32 %r305, %r304, 2146435072; + selp.b32 %r306, %r305, %r304, %p146; + setp.eq.f32 %p185, %f3053, 0fBF800000; + selp.b32 %r86, 1072693248, %r306, %p185; + { + .reg .b32 %temp; + mov.b64 {%temp, %r307}, %fd47; + } + and.b32 %r87, %r307, 2146435072; + setp.ne.s32 %p186, %r87, 2146435072; + and.pred %p187, %p155, %p12; + selp.b32 %r88, %r57, %r56, %p187; + { + .reg .b32 %temp; + mov.b64 {%temp, %r89}, %fd643; + } + abs.f64 %fd662, %fd643; + { // callseq 37, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd662; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd66, [retval0+0]; + } // callseq 37 + setp.lt.s32 %p188, %r89, 0; + and.pred %p14, %p188, %p156; + selp.b32 %r308, %r305, %r304, %p152; + selp.b32 %r90, 1072693248, %r308, %p185; + selp.b32 %r309, %r89, 0, %p156; + or.b32 %r310, %r309, 2146435072; + selp.b32 %r91, %r310, %r309, %p157; + setp.gtu.f64 %p190, %fd662, 0d7FF0000000000000; + abs.f64 %fd663, %fd49; + { // callseq 38, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd663; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd67, [retval0+0]; + } // callseq 38 + { + .reg .b32 %temp; + mov.b64 {%temp, %r93}, %fd49; + } + setp.lt.s32 %p191, %r93, 0; + and.pred %p15, %p191, %p144; + setp.gt.f64 %p192, %fd662, 0d3FF0000000000000; + selp.b32 %r312, 2146435072, 0, %p192; + xor.b32 %r313, %r312, 2146435072; + selp.b32 %r314, %r313, %r312, %p157; + selp.b32 %r94, 1072693248, %r314, %p193; + selp.b32 %r315, %r93, 0, %p144; + or.b32 %r316, %r315, 2146435072; + selp.b32 %r95, %r316, %r315, %p146; + { + .reg .b32 %temp; + mov.b64 {%temp, %r317}, %fd50; + } + and.b32 %r96, %r317, 2146435072; + setp.ne.s32 %p194, %r96, 2146435072; + and.pred %p195, %p163, %p14; + selp.b32 %r97, %r283, %r282, %p195; + setp.gtu.f64 %p196, %fd663, 0d7FF0000000000000; + { // callseq 39, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd663; + .param .b64 param1; + st.param.f64 [param1+0], %fd647; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd68, [retval0+0]; + } // callseq 39 + and.pred %p16, %p191, %p149; + setp.gt.f64 %p197, %fd663, 0d3FF0000000000000; + selp.b32 %r318, 2146435072, 0, %p197; + xor.b32 %r319, %r318, 2146435072; + selp.b32 %r320, %r319, %r318, %p146; + setp.eq.f32 %p198, %f3054, 0fBF800000; + selp.b32 %r98, 1072693248, %r320, %p198; + { + .reg .b32 %temp; + mov.b64 {%temp, %r321}, %fd51; + } + and.b32 %r99, %r321, 2146435072; + setp.ne.s32 %p199, %r99, 2146435072; + and.pred %p200, %p155, %p15; + selp.b32 %r100, %r57, %r56, %p200; + selp.b32 %r322, %r319, %r318, %p152; + selp.b32 %r101, 1072693248, %r322, %p198; + or.pred %p17, %p147, %p148; + or.pred %p18, %p153, %p148; + or.pred %p19, %p158, %p148; + or.pred %p20, %p161, %p165; + or.pred %p21, %p168, %p165; + or.pred %p22, %p169, %p165; + or.pred %p23, %p172, %p173; + or.pred %p24, %p177, %p178; + or.pred %p25, %p182, %p183; + or.pred %p26, %p186, %p183; + or.pred %p27, %p189, %p190; + or.pred %p28, %p194, %p196; + or.pred %p29, %p199, %p196; + shr.s32 %r323, %r47, 31; + and.b32 %r102, %r323, 2146435072; + mov.u32 %r1371, %r266; + +$L__BB2_56: + not.pred %p201, %p4; + mov.f64 %fd1081, %fd54; + @%p201 bra $L__BB2_58; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r324}, %fd54; + } + xor.b32 %r325, %r324, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r326, %temp}, %fd54; + } + mov.b64 %fd1081, {%r326, %r325}; + +$L__BB2_58: + setp.eq.f32 %p202, %f66, 0f00000000; + @%p202 bra $L__BB2_62; + bra.uni $L__BB2_59; + +$L__BB2_62: + mov.u32 %r327, 0; + selp.b32 %r329, %r49, 0, %p144; + or.b32 %r330, %r329, 2146435072; + selp.b32 %r331, %r330, %r329, %p146; + mov.b64 %fd1081, {%r327, %r331}; + bra.uni $L__BB2_63; + +$L__BB2_59: + setp.gt.s32 %p203, %r49, -1; + @%p203 bra $L__BB2_63; + + cvt.rzi.f64.f64 %fd665, %fd644; + setp.eq.f64 %p204, %fd665, 0d4000000000000000; + @%p204 bra $L__BB2_63; + + mov.f64 %fd1081, 0dFFF8000000000000; + +$L__BB2_63: + cvt.f64.f32 %fd1056, %f66; + add.f64 %fd1055, %fd1054, 0d4000000000000000; + selp.f64 %fd1082, %fd1081, %fd1055, %p147; + @%p17 bra $L__BB2_68; + + setp.eq.s32 %p208, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r332, %temp}, %fd644; + } + setp.eq.s32 %p209, %r332, 0; + and.pred %p210, %p208, %p209; + @%p210 bra $L__BB2_67; + bra.uni $L__BB2_65; + +$L__BB2_67: + mov.u32 %r339, 0; + mov.b64 %fd1082, {%r339, %r54}; + bra.uni $L__BB2_68; + +$L__BB2_65: + and.b32 %r333, %r49, 2147483647; + setp.ne.s32 %p211, %r333, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r334, %temp}, %fd41; + } + setp.ne.s32 %p212, %r334, 0; + or.pred %p213, %p211, %p212; + mov.f64 %fd1082, %fd1081; + @%p213 bra $L__BB2_68; + + and.pred %p215, %p155, %p4; + selp.b32 %r337, %r57, %r56, %p215; + mov.u32 %r338, 0; + mov.b64 %fd1082, {%r338, %r337}; + +$L__BB2_68: + not.pred %p216, %p5; + mov.f64 %fd1084, %fd55; + @%p216 bra $L__BB2_70; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r340}, %fd55; + } + xor.b32 %r341, %r340, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r342, %temp}, %fd55; + } + mov.b64 %fd1084, {%r342, %r341}; + +$L__BB2_70: + @%p202 bra $L__BB2_74; + bra.uni $L__BB2_71; + +$L__BB2_74: + mov.u32 %r343, 0; + selp.b32 %r345, %r49, 0, %p149; + or.b32 %r346, %r345, 2146435072; + selp.b32 %r347, %r346, %r345, %p152; + mov.b64 %fd1084, {%r343, %r347}; + bra.uni $L__BB2_75; + +$L__BB2_71: + setp.gt.s32 %p218, %r49, -1; + @%p218 bra $L__BB2_75; + + cvt.rzi.f64.f64 %fd670, %fd647; + setp.eq.f64 %p219, %fd670, 0d4008000000000000; + @%p219 bra $L__BB2_75; + + mov.f64 %fd1084, 0dFFF8000000000000; + +$L__BB2_75: + cvt.f64.f32 %fd1058, %f66; + add.f64 %fd1057, %fd1054, 0d4008000000000000; + selp.f64 %fd1085, %fd1084, %fd1057, %p153; + @%p18 bra $L__BB2_80; + + setp.eq.s32 %p223, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r348, %temp}, %fd647; + } + setp.eq.s32 %p224, %r348, 0; + and.pred %p225, %p223, %p224; + @%p225 bra $L__BB2_79; + bra.uni $L__BB2_77; + +$L__BB2_79: + mov.u32 %r355, 0; + mov.b64 %fd1085, {%r355, %r59}; + bra.uni $L__BB2_80; + +$L__BB2_77: + and.b32 %r349, %r49, 2147483647; + setp.ne.s32 %p226, %r349, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r350, %temp}, %fd41; + } + setp.ne.s32 %p227, %r350, 0; + or.pred %p228, %p226, %p227; + mov.f64 %fd1085, %fd1084; + @%p228 bra $L__BB2_80; + + setp.ne.s32 %p229, %r58, 1071644672; + and.pred %p230, %p229, %p5; + selp.b32 %r353, %r63, %r62, %p230; + mov.u32 %r354, 0; + mov.b64 %fd1085, {%r354, %r353}; + +$L__BB2_80: + setp.eq.f32 %p231, %f66, 0f3F800000; + selp.f64 %fd674, 0d3FF0000000000000, %fd1085, %p231; + cvt.f64.f32 %fd675, %f536; + add.f64 %fd676, %fd1082, 0d3FF0000000000000; + selp.f64 %fd677, 0d4000000000000000, %fd676, %p231; + fma.rn.f64 %fd85, %fd674, %fd675, %fd677; + not.pred %p232, %p6; + mov.f64 %fd1087, %fd56; + @%p232 bra $L__BB2_82; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r356}, %fd56; + } + xor.b32 %r357, %r356, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r358, %temp}, %fd56; + } + mov.b64 %fd1087, {%r358, %r357}; + +$L__BB2_82: + @%p202 bra $L__BB2_86; + bra.uni $L__BB2_83; + +$L__BB2_86: + mov.u32 %r359, 0; + mov.b64 %fd1087, {%r359, %r60}; + bra.uni $L__BB2_87; + +$L__BB2_83: + setp.gt.s32 %p234, %r49, -1; + @%p234 bra $L__BB2_87; + + cvt.rzi.f64.f64 %fd679, %fd649; + setp.eq.f64 %p235, %fd679, 0d4010000000000000; + @%p235 bra $L__BB2_87; + + mov.f64 %fd1087, 0dFFF8000000000000; + +$L__BB2_87: + cvt.f64.f32 %fd1060, %f66; + add.f64 %fd1059, %fd1054, 0d4010000000000000; + selp.f64 %fd1088, %fd1087, %fd1059, %p158; + @%p19 bra $L__BB2_92; + + setp.eq.s32 %p237, %r65, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r360, %temp}, %fd649; + } + setp.eq.s32 %p238, %r360, 0; + and.pred %p239, %p237, %p238; + @%p239 bra $L__BB2_91; + bra.uni $L__BB2_89; + +$L__BB2_91: + mov.u32 %r364, 0; + mov.b64 %fd1088, {%r364, %r66}; + bra.uni $L__BB2_92; + +$L__BB2_89: + and.b32 %r361, %r49, 2147483647; + setp.ne.s32 %p240, %r361, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r362, %temp}, %fd41; + } + setp.ne.s32 %p241, %r362, 0; + or.pred %p242, %p240, %p241; + mov.f64 %fd1088, %fd1087; + @%p242 bra $L__BB2_92; + + mov.u32 %r363, 0; + mov.b64 %fd1088, {%r363, %r68}; + +$L__BB2_92: + selp.f64 %fd683, 0d3FF0000000000000, %fd1088, %p231; + cvt.f64.f32 %fd684, %f538; + fma.rn.f64 %fd685, %fd683, %fd684, %fd85; + cvt.rn.f32.f64 %f96, %fd685; + not.pred %p244, %p7; + mov.f64 %fd1090, %fd57; + @%p244 bra $L__BB2_94; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r365}, %fd57; + } + xor.b32 %r366, %r365, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r367, %temp}, %fd57; + } + mov.b64 %fd1090, {%r367, %r366}; + +$L__BB2_94: + setp.eq.f32 %p245, %f68, 0f00000000; + @%p245 bra $L__BB2_98; + bra.uni $L__BB2_95; + +$L__BB2_98: + mov.u32 %r368, 0; + selp.b32 %r370, %r64, 0, %p144; + or.b32 %r371, %r370, 2146435072; + selp.b32 %r372, %r371, %r370, %p146; + mov.b64 %fd1090, {%r368, %r372}; + bra.uni $L__BB2_99; + +$L__BB2_95: + setp.gt.s32 %p246, %r64, -1; + @%p246 bra $L__BB2_99; + + cvt.rzi.f64.f64 %fd687, %fd644; + setp.eq.f64 %p247, %fd687, 0d4000000000000000; + @%p247 bra $L__BB2_99; + + mov.f64 %fd1090, 0dFFF8000000000000; + +$L__BB2_99: + cvt.f64.f32 %fd1062, %f68; + add.f64 %fd1061, %fd1053, 0d4000000000000000; + selp.f64 %fd1091, %fd1090, %fd1061, %p161; + @%p20 bra $L__BB2_104; + + setp.eq.s32 %p251, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r373, %temp}, %fd644; + } + setp.eq.s32 %p252, %r373, 0; + and.pred %p253, %p251, %p252; + @%p253 bra $L__BB2_103; + bra.uni $L__BB2_101; + +$L__BB2_103: + mov.u32 %r380, 0; + mov.b64 %fd1091, {%r380, %r69}; + bra.uni $L__BB2_104; + +$L__BB2_101: + and.b32 %r374, %r64, 2147483647; + setp.ne.s32 %p254, %r374, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r375, %temp}, %fd42; + } + setp.ne.s32 %p255, %r375, 0; + or.pred %p256, %p254, %p255; + mov.f64 %fd1091, %fd1090; + @%p256 bra $L__BB2_104; + + and.pred %p258, %p155, %p7; + selp.b32 %r378, %r57, %r56, %p258; + mov.u32 %r379, 0; + mov.b64 %fd1091, {%r379, %r378}; + +$L__BB2_104: + not.pred %p259, %p8; + mov.f64 %fd1093, %fd58; + @%p259 bra $L__BB2_106; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r381}, %fd58; + } + xor.b32 %r382, %r381, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r383, %temp}, %fd58; + } + mov.b64 %fd1093, {%r383, %r382}; + +$L__BB2_106: + @%p245 bra $L__BB2_110; + bra.uni $L__BB2_107; + +$L__BB2_110: + mov.u32 %r384, 0; + selp.b32 %r386, %r64, 0, %p149; + or.b32 %r387, %r386, 2146435072; + selp.b32 %r388, %r387, %r386, %p152; + mov.b64 %fd1093, {%r384, %r388}; + bra.uni $L__BB2_111; + +$L__BB2_107: + setp.gt.s32 %p261, %r64, -1; + @%p261 bra $L__BB2_111; + + cvt.rzi.f64.f64 %fd692, %fd647; + setp.eq.f64 %p262, %fd692, 0d4008000000000000; + @%p262 bra $L__BB2_111; + + mov.f64 %fd1093, 0dFFF8000000000000; + +$L__BB2_111: + cvt.f64.f32 %fd1064, %f68; + add.f64 %fd1063, %fd1053, 0d4008000000000000; + selp.f64 %fd1094, %fd1093, %fd1063, %p168; + @%p21 bra $L__BB2_116; + + setp.eq.s32 %p266, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r389, %temp}, %fd647; + } + setp.eq.s32 %p267, %r389, 0; + and.pred %p268, %p266, %p267; + @%p268 bra $L__BB2_115; + bra.uni $L__BB2_113; + +$L__BB2_115: + mov.u32 %r396, 0; + mov.b64 %fd1094, {%r396, %r71}; + bra.uni $L__BB2_116; + +$L__BB2_113: + and.b32 %r390, %r64, 2147483647; + setp.ne.s32 %p269, %r390, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r391, %temp}, %fd42; + } + setp.ne.s32 %p270, %r391, 0; + or.pred %p271, %p269, %p270; + mov.f64 %fd1094, %fd1093; + @%p271 bra $L__BB2_116; + + setp.ne.s32 %p272, %r58, 1071644672; + and.pred %p273, %p272, %p8; + selp.b32 %r394, %r63, %r62, %p273; + mov.u32 %r395, 0; + mov.b64 %fd1094, {%r395, %r394}; + +$L__BB2_116: + setp.eq.f32 %p274, %f68, 0f3F800000; + selp.f64 %fd696, 0d3FF0000000000000, %fd1094, %p274; + cvt.f64.f32 %fd697, %f537; + add.f64 %fd698, %fd1091, 0d3FF0000000000000; + selp.f64 %fd699, 0d4000000000000000, %fd698, %p274; + fma.rn.f64 %fd110, %fd696, %fd697, %fd699; + not.pred %p275, %p9; + mov.f64 %fd1096, %fd59; + @%p275 bra $L__BB2_118; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r397}, %fd59; + } + xor.b32 %r398, %r397, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r399, %temp}, %fd59; + } + mov.b64 %fd1096, {%r399, %r398}; + +$L__BB2_118: + @%p245 bra $L__BB2_122; + bra.uni $L__BB2_119; + +$L__BB2_122: + mov.u32 %r400, 0; + mov.b64 %fd1096, {%r400, %r72}; + bra.uni $L__BB2_123; + +$L__BB2_119: + setp.gt.s32 %p277, %r64, -1; + @%p277 bra $L__BB2_123; + + cvt.rzi.f64.f64 %fd701, %fd649; + setp.eq.f64 %p278, %fd701, 0d4010000000000000; + @%p278 bra $L__BB2_123; + + mov.f64 %fd1096, 0dFFF8000000000000; + +$L__BB2_123: + cvt.f64.f32 %fd1066, %f68; + add.f64 %fd1065, %fd1053, 0d4010000000000000; + selp.f64 %fd1097, %fd1096, %fd1065, %p169; + @%p22 bra $L__BB2_128; + + setp.eq.s32 %p280, %r65, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r401, %temp}, %fd649; + } + setp.eq.s32 %p281, %r401, 0; + and.pred %p282, %p280, %p281; + @%p282 bra $L__BB2_127; + bra.uni $L__BB2_125; + +$L__BB2_127: + mov.u32 %r405, 0; + mov.b64 %fd1097, {%r405, %r74}; + bra.uni $L__BB2_128; + +$L__BB2_125: + and.b32 %r402, %r64, 2147483647; + setp.ne.s32 %p283, %r402, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r403, %temp}, %fd42; + } + setp.ne.s32 %p284, %r403, 0; + or.pred %p285, %p283, %p284; + mov.f64 %fd1097, %fd1096; + @%p285 bra $L__BB2_128; + + mov.u32 %r404, 0; + mov.b64 %fd1097, {%r404, %r75}; + +$L__BB2_128: + selp.f64 %fd705, 0d3FF0000000000000, %fd1097, %p274; + cvt.f64.f32 %fd706, %f539; + fma.rn.f64 %fd707, %fd705, %fd706, %fd110; + cvt.rn.f32.f64 %f97, %fd707; + sqrt.rn.f32 %f98, %f96; + mul.f32 %f99, %f98, %f535; + sqrt.rn.f32 %f100, %f97; + mul.f32 %f101, %f100, %f542; + mov.f32 %f646, 0f3F000000; + div.rn.f32 %f647, %f646, %f99; + div.rn.f32 %f648, %f647, %f99; + sqrt.rn.f32 %f102, %f648; + mul.f32 %f103, %f102, %f81; + abs.f32 %f649, %f103; + setp.ltu.f32 %p287, %f649, 0f3F8060FE; + setp.ge.f32 %p288, %f649, 0f3F8060FE; + mul.f32 %f650, %f103, %f103; + selp.f32 %f651, %f649, %f650, %p288; + selp.f32 %f652, 0f3789CA3C, 0f38B1E96A, %p288; + selp.f32 %f653, 0fB9F560B9, 0fBA574D20, %p288; + fma.rn.f32 %f654, %f652, %f651, %f653; + selp.f32 %f655, 0f3BAC840B, 0f3BAAD5EA, %p288; + fma.rn.f32 %f656, %f654, %f651, %f655; + selp.f32 %f657, 0fBD0C8162, 0fBCDC1BE7, %p288; + fma.rn.f32 %f658, %f656, %f651, %f657; + selp.f32 %f659, 0f3E1CF906, 0f3DE718AF, %p288; + fma.rn.f32 %f660, %f658, %f651, %f659; + selp.f32 %f661, 0f3F6A937E, 0fBEC093AC, %p288; + fma.rn.f32 %f662, %f660, %f651, %f661; + selp.f32 %f663, 0f3F20D842, 0f3E0375D3, %p288; + fma.rn.f32 %f664, %f662, %f651, %f663; + neg.f32 %f665, %f649; + selp.f32 %f666, %f665, %f103, %p288; + fma.rn.f32 %f3228, %f664, %f666, %f666; + @%p287 bra $L__BB2_130; + + mov.f32 %f3132, 0f3F800000; + ex2.approx.ftz.f32 %f667, %f3228; + sub.f32 %f669, %f3132, %f667; + mov.b32 %r406, %f669; + mov.b32 %r407, %f103; + and.b32 %r408, %r407, -2147483648; + or.b32 %r409, %r408, %r406; + mov.b32 %f3228, %r409; + +$L__BB2_130: + mul.f32 %f107, %f102, %f82; + abs.f32 %f670, %f107; + setp.ltu.f32 %p289, %f670, 0f3F8060FE; + setp.ge.f32 %p290, %f670, 0f3F8060FE; + mul.f32 %f671, %f107, %f107; + selp.f32 %f672, %f670, %f671, %p290; + selp.f32 %f673, 0f3789CA3C, 0f38B1E96A, %p290; + selp.f32 %f674, 0fB9F560B9, 0fBA574D20, %p290; + fma.rn.f32 %f675, %f673, %f672, %f674; + selp.f32 %f676, 0f3BAC840B, 0f3BAAD5EA, %p290; + fma.rn.f32 %f677, %f675, %f672, %f676; + selp.f32 %f678, 0fBD0C8162, 0fBCDC1BE7, %p290; + fma.rn.f32 %f679, %f677, %f672, %f678; + selp.f32 %f680, 0f3E1CF906, 0f3DE718AF, %p290; + fma.rn.f32 %f681, %f679, %f672, %f680; + selp.f32 %f682, 0f3F6A937E, 0fBEC093AC, %p290; + fma.rn.f32 %f683, %f681, %f672, %f682; + selp.f32 %f684, 0f3F20D842, 0f3E0375D3, %p290; + fma.rn.f32 %f685, %f683, %f672, %f684; + neg.f32 %f686, %f670; + selp.f32 %f687, %f686, %f107, %p290; + fma.rn.f32 %f3229, %f685, %f687, %f687; + @%p289 bra $L__BB2_132; + + mov.f32 %f3131, 0f3F800000; + ex2.approx.ftz.f32 %f688, %f3229; + sub.f32 %f690, %f3131, %f688; + mov.b32 %r410, %f690; + mov.b32 %r411, %f107; + and.b32 %r412, %r411, -2147483648; + or.b32 %r413, %r412, %r410; + mov.b32 %f3229, %r413; + +$L__BB2_132: + mov.f32 %f3055, 0f3F000000; + sub.f32 %f691, %f3228, %f3229; + mul.f32 %f111, %f691, 0f3F000000; + div.rn.f32 %f693, %f3055, %f101; + div.rn.f32 %f694, %f693, %f101; + cvt.rn.f32.s32 %f112, %r1371; + sub.f32 %f113, %f112, %f3277; + add.f32 %f114, %f113, 0f3F000000; + sqrt.rn.f32 %f115, %f694; + mul.f32 %f116, %f115, %f114; + abs.f32 %f695, %f116; + setp.ltu.f32 %p291, %f695, 0f3F8060FE; + setp.ge.f32 %p292, %f695, 0f3F8060FE; + mul.f32 %f696, %f116, %f116; + selp.f32 %f697, %f695, %f696, %p292; + selp.f32 %f698, 0f3789CA3C, 0f38B1E96A, %p292; + selp.f32 %f699, 0fB9F560B9, 0fBA574D20, %p292; + fma.rn.f32 %f700, %f698, %f697, %f699; + selp.f32 %f701, 0f3BAC840B, 0f3BAAD5EA, %p292; + fma.rn.f32 %f702, %f700, %f697, %f701; + selp.f32 %f703, 0fBD0C8162, 0fBCDC1BE7, %p292; + fma.rn.f32 %f704, %f702, %f697, %f703; + selp.f32 %f705, 0f3E1CF906, 0f3DE718AF, %p292; + fma.rn.f32 %f706, %f704, %f697, %f705; + selp.f32 %f707, 0f3F6A937E, 0fBEC093AC, %p292; + fma.rn.f32 %f708, %f706, %f697, %f707; + selp.f32 %f709, 0f3F20D842, 0f3E0375D3, %p292; + fma.rn.f32 %f710, %f708, %f697, %f709; + neg.f32 %f711, %f695; + selp.f32 %f712, %f711, %f116, %p292; + fma.rn.f32 %f3230, %f710, %f712, %f712; + @%p291 bra $L__BB2_134; + + mov.f32 %f3130, 0f3F800000; + ex2.approx.ftz.f32 %f713, %f3230; + sub.f32 %f715, %f3130, %f713; + mov.b32 %r414, %f715; + mov.b32 %r415, %f116; + and.b32 %r416, %r415, -2147483648; + or.b32 %r417, %r416, %r414; + mov.b32 %f3230, %r417; + +$L__BB2_134: + cvt.rn.f32.s32 %f3057, %r1371; + sub.f32 %f3056, %f3057, %f3277; + add.f32 %f120, %f3056, 0fBF000000; + mul.f32 %f121, %f115, %f120; + abs.f32 %f716, %f121; + setp.ltu.f32 %p293, %f716, 0f3F8060FE; + setp.ge.f32 %p294, %f716, 0f3F8060FE; + mul.f32 %f717, %f121, %f121; + selp.f32 %f718, %f716, %f717, %p294; + selp.f32 %f719, 0f3789CA3C, 0f38B1E96A, %p294; + selp.f32 %f720, 0fB9F560B9, 0fBA574D20, %p294; + fma.rn.f32 %f721, %f719, %f718, %f720; + selp.f32 %f722, 0f3BAC840B, 0f3BAAD5EA, %p294; + fma.rn.f32 %f723, %f721, %f718, %f722; + selp.f32 %f724, 0fBD0C8162, 0fBCDC1BE7, %p294; + fma.rn.f32 %f725, %f723, %f718, %f724; + selp.f32 %f726, 0f3E1CF906, 0f3DE718AF, %p294; + fma.rn.f32 %f727, %f725, %f718, %f726; + selp.f32 %f728, 0f3F6A937E, 0fBEC093AC, %p294; + fma.rn.f32 %f729, %f727, %f718, %f728; + selp.f32 %f730, 0f3F20D842, 0f3E0375D3, %p294; + fma.rn.f32 %f731, %f729, %f718, %f730; + neg.f32 %f732, %f716; + selp.f32 %f733, %f732, %f121, %p294; + fma.rn.f32 %f3231, %f731, %f733, %f733; + @%p293 bra $L__BB2_136; + + mov.f32 %f3129, 0f3F800000; + ex2.approx.ftz.f32 %f734, %f3231; + sub.f32 %f736, %f3129, %f734; + mov.b32 %r418, %f736; + mov.b32 %r419, %f121; + and.b32 %r420, %r419, -2147483648; + or.b32 %r421, %r420, %r418; + mov.b32 %f3231, %r421; + +$L__BB2_136: + cvt.rn.f32.s32 %f3060, %r1370; + add.f32 %f3059, %f3060, 0f3F000000; + sub.f32 %f3058, %f3059, %f3278; + sub.f32 %f738, %f3230, %f3231; + mul.f32 %f125, %f738, 0f3F000000; + div.rn.f32 %f126, %f3058, %f99; + abs.f32 %f127, %f126; + setp.lt.f32 %p295, %f127, 0f00800000; + mul.f32 %f739, %f127, 0f4B800000; + selp.f32 %f740, %f739, %f127, %p295; + selp.f32 %f741, 0fC3170000, 0fC2FE0000, %p295; + mov.b32 %r422, %f740; + and.b32 %r423, %r422, 8388607; + or.b32 %r424, %r423, 1065353216; + mov.b32 %f742, %r424; + shr.u32 %r425, %r422, 23; + cvt.rn.f32.u32 %f743, %r425; + add.f32 %f744, %f741, %f743; + setp.gt.f32 %p296, %f742, 0f3FB504F3; + mul.f32 %f745, %f742, 0f3F000000; + add.f32 %f746, %f744, 0f3F800000; + selp.f32 %f747, %f746, %f744, %p296; + selp.f32 %f748, %f745, %f742, %p296; + add.f32 %f749, %f748, 0fBF800000; + add.f32 %f750, %f748, 0f3F800000; + rcp.approx.ftz.f32 %f751, %f750; + add.f32 %f752, %f749, %f749; + mul.f32 %f754, %f752, %f751; + mul.f32 %f755, %f754, %f754; + mov.f32 %f756, 0f3C4CAF63; + mov.f32 %f757, 0f3B18F0FE; + fma.rn.f32 %f758, %f757, %f755, %f756; + mov.f32 %f759, 0f3DAAAABD; + fma.rn.f32 %f760, %f758, %f755, %f759; + mul.rn.f32 %f761, %f760, %f755; + mul.rn.f32 %f762, %f761, %f754; + sub.f32 %f763, %f749, %f754; + add.f32 %f764, %f763, %f763; + neg.f32 %f765, %f754; + fma.rn.f32 %f766, %f765, %f749, %f764; + mul.rn.f32 %f767, %f751, %f766; + add.f32 %f768, %f762, %f754; + sub.f32 %f769, %f754, %f768; + add.f32 %f770, %f762, %f769; + add.f32 %f771, %f767, %f770; + add.f32 %f772, %f768, %f771; + sub.f32 %f773, %f768, %f772; + add.f32 %f774, %f771, %f773; + mov.f32 %f775, 0f3F317200; + mul.rn.f32 %f776, %f747, %f775; + mov.f32 %f777, 0f35BFBE8E; + mul.rn.f32 %f778, %f747, %f777; + add.f32 %f779, %f776, %f772; + sub.f32 %f780, %f776, %f779; + add.f32 %f781, %f772, %f780; + add.f32 %f782, %f774, %f781; + add.f32 %f783, %f778, %f782; + add.f32 %f784, %f779, %f783; + sub.f32 %f785, %f779, %f784; + add.f32 %f786, %f783, %f785; + mul.rn.f32 %f787, %f600, %f784; neg.f32 %f788, %f787; - mul.f32 %f789, %f787, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f790, %f789; - mov.f32 %f791, 0fBF317200; - fma.rn.f32 %f792, %f790, %f791, %f788; - mov.f32 %f793, 0fB5BFBE8E; - fma.rn.f32 %f794, %f790, %f793, %f792; - mul.f32 %f795, %f794, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f796, %f795; - add.f32 %f797, %f790, 0f00000000; - ex2.approx.f32 %f798, %f797; - mul.f32 %f799, %f796, %f798; - setp.gt.f32 %p43, %f787, 0f42D20000; - selp.f32 %f800, 0f00000000, %f799, %p43; - setp.lt.f32 %p44, %f787, 0fC2D20000; - selp.f32 %f75, 0f7F800000, %f800, %p44; - mov.f32 %f3276, %f3270; - mov.f32 %f3277, %f3271; - -BB2_32: - sub.s32 %r104, %r312, %r317; - cvt.rn.f32.s32 %f801, %r104; - mul.f32 %f802, %f801, %f801; - mul.f32 %f803, %f40, %f802; - neg.f32 %f804, %f803; - mul.f32 %f805, %f803, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f806, %f805; - fma.rn.f32 %f808, %f806, %f791, %f804; - fma.rn.f32 %f810, %f806, %f793, %f808; - mul.f32 %f811, %f810, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f812, %f811; - add.f32 %f813, %f806, 0f00000000; - ex2.approx.f32 %f814, %f813; - mul.f32 %f815, %f812, %f814; - setp.gt.f32 %p45, %f803, 0f42D20000; - selp.f32 %f816, 0f00000000, %f815, %p45; - setp.lt.f32 %p46, %f803, 0fC2D20000; - selp.f32 %f817, 0f7F800000, %f816, %p46; - mul.f32 %f818, %f75, %f817; - add.s32 %r105, %r317, %r20; - add.s32 %r106, %r105, %r4; - mul.wide.s32 %rd68, %r106, 4; - add.s64 %rd69, %rd1, %rd68; - ld.global.f32 %f819, [%rd69]; - fma.rn.f32 %f820, %f819, %f818, %f3277; - add.f32 %f821, %f3276, %f818; - add.s32 %r107, %r317, 1; - sub.s32 %r108, %r312, %r107; - cvt.rn.f32.s32 %f822, %r108; - mul.f32 %f823, %f822, %f822; - mul.f32 %f824, %f40, %f823; - neg.f32 %f825, %f824; - mul.f32 %f826, %f824, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f827, %f826; - fma.rn.f32 %f828, %f827, %f791, %f825; - fma.rn.f32 %f829, %f827, %f793, %f828; - mul.f32 %f830, %f829, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f831, %f830; - add.f32 %f832, %f827, 0f00000000; - ex2.approx.f32 %f833, %f832; - mul.f32 %f834, %f831, %f833; - setp.gt.f32 %p47, %f824, 0f42D20000; - selp.f32 %f835, 0f00000000, %f834, %p47; - setp.lt.f32 %p48, %f824, 0fC2D20000; - selp.f32 %f836, 0f7F800000, %f835, %p48; - mul.f32 %f837, %f75, %f836; - ld.global.f32 %f838, [%rd69+4]; - fma.rn.f32 %f839, %f838, %f837, %f820; - add.f32 %f840, %f821, %f837; - add.s32 %r109, %r317, 2; - sub.s32 %r110, %r312, %r109; - cvt.rn.f32.s32 %f841, %r110; - mul.f32 %f842, %f841, %f841; - mul.f32 %f843, %f40, %f842; - neg.f32 %f844, %f843; - mul.f32 %f845, %f843, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f846, %f845; - fma.rn.f32 %f847, %f846, %f791, %f844; - fma.rn.f32 %f848, %f846, %f793, %f847; - mul.f32 %f849, %f848, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f850, %f849; - add.f32 %f851, %f846, 0f00000000; - ex2.approx.f32 %f852, %f851; - mul.f32 %f853, %f850, %f852; - setp.gt.f32 %p49, %f843, 0f42D20000; - selp.f32 %f854, 0f00000000, %f853, %p49; - setp.lt.f32 %p50, %f843, 0fC2D20000; - selp.f32 %f855, 0f7F800000, %f854, %p50; - mul.f32 %f856, %f75, %f855; - ld.global.f32 %f857, [%rd69+8]; - fma.rn.f32 %f858, %f857, %f856, %f839; - add.f32 %f859, %f840, %f856; - add.s32 %r111, %r317, 3; - sub.s32 %r112, %r312, %r111; - cvt.rn.f32.s32 %f860, %r112; - mul.f32 %f861, %f860, %f860; - mul.f32 %f862, %f40, %f861; - neg.f32 %f863, %f862; - mul.f32 %f864, %f862, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f865, %f864; - fma.rn.f32 %f866, %f865, %f791, %f863; - fma.rn.f32 %f867, %f865, %f793, %f866; - mul.f32 %f868, %f867, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f869, %f868; - add.f32 %f870, %f865, 0f00000000; - ex2.approx.f32 %f871, %f870; - mul.f32 %f872, %f869, %f871; - setp.gt.f32 %p51, %f862, 0f42D20000; - selp.f32 %f873, 0f00000000, %f872, %p51; - setp.lt.f32 %p52, %f862, 0fC2D20000; - selp.f32 %f874, 0f7F800000, %f873, %p52; - mul.f32 %f875, %f75, %f874; - ld.global.f32 %f876, [%rd69+12]; - fma.rn.f32 %f3277, %f876, %f875, %f858; - add.f32 %f3276, %f859, %f875; - add.s32 %r317, %r317, 4; - setp.lt.s32 %p53, %r317, %r63; - @%p53 bra BB2_32; - -BB2_33: - add.s32 %r313, %r313, 1; - setp.lt.s32 %p54, %r313, %r63; - @%p54 bra BB2_21; - - div.rn.f32 %f877, %f3277, %f3276; - max.f32 %f3259, %f3259, %f877; - min.f32 %f3258, %f3258, %f877; - add.s32 %r312, %r312, 1; - setp.lt.s32 %p55, %r312, %r63; - @%p55 bra BB2_20; - - add.s32 %r311, %r311, 1; - setp.lt.s32 %p56, %r311, %r63; - @%p56 bra BB2_19; - -BB2_36: - sub.f32 %f879, %f3259, %f3258; - add.f32 %f880, %f879, %f879; - mul.f32 %f881, %f880, 0f40490FD8; - mul.f32 %f882, %f881, %f643; - mul.f32 %f883, %f882, %f650; - mul.f32 %f884, %f883, 0f3FB504F3; - max.f32 %f3347, %f3345, %f884; - setp.lt.s32 %p57, %r64, 1; - @%p57 bra BB2_128; - - mul.f32 %f87, %f643, 0f3F000000; - mul.f32 %f88, %f650, 0f3F000000; - mul.f32 %f89, %f644, 0f40400000; - mul.f32 %f90, %f649, %f649; - mul.f32 %f91, %f90, %f649; - mul.f32 %f92, %f646, 0f40800000; - mul.f32 %f93, %f90, %f90; - mul.f32 %f94, %f645, 0f40400000; - mul.f32 %f95, %f647, 0f40800000; - mul.f32 %f96, %f643, 0fBE800000; - mul.f32 %f97, %f650, 0fBE800000; - mov.f32 %f886, 0f40000000; - div.rn.f32 %f98, %f886, %f90; - mul.f32 %f99, %f644, 0f40C00000; - mul.f32 %f100, %f646, 0f41400000; - mul.f32 %f101, %f645, 0f40C00000; - mul.f32 %f102, %f647, 0f41400000; - mov.u32 %r318, 0; - mov.f32 %f885, 0f00000000; - mov.f32 %f3345, %f885; - -BB2_38: - mov.f32 %f3295, %f885; - mov.f32 %f3296, %f885; - mov.f32 %f3297, %f885; - mov.f32 %f3298, %f885; - mov.f32 %f3299, %f885; - mov.f32 %f3300, %f885; - mov.f32 %f3301, %f885; - mov.f32 %f3302, %f885; - mov.f32 %f3303, %f885; - mov.f32 %f3304, %f885; - @%p19 bra BB2_127; - - sub.f32 %f907, %f3345, %f648; - div.rn.f32 %f908, %f907, %f649; - mul.f32 %f909, %f908, %f908; - mul.f32 %f910, %f908, %f909; - add.f32 %f911, %f909, 0f3F800000; - fma.rn.f32 %f912, %f910, %f644, %f911; - mul.f32 %f913, %f909, %f909; - fma.rn.f32 %f108, %f913, %f646, %f912; - add.f32 %f914, %f3345, %f648; - div.rn.f32 %f915, %f914, %f649; - mul.f32 %f916, %f915, %f915; - mul.f32 %f917, %f915, %f916; - add.f32 %f918, %f916, 0f3F800000; - fma.rn.f32 %f919, %f917, %f645, %f918; - mul.f32 %f920, %f916, %f916; - fma.rn.f32 %f109, %f920, %f647, %f919; - neg.f32 %f921, %f3347; - div.rn.f32 %f110, %f921, 0f40206C98; - div.rn.f32 %f111, %f3347, 0f40206C98; - add.f32 %f922, %f907, %f907; - div.rn.f32 %f923, %f922, %f90; - mul.f32 %f924, %f907, %f907; - mul.f32 %f925, %f89, %f924; - div.rn.f32 %f926, %f925, %f91; - add.f32 %f927, %f923, %f926; - mul.f32 %f928, %f907, %f924; - mul.f32 %f929, %f92, %f928; - div.rn.f32 %f930, %f929, %f93; - add.f32 %f112, %f927, %f930; - add.f32 %f931, %f914, %f914; - div.rn.f32 %f932, %f931, %f90; - mul.f32 %f933, %f914, %f914; - mul.f32 %f934, %f94, %f933; - div.rn.f32 %f935, %f934, %f91; - add.f32 %f936, %f932, %f935; - mul.f32 %f937, %f914, %f933; - mul.f32 %f938, %f95, %f937; - div.rn.f32 %f939, %f938, %f93; - add.f32 %f113, %f936, %f939; - mul.f32 %f940, %f99, %f907; - div.rn.f32 %f941, %f940, %f91; - add.f32 %f942, %f98, %f941; - mul.f32 %f943, %f100, %f924; - div.rn.f32 %f944, %f943, %f93; - add.f32 %f118, %f942, %f944; - mul.f32 %f945, %f101, %f914; - div.rn.f32 %f946, %f945, %f91; - add.f32 %f947, %f98, %f946; - mul.f32 %f948, %f102, %f933; - div.rn.f32 %f949, %f948, %f93; - add.f32 %f119, %f947, %f949; - mov.u32 %r319, 0; - mov.f32 %f3295, 0f00000000; - mov.f32 %f3296, %f3295; - mov.f32 %f3297, %f3295; - mov.f32 %f3298, %f3295; - mov.f32 %f3299, %f3295; - mov.f32 %f3300, %f3295; - mov.f32 %f3301, %f3295; - mov.f32 %f3302, %f3295; - mov.f32 %f3303, %f3295; - mov.f32 %f3304, %f3295; - -BB2_40: - mov.u32 %r320, 0; - add.f32 %f3175, %f109, %f109; - add.f32 %f3174, %f108, %f108; - sqrt.rn.f32 %f950, %f108; - mul.f32 %f951, %f950, %f643; - sqrt.rn.f32 %f952, %f109; - mul.f32 %f132, %f952, %f650; - div.rn.f32 %f954, %f684, %f951; - div.rn.f32 %f955, %f954, %f951; - cvt.rn.f32.s32 %f956, %r319; - sub.f32 %f133, %f956, %f3349; - add.f32 %f134, %f133, 0f3F800000; - sqrt.rn.f32 %f957, %f955; - mul.f32 %f135, %f134, %f957; - abs.f32 %f136, %f135; - mul.f32 %f137, %f135, %f135; - mul.f32 %f138, %f133, %f957; - abs.f32 %f139, %f138; - div.rn.f32 %f958, %f684, %f132; - div.rn.f32 %f959, %f958, %f132; - sqrt.rn.f32 %f141, %f959; - add.f32 %f960, %f956, 0f3F800000; - sub.f32 %f142, %f960, %f3349; - div.rn.f32 %f143, %f142, %f951; - mov.f32 %f961, 0f3F800000; - cvt.rzi.f32.f32 %f962, %f961; - add.f32 %f963, %f962, %f962; - sub.f32 %f965, %f886, %f963; - abs.f32 %f144, %f965; - setp.eq.f32 %p59, %f144, 0f3F800000; - abs.f32 %f145, %f143; - setp.lt.f32 %p60, %f145, 0f00800000; - mul.f32 %f966, %f145, 0f4B800000; - selp.f32 %f967, 0fC3170000, 0fC2FE0000, %p60; - selp.f32 %f968, %f966, %f145, %p60; - mov.b32 %r116, %f968; - and.b32 %r117, %r116, 8388607; - or.b32 %r118, %r117, 1065353216; - mov.b32 %f969, %r118; - shr.u32 %r119, %r116, 23; - cvt.rn.f32.u32 %f970, %r119; - add.f32 %f971, %f967, %f970; - setp.gt.f32 %p61, %f969, 0f3FB504F3; - mul.f32 %f972, %f969, 0f3F000000; - add.f32 %f973, %f971, 0f3F800000; - selp.f32 %f974, %f972, %f969, %p61; - selp.f32 %f975, %f973, %f971, %p61; - add.f32 %f146, %f974, 0fBF800000; - add.f32 %f147, %f974, 0f3F800000; - add.f32 %f148, %f146, %f146; - mov.f32 %f976, 0f3F317200; - mul.rn.f32 %f149, %f975, %f976; - mov.f32 %f977, 0f35BFBE8E; - mul.rn.f32 %f150, %f975, %f977; - setp.lt.f32 %p62, %f143, 0f00000000; - and.pred %p3, %p62, %p59; - add.f32 %f978, %f143, %f143; - selp.f32 %f151, %f978, 0f00000000, %p59; - div.rn.f32 %f152, %f133, %f951; - abs.f32 %f153, %f152; - setp.lt.f32 %p63, %f153, 0f00800000; - mul.f32 %f980, %f153, 0f4B800000; - selp.f32 %f981, 0fC3170000, 0fC2FE0000, %p63; - selp.f32 %f982, %f980, %f153, %p63; - mov.b32 %r120, %f982; - and.b32 %r121, %r120, 8388607; - or.b32 %r122, %r121, 1065353216; - mov.b32 %f983, %r122; - shr.u32 %r123, %r120, 23; - cvt.rn.f32.u32 %f984, %r123; - add.f32 %f985, %f981, %f984; - setp.gt.f32 %p64, %f983, 0f3FB504F3; - mul.f32 %f986, %f983, 0f3F000000; - add.f32 %f987, %f985, 0f3F800000; - selp.f32 %f988, %f986, %f983, %p64; - selp.f32 %f989, %f987, %f985, %p64; - add.f32 %f154, %f988, 0fBF800000; - add.f32 %f155, %f988, 0f3F800000; - add.f32 %f156, %f154, %f154; - mul.rn.f32 %f157, %f989, %f976; - mul.rn.f32 %f158, %f989, %f977; - setp.lt.f32 %p65, %f152, 0f00000000; - and.pred %p4, %p65, %p59; - add.f32 %f990, %f152, %f152; - selp.f32 %f159, %f990, 0f00000000, %p59; - div.rn.f32 %f160, %f110, %f951; - div.rn.f32 %f161, %f110, %f132; - div.rn.f32 %f162, %f160, %f951; - mov.f32 %f992, 0fC0000000; - div.rn.f32 %f163, %f992, %f951; - div.rn.f32 %f164, %f161, %f132; - div.rn.f32 %f165, %f992, %f132; - div.rn.f32 %f993, %f87, %f950; - div.rn.f32 %f994, %f88, %f952; - mul.f32 %f166, %f993, %f112; - mul.f32 %f167, %f994, %f113; - mov.f32 %f995, 0f3F400000; - cvt.rzi.f32.f32 %f996, %f995; - add.f32 %f997, %f996, %f996; - mov.f32 %f998, 0f3FC00000; - sub.f32 %f999, %f998, %f997; - abs.f32 %f1000, %f999; - setp.eq.f32 %p66, %f1000, 0f3F800000; - abs.f32 %f168, %f108; - setp.lt.f32 %p67, %f168, 0f00800000; - mul.f32 %f1001, %f168, 0f4B800000; - selp.f32 %f1002, 0fC3170000, 0fC2FE0000, %p67; - selp.f32 %f1003, %f1001, %f168, %p67; - mov.b32 %r124, %f1003; - and.b32 %r125, %r124, 8388607; - or.b32 %r126, %r125, 1065353216; - mov.b32 %f1004, %r126; - shr.u32 %r127, %r124, 23; - cvt.rn.f32.u32 %f1005, %r127; - add.f32 %f1006, %f1002, %f1005; - setp.gt.f32 %p68, %f1004, 0f3FB504F3; - mul.f32 %f1007, %f1004, 0f3F000000; - add.f32 %f1008, %f1006, 0f3F800000; - selp.f32 %f1009, %f1007, %f1004, %p68; - selp.f32 %f1010, %f1008, %f1006, %p68; - add.f32 %f169, %f1009, 0fBF800000; - add.f32 %f170, %f1009, 0f3F800000; - add.f32 %f171, %f169, %f169; - mul.rn.f32 %f172, %f1010, %f976; - mul.rn.f32 %f173, %f1010, %f977; - setp.lt.f32 %p69, %f108, 0f00000000; - and.pred %p5, %p69, %p66; - selp.f32 %f174, %f3174, 0f00000000, %p66; - abs.f32 %f176, %f109; - setp.lt.f32 %p70, %f176, 0f00800000; - mul.f32 %f1011, %f176, 0f4B800000; - selp.f32 %f1012, 0fC3170000, 0fC2FE0000, %p70; - selp.f32 %f1013, %f1011, %f176, %p70; - mov.b32 %r128, %f1013; - and.b32 %r129, %r128, 8388607; - or.b32 %r130, %r129, 1065353216; - mov.b32 %f1014, %r130; - shr.u32 %r131, %r128, 23; - cvt.rn.f32.u32 %f1015, %r131; - add.f32 %f1016, %f1012, %f1015; - setp.gt.f32 %p71, %f1014, 0f3FB504F3; - mul.f32 %f1017, %f1014, 0f3F000000; - add.f32 %f1018, %f1016, 0f3F800000; - selp.f32 %f1019, %f1017, %f1014, %p71; - selp.f32 %f1020, %f1018, %f1016, %p71; - add.f32 %f177, %f1019, 0fBF800000; - add.f32 %f178, %f1019, 0f3F800000; - add.f32 %f179, %f177, %f177; - mul.rn.f32 %f180, %f1020, %f976; - mul.rn.f32 %f181, %f1020, %f977; - setp.lt.f32 %p72, %f109, 0f00000000; - and.pred %p6, %p72, %p66; - selp.f32 %f182, %f3175, 0f00000000, %p66; - mul.f32 %f184, %f993, %f118; - mul.f32 %f185, %f994, %f119; - mul.f32 %f1021, %f951, %f951; - mul.f32 %f1022, %f951, %f1021; - div.rn.f32 %f188, %f110, %f1022; - mul.f32 %f1023, %f132, %f132; - mul.f32 %f1024, %f132, %f1023; - div.rn.f32 %f189, %f110, %f1024; - mul.f32 %f1025, %f1021, %f1021; - mul.f32 %f1026, %f951, %f1025; - div.rn.f32 %f190, %f111, %f1026; - mul.f32 %f1027, %f134, %f134; - mul.f32 %f191, %f134, %f1027; - mul.f32 %f1029, %f1023, %f1023; - mul.f32 %f1030, %f132, %f1029; - div.rn.f32 %f193, %f111, %f1030; - -BB2_41: - setp.ltu.f32 %p73, %f136, 0f3F800000; - @%p73 bra BB2_43; - bra.uni BB2_42; - -BB2_43: - mov.f32 %f1049, 0f3BA0C9F8; - mov.f32 %f1050, 0fBA1268FB; - fma.rn.f32 %f1051, %f1050, %f137, %f1049; - mov.f32 %f1052, 0fBCDABFD4; - fma.rn.f32 %f1053, %f1051, %f137, %f1052; - mov.f32 %f1054, 0f3DE70331; - fma.rn.f32 %f1055, %f1053, %f137, %f1054; - mov.f32 %f1056, 0fBEC09330; - fma.rn.f32 %f1057, %f1055, %f137, %f1056; - mov.f32 %f1058, 0f3F906EBA; - fma.rn.f32 %f1059, %f1057, %f137, %f1058; - mul.f32 %f3305, %f135, %f1059; - bra.uni BB2_44; - -BB2_42: - mov.f32 %f3176, 0f3F800000; - setp.ltu.f32 %p74, %f136, 0f407AD445; - mov.f32 %f1031, 0f3A03BB71; - mov.f32 %f1032, 0fB7B730FB; - fma.rn.f32 %f1033, %f1032, %f136, %f1031; - mov.f32 %f1034, 0fBBACA3B3; - fma.rn.f32 %f1035, %f1033, %f136, %f1034; - mov.f32 %f1036, 0f3D0A7445; - fma.rn.f32 %f1037, %f1035, %f136, %f1036; - mov.f32 %f1038, 0fBE1B3B75; - fma.rn.f32 %f1039, %f1037, %f136, %f1038; - mov.f32 %f1040, 0fBF6B385A; - fma.rn.f32 %f1041, %f1039, %f136, %f1040; - mov.f32 %f1042, 0fBFD0316E; - fma.rn.f32 %f1043, %f1041, %f136, %f1042; - mov.f32 %f1044, 0fBA031CCE; - fma.rn.f32 %f1045, %f1043, %f136, %f1044; - ex2.approx.ftz.f32 %f1046, %f1045; - sub.f32 %f1048, %f3176, %f1046; - mov.b32 %r132, %f1048; - selp.b32 %r133, %r132, 1065353216, %p74; - mov.b32 %r134, %f135; - and.b32 %r135, %r134, -2147483648; - or.b32 %r136, %r133, %r135; - mov.b32 %f3305, %r136; - -BB2_44: - setp.ltu.f32 %p75, %f139, 0f3F800000; - @%p75 bra BB2_46; - bra.uni BB2_45; - -BB2_46: - cvt.rn.f32.s32 %f3214, %r319; - sub.f32 %f3213, %f3214, %f3349; - mul.f32 %f3212, %f3213, %f957; - mul.f32 %f3211, %f3212, %f3212; - mov.f32 %f1078, 0f3BA0C9F8; - mov.f32 %f1079, 0fBA1268FB; - fma.rn.f32 %f1080, %f1079, %f3211, %f1078; - mov.f32 %f1081, 0fBCDABFD4; - fma.rn.f32 %f1082, %f1080, %f3211, %f1081; - mov.f32 %f1083, 0f3DE70331; - fma.rn.f32 %f1084, %f1082, %f3211, %f1083; - mov.f32 %f1085, 0fBEC09330; - fma.rn.f32 %f1086, %f1084, %f3211, %f1085; - mov.f32 %f1087, 0f3F906EBA; - fma.rn.f32 %f1088, %f1086, %f3211, %f1087; - mul.f32 %f3306, %f3212, %f1088; - bra.uni BB2_47; - -BB2_45: - cvt.rn.f32.s32 %f3227, %r319; - sub.f32 %f3226, %f3227, %f3349; - mul.f32 %f3225, %f3226, %f957; - mov.f32 %f3177, 0f3F800000; - setp.ltu.f32 %p76, %f139, 0f407AD445; - mov.f32 %f1060, 0f3A03BB71; - mov.f32 %f1061, 0fB7B730FB; - fma.rn.f32 %f1062, %f1061, %f139, %f1060; - mov.f32 %f1063, 0fBBACA3B3; - fma.rn.f32 %f1064, %f1062, %f139, %f1063; - mov.f32 %f1065, 0f3D0A7445; - fma.rn.f32 %f1066, %f1064, %f139, %f1065; - mov.f32 %f1067, 0fBE1B3B75; - fma.rn.f32 %f1068, %f1066, %f139, %f1067; - mov.f32 %f1069, 0fBF6B385A; - fma.rn.f32 %f1070, %f1068, %f139, %f1069; - mov.f32 %f1071, 0fBFD0316E; - fma.rn.f32 %f1072, %f1070, %f139, %f1071; - mov.f32 %f1073, 0fBA031CCE; - fma.rn.f32 %f1074, %f1072, %f139, %f1073; - ex2.approx.ftz.f32 %f1075, %f1074; - sub.f32 %f1077, %f3177, %f1075; - mov.b32 %r137, %f1077; - selp.b32 %r138, %r137, 1065353216, %p76; - mov.b32 %r139, %f3225; - and.b32 %r140, %r139, -2147483648; - or.b32 %r141, %r138, %r140; - mov.b32 %f3306, %r141; - -BB2_47: - sub.f32 %f1089, %f3305, %f3306; - mul.f32 %f210, %f1089, 0f3F000000; - cvt.rn.f32.s32 %f211, %r320; - sub.f32 %f212, %f211, %f3348; - add.f32 %f213, %f212, 0f3F800000; - mul.f32 %f214, %f213, %f141; - abs.f32 %f215, %f214; - setp.ltu.f32 %p77, %f215, 0f3F800000; - @%p77 bra BB2_49; - bra.uni BB2_48; - -BB2_49: - mul.f32 %f1108, %f214, %f214; - mov.f32 %f1109, 0f3BA0C9F8; - mov.f32 %f1110, 0fBA1268FB; - fma.rn.f32 %f1111, %f1110, %f1108, %f1109; - mov.f32 %f1112, 0fBCDABFD4; - fma.rn.f32 %f1113, %f1111, %f1108, %f1112; - mov.f32 %f1114, 0f3DE70331; - fma.rn.f32 %f1115, %f1113, %f1108, %f1114; - mov.f32 %f1116, 0fBEC09330; - fma.rn.f32 %f1117, %f1115, %f1108, %f1116; - mov.f32 %f1118, 0f3F906EBA; - fma.rn.f32 %f1119, %f1117, %f1108, %f1118; - mul.f32 %f3307, %f214, %f1119; - bra.uni BB2_50; - -BB2_48: - mov.f32 %f3178, 0f3F800000; - mov.f32 %f1090, 0f3A03BB71; - mov.f32 %f1091, 0fB7B730FB; - fma.rn.f32 %f1092, %f1091, %f215, %f1090; - mov.f32 %f1093, 0fBBACA3B3; - fma.rn.f32 %f1094, %f1092, %f215, %f1093; - mov.f32 %f1095, 0f3D0A7445; - fma.rn.f32 %f1096, %f1094, %f215, %f1095; - mov.f32 %f1097, 0fBE1B3B75; - fma.rn.f32 %f1098, %f1096, %f215, %f1097; - mov.f32 %f1099, 0fBF6B385A; - fma.rn.f32 %f1100, %f1098, %f215, %f1099; - mov.f32 %f1101, 0fBFD0316E; - fma.rn.f32 %f1102, %f1100, %f215, %f1101; - mov.f32 %f1103, 0fBA031CCE; - fma.rn.f32 %f1104, %f1102, %f215, %f1103; - ex2.approx.ftz.f32 %f1105, %f1104; - sub.f32 %f1107, %f3178, %f1105; - mov.b32 %r142, %f1107; - setp.ltu.f32 %p78, %f215, 0f407AD445; - selp.b32 %r143, %r142, 1065353216, %p78; - mov.b32 %r144, %f214; - and.b32 %r145, %r144, -2147483648; - or.b32 %r146, %r143, %r145; - mov.b32 %f3307, %r146; - -BB2_50: - cvt.rn.f32.s32 %f3229, %r320; - sub.f32 %f3228, %f3229, %f3348; - mul.f32 %f219, %f3228, %f141; - abs.f32 %f220, %f219; - setp.ltu.f32 %p79, %f220, 0f3F800000; - @%p79 bra BB2_52; - bra.uni BB2_51; - -BB2_52: - mul.f32 %f1138, %f219, %f219; - mov.f32 %f1139, 0f3BA0C9F8; - mov.f32 %f1140, 0fBA1268FB; - fma.rn.f32 %f1141, %f1140, %f1138, %f1139; - mov.f32 %f1142, 0fBCDABFD4; - fma.rn.f32 %f1143, %f1141, %f1138, %f1142; - mov.f32 %f1144, 0f3DE70331; - fma.rn.f32 %f1145, %f1143, %f1138, %f1144; - mov.f32 %f1146, 0fBEC09330; - fma.rn.f32 %f1147, %f1145, %f1138, %f1146; - mov.f32 %f1148, 0f3F906EBA; - fma.rn.f32 %f1149, %f1147, %f1138, %f1148; - mul.f32 %f3308, %f219, %f1149; - bra.uni BB2_53; - -BB2_51: - mov.f32 %f3179, 0f3F800000; - mov.f32 %f1120, 0f3A03BB71; - mov.f32 %f1121, 0fB7B730FB; - fma.rn.f32 %f1122, %f1121, %f220, %f1120; - mov.f32 %f1123, 0fBBACA3B3; - fma.rn.f32 %f1124, %f1122, %f220, %f1123; - mov.f32 %f1125, 0f3D0A7445; - fma.rn.f32 %f1126, %f1124, %f220, %f1125; - mov.f32 %f1127, 0fBE1B3B75; - fma.rn.f32 %f1128, %f1126, %f220, %f1127; - mov.f32 %f1129, 0fBF6B385A; - fma.rn.f32 %f1130, %f1128, %f220, %f1129; - mov.f32 %f1131, 0fBFD0316E; - fma.rn.f32 %f1132, %f1130, %f220, %f1131; - mov.f32 %f1133, 0fBA031CCE; - fma.rn.f32 %f1134, %f1132, %f220, %f1133; - ex2.approx.ftz.f32 %f1135, %f1134; - sub.f32 %f1137, %f3179, %f1135; - mov.b32 %r147, %f1137; - setp.ltu.f32 %p80, %f220, 0f407AD445; - selp.b32 %r148, %r147, 1065353216, %p80; - mov.b32 %r149, %f219; - and.b32 %r150, %r149, -2147483648; - or.b32 %r151, %r148, %r150; - mov.b32 %f3308, %r151; - -BB2_53: - sub.f32 %f1152, %f3307, %f3308; - mul.f32 %f224, %f1152, 0f3F000000; - // inline asm - rcp.approx.ftz.f32 %f1150,%f147; - // inline asm - mul.f32 %f1153, %f1150, %f148; - mul.f32 %f1154, %f1153, %f1153; - mov.f32 %f1155, 0f3C4CAF63; - mov.f32 %f1156, 0f3B18F0FE; - fma.rn.f32 %f1157, %f1156, %f1154, %f1155; - mov.f32 %f1158, 0f3DAAAABD; - fma.rn.f32 %f1159, %f1157, %f1154, %f1158; - mul.rn.f32 %f1160, %f1159, %f1154; - mul.rn.f32 %f1161, %f1160, %f1153; - sub.f32 %f1162, %f146, %f1153; - neg.f32 %f1163, %f1153; - add.f32 %f1164, %f1162, %f1162; - fma.rn.f32 %f1165, %f1163, %f146, %f1164; - mul.rn.f32 %f1166, %f1150, %f1165; - add.f32 %f1167, %f1161, %f1153; - sub.f32 %f1168, %f1153, %f1167; - add.f32 %f1169, %f1161, %f1168; - add.f32 %f1170, %f1166, %f1169; - add.f32 %f1171, %f1167, %f1170; - sub.f32 %f1172, %f1167, %f1171; - add.f32 %f1173, %f1170, %f1172; - add.f32 %f1174, %f149, %f1171; - sub.f32 %f1175, %f149, %f1174; - add.f32 %f1176, %f1171, %f1175; - add.f32 %f1177, %f1173, %f1176; - add.f32 %f1178, %f150, %f1177; - add.f32 %f1179, %f1174, %f1178; - sub.f32 %f1180, %f1174, %f1179; + fma.rn.f32 %f789, %f600, %f784, %f788; + fma.rn.f32 %f790, %f600, %f786, %f789; + mov.f32 %f3263, 0f00000000; + fma.rn.f32 %f792, %f3263, %f784, %f790; + add.rn.f32 %f793, %f787, %f792; + neg.f32 %f794, %f793; + add.rn.f32 %f795, %f787, %f794; + add.rn.f32 %f796, %f795, %f792; + mov.b32 %r426, %f793; + setp.eq.s32 %p297, %r426, 1118925336; + add.s32 %r427, %r426, -1; + mov.b32 %f797, %r427; + add.f32 %f798, %f796, 0f37000000; + selp.f32 %f128, %f798, %f796, %p297; + selp.f32 %f799, %f797, %f793, %p297; + mov.f32 %f800, 0f3FB8AA3B; + mul.rn.f32 %f801, %f799, %f800; + cvt.rzi.f32.f32 %f802, %f801; + abs.f32 %f803, %f802; + setp.gt.f32 %p298, %f803, 0f42FC0000; + mov.b32 %r428, %f802; + and.b32 %r429, %r428, -2147483648; + or.b32 %r430, %r429, 1123811328; + mov.b32 %f804, %r430; + selp.f32 %f805, %f804, %f802, %p298; + mov.f32 %f806, 0fBF317218; + fma.rn.f32 %f807, %f805, %f806, %f799; + mov.f32 %f808, 0f3102E308; + fma.rn.f32 %f809, %f805, %f808, %f807; + mul.f32 %f810, %f809, 0f3FB8AA3B; + add.f32 %f811, %f805, 0f4B40007F; + mov.b32 %r431, %f811; + shl.b32 %r432, %r431, 23; + mov.b32 %f812, %r432; + ex2.approx.ftz.f32 %f813, %f810; + mul.f32 %f129, %f813, %f812; + setp.eq.f32 %p299, %f129, 0f7F800000; + mov.f32 %f3232, 0f7F800000; + @%p299 bra $L__BB2_138; + + fma.rn.f32 %f3232, %f129, %f128, %f129; + +$L__BB2_138: + mov.f32 %f3065, 0f3F800000; + cvt.rzi.f32.f32 %f3064, %f3065; + add.f32 %f3063, %f3064, %f3064; + sub.f32 %f3062, %f600, %f3063; + abs.f32 %f3061, %f3062; + setp.lt.f32 %p300, %f126, 0f00000000; + setp.eq.f32 %p301, %f3061, 0f3F800000; + and.pred %p30, %p300, %p301; + setp.eq.f32 %p302, %f126, 0f00000000; + @%p302 bra $L__BB2_142; + bra.uni $L__BB2_139; + +$L__BB2_142: + add.f32 %f818, %f126, %f126; + selp.f32 %f3234, %f818, 0f00000000, %p301; + bra.uni $L__BB2_143; + +$L__BB2_139: + mov.b32 %r433, %f3232; + xor.b32 %r434, %r433, -2147483648; + mov.b32 %f814, %r434; + selp.f32 %f3234, %f814, %f3232, %p30; + setp.geu.f32 %p303, %f126, 0f00000000; + @%p303 bra $L__BB2_143; + + cvt.rzi.f32.f32 %f816, %f600; + setp.eq.f32 %p304, %f816, 0f40000000; + @%p304 bra $L__BB2_143; + + mov.f32 %f3234, 0f7FFFFFFF; + +$L__BB2_143: + add.f32 %f819, %f127, 0f40000000; + mov.b32 %r435, %f819; + setp.lt.s32 %p306, %r435, 2139095040; + @%p306 bra $L__BB2_148; + + setp.gtu.f32 %p307, %f127, 0f7F800000; + @%p307 bra $L__BB2_147; + bra.uni $L__BB2_145; + +$L__BB2_147: + add.f32 %f3234, %f126, 0f40000000; + bra.uni $L__BB2_148; + +$L__BB2_145: + setp.neu.f32 %p308, %f127, 0f7F800000; + @%p308 bra $L__BB2_148; + + selp.f32 %f3234, 0fFF800000, 0f7F800000, %p30; + +$L__BB2_148: + mov.f32 %f3073, 0f3102E308; + mov.f32 %f3072, 0fBF317218; + mov.f32 %f3071, 0f35BFBE8E; + mov.f32 %f3070, 0f3F317200; + mov.f32 %f3069, 0f3DAAAABD; + mov.f32 %f3068, 0f3C4CAF63; + mov.f32 %f3067, 0f3B18F0FE; + mov.f32 %f3066, 0f3F000000; + mul.f32 %f821, %f3234, 0fBF000000; + setp.eq.f32 %p309, %f126, 0f3F800000; + selp.f32 %f822, 0fBF000000, %f821, %p309; + mov.f32 %f824, 0f3BBB989D; + fma.rn.f32 %f825, %f822, %f824, %f3066; + mov.f32 %f827, 0f437C0000; + cvt.sat.f32.f32 %f828, %f825; + mov.f32 %f829, 0f4B400001; + fma.rm.f32 %f830, %f828, %f827, %f829; + add.f32 %f831, %f830, 0fCB40007F; + neg.f32 %f832, %f831; + fma.rn.f32 %f833, %f822, %f800, %f832; + mov.f32 %f834, 0f32A57060; + fma.rn.f32 %f835, %f822, %f834, %f833; + mov.b32 %r436, %f830; + shl.b32 %r437, %r436, 23; + mov.b32 %f836, %r437; + ex2.approx.ftz.f32 %f837, %f835; + mul.f32 %f138, %f837, %f836; + div.rn.f32 %f139, %f82, %f99; + abs.f32 %f140, %f139; + setp.lt.f32 %p310, %f140, 0f00800000; + mul.f32 %f838, %f140, 0f4B800000; + selp.f32 %f839, %f838, %f140, %p310; + selp.f32 %f840, 0fC3170000, 0fC2FE0000, %p310; + mov.b32 %r438, %f839; + and.b32 %r439, %r438, 8388607; + or.b32 %r440, %r439, 1065353216; + mov.b32 %f841, %r440; + shr.u32 %r441, %r438, 23; + cvt.rn.f32.u32 %f842, %r441; + add.f32 %f843, %f840, %f842; + setp.gt.f32 %p311, %f841, 0f3FB504F3; + mul.f32 %f844, %f841, 0f3F000000; + add.f32 %f845, %f843, 0f3F800000; + selp.f32 %f846, %f845, %f843, %p311; + selp.f32 %f847, %f844, %f841, %p311; + add.f32 %f848, %f847, 0fBF800000; + add.f32 %f849, %f847, 0f3F800000; + rcp.approx.ftz.f32 %f850, %f849; + add.f32 %f851, %f848, %f848; + mul.f32 %f853, %f851, %f850; + mul.f32 %f854, %f853, %f853; + fma.rn.f32 %f857, %f3067, %f854, %f3068; + fma.rn.f32 %f859, %f857, %f854, %f3069; + mul.rn.f32 %f860, %f859, %f854; + mul.rn.f32 %f861, %f860, %f853; + sub.f32 %f862, %f848, %f853; + add.f32 %f863, %f862, %f862; + neg.f32 %f864, %f853; + fma.rn.f32 %f865, %f864, %f848, %f863; + mul.rn.f32 %f866, %f850, %f865; + add.f32 %f867, %f861, %f853; + sub.f32 %f868, %f853, %f867; + add.f32 %f869, %f861, %f868; + add.f32 %f870, %f866, %f869; + add.f32 %f871, %f867, %f870; + sub.f32 %f872, %f867, %f871; + add.f32 %f873, %f870, %f872; + mul.rn.f32 %f875, %f846, %f3070; + mul.rn.f32 %f877, %f846, %f3071; + add.f32 %f878, %f875, %f871; + sub.f32 %f879, %f875, %f878; + add.f32 %f880, %f871, %f879; + add.f32 %f881, %f873, %f880; + add.f32 %f882, %f877, %f881; + add.f32 %f883, %f878, %f882; + sub.f32 %f884, %f878, %f883; + add.f32 %f885, %f882, %f884; + mul.rn.f32 %f886, %f600, %f883; + neg.f32 %f887, %f886; + fma.rn.f32 %f888, %f600, %f883, %f887; + fma.rn.f32 %f889, %f600, %f885, %f888; + fma.rn.f32 %f891, %f3263, %f883, %f889; + add.rn.f32 %f892, %f886, %f891; + neg.f32 %f893, %f892; + add.rn.f32 %f894, %f886, %f893; + add.rn.f32 %f895, %f894, %f891; + mov.b32 %r442, %f892; + setp.eq.s32 %p312, %r442, 1118925336; + add.s32 %r443, %r442, -1; + mov.b32 %f896, %r443; + add.f32 %f897, %f895, 0f37000000; + selp.f32 %f141, %f897, %f895, %p312; + selp.f32 %f898, %f896, %f892, %p312; + mul.rn.f32 %f899, %f898, %f800; + cvt.rzi.f32.f32 %f900, %f899; + abs.f32 %f901, %f900; + setp.gt.f32 %p313, %f901, 0f42FC0000; + mov.b32 %r444, %f900; + and.b32 %r445, %r444, -2147483648; + or.b32 %r446, %r445, 1123811328; + mov.b32 %f902, %r446; + selp.f32 %f903, %f902, %f900, %p313; + fma.rn.f32 %f905, %f903, %f3072, %f898; + fma.rn.f32 %f907, %f903, %f3073, %f905; + mul.f32 %f908, %f907, 0f3FB8AA3B; + add.f32 %f909, %f903, 0f4B40007F; + mov.b32 %r447, %f909; + shl.b32 %r448, %r447, 23; + mov.b32 %f910, %r448; + ex2.approx.ftz.f32 %f911, %f908; + mul.f32 %f142, %f911, %f910; + setp.eq.f32 %p314, %f142, 0f7F800000; + mov.f32 %f3235, 0f7F800000; + @%p314 bra $L__BB2_150; + + fma.rn.f32 %f3235, %f142, %f141, %f142; + +$L__BB2_150: + setp.lt.f32 %p315, %f139, 0f00000000; + and.pred %p31, %p315, %p301; + setp.eq.f32 %p317, %f139, 0f00000000; + @%p317 bra $L__BB2_154; + bra.uni $L__BB2_151; + +$L__BB2_154: + add.f32 %f916, %f139, %f139; + selp.f32 %f3237, %f916, 0f00000000, %p301; + bra.uni $L__BB2_155; + +$L__BB2_151: + mov.b32 %r449, %f3235; + xor.b32 %r450, %r449, -2147483648; + mov.b32 %f912, %r450; + selp.f32 %f3237, %f912, %f3235, %p31; + setp.geu.f32 %p318, %f139, 0f00000000; + @%p318 bra $L__BB2_155; + + cvt.rzi.f32.f32 %f914, %f600; + setp.eq.f32 %p319, %f914, 0f40000000; + @%p319 bra $L__BB2_155; + + mov.f32 %f3237, 0f7FFFFFFF; + +$L__BB2_155: + add.f32 %f917, %f140, 0f40000000; + mov.b32 %r451, %f917; + setp.lt.s32 %p321, %r451, 2139095040; + @%p321 bra $L__BB2_160; + + setp.gtu.f32 %p322, %f140, 0f7F800000; + @%p322 bra $L__BB2_159; + bra.uni $L__BB2_157; + +$L__BB2_159: + add.f32 %f3237, %f139, 0f40000000; + bra.uni $L__BB2_160; + +$L__BB2_157: + setp.neu.f32 %p323, %f140, 0f7F800000; + @%p323 bra $L__BB2_160; + + selp.f32 %f3237, 0fFF800000, 0f7F800000, %p31; + +$L__BB2_160: + mov.f32 %f3078, 0f32A57060; + mov.f32 %f3077, 0f4B400001; + mov.f32 %f3076, 0f437C0000; + mov.f32 %f3075, 0f3BBB989D; + mov.f32 %f3074, 0f3F000000; + mul.f32 %f918, %f3237, 0fBF000000; + setp.eq.f32 %p324, %f139, 0f3F800000; + selp.f32 %f919, 0fBF000000, %f918, %p324; + fma.rn.f32 %f922, %f919, %f3075, %f3074; + cvt.sat.f32.f32 %f925, %f922; + fma.rm.f32 %f927, %f925, %f3076, %f3077; + add.f32 %f928, %f927, 0fCB40007F; + neg.f32 %f929, %f928; + fma.rn.f32 %f930, %f919, %f800, %f929; + fma.rn.f32 %f932, %f919, %f3078, %f930; + mov.b32 %r452, %f927; + shl.b32 %r453, %r452, 23; + mov.b32 %f933, %r453; + ex2.approx.ftz.f32 %f934, %f932; + mul.f32 %f151, %f934, %f933; + sub.f32 %f935, %f138, %f151; + div.rn.f32 %f152, %f69, %f99; + mul.f32 %f936, %f152, %f935; + mul.f32 %f153, %f125, %f936; + cvt.f64.f32 %fd119, %f99; + { + .reg .b32 %temp; + mov.b64 {%temp, %r104}, %fd119; + } + abs.f64 %fd120, %fd119; + { // callseq 40, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd120; + .param .b64 param1; + st.param.f64 [param1+0], %fd647; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1099, [retval0+0]; + } // callseq 40 + setp.lt.s32 %p325, %r104, 0; + and.pred %p32, %p325, %p149; + not.pred %p327, %p32; + @%p327 bra $L__BB2_162; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r454}, %fd1099; + } + xor.b32 %r455, %r454, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r456, %temp}, %fd1099; + } + mov.b64 %fd1099, {%r456, %r455}; + +$L__BB2_162: + setp.eq.f32 %p328, %f99, 0f00000000; + @%p328 bra $L__BB2_166; + bra.uni $L__BB2_163; + +$L__BB2_166: + mov.u32 %r457, 0; + selp.b32 %r458, %r104, 0, %p149; + or.b32 %r459, %r458, 2146435072; + selp.b32 %r460, %r459, %r458, %p152; + mov.b64 %fd1099, {%r457, %r460}; + bra.uni $L__BB2_167; + +$L__BB2_163: + setp.gt.s32 %p329, %r104, -1; + @%p329 bra $L__BB2_167; + + cvt.rzi.f64.f64 %fd710, %fd647; + setp.eq.f64 %p330, %fd710, 0d4008000000000000; + @%p330 bra $L__BB2_167; + + mov.f64 %fd1099, 0dFFF8000000000000; + +$L__BB2_167: + add.f64 %fd126, %fd119, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r461}, %fd126; + } + and.b32 %r462, %r461, 2146435072; + setp.ne.s32 %p333, %r462, 2146435072; + mov.f64 %fd1100, %fd1099; + @%p333 bra $L__BB2_173; + + setp.gtu.f64 %p334, %fd120, 0d7FF0000000000000; + mov.f64 %fd1100, %fd126; + @%p334 bra $L__BB2_173; + + setp.eq.s32 %p335, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r463, %temp}, %fd647; + } + setp.eq.s32 %p336, %r463, 0; + and.pred %p337, %p335, %p336; + @%p337 bra $L__BB2_172; + bra.uni $L__BB2_170; + +$L__BB2_172: + mov.u32 %r468, 0; + setp.gt.f64 %p344, %fd120, 0d3FF0000000000000; + selp.b32 %r469, 2146435072, 0, %p344; + xor.b32 %r470, %r469, 2146435072; + selp.b32 %r471, %r470, %r469, %p152; + setp.eq.f32 %p345, %f99, 0fBF800000; + selp.b32 %r472, 1072693248, %r471, %p345; + mov.b64 %fd1100, {%r468, %r472}; + bra.uni $L__BB2_173; + +$L__BB2_170: + { + .reg .b32 %temp; + mov.b64 {%r464, %temp}, %fd119; + } + and.b32 %r465, %r104, 2147483647; + setp.ne.s32 %p338, %r465, 2146435072; + setp.ne.s32 %p339, %r464, 0; + or.pred %p340, %p338, %p339; + mov.f64 %fd1100, %fd1099; + @%p340 bra $L__BB2_173; + + setp.ne.s32 %p341, %r58, 1071644672; + and.pred %p342, %p341, %p32; + selp.b32 %r466, %r63, %r62, %p342; + mov.u32 %r467, 0; + mov.b64 %fd1100, {%r467, %r466}; + +$L__BB2_173: + cvt.rn.f32.s32 %f3089, %r1371; + mov.f32 %f3088, 0f3102E308; + mov.f32 %f3087, 0fBF317218; + mov.f32 %f3086, 0f35BFBE8E; + mov.f32 %f3085, 0f3F317200; + mov.f32 %f3084, 0f3DAAAABD; + mov.f32 %f3083, 0f3C4CAF63; + mov.f32 %f3082, 0f3B18F0FE; + cvt.rn.f32.s32 %f3081, %r1370; + add.f32 %f3080, %f3081, 0f3F000000; + sub.f32 %f3079, %f3080, %f3278; + setp.eq.f32 %p346, %f99, 0f3F800000; + selp.f64 %fd713, 0d3FF0000000000000, %fd1100, %p346; + cvt.f64.f32 %fd714, %f69; + div.rn.f64 %fd715, %fd714, %fd713; + mul.f32 %f938, %f82, %f151; + mul.f32 %f939, %f3079, %f138; + sub.f32 %f940, %f939, %f938; + cvt.f64.f32 %fd716, %f940; + mul.f64 %fd717, %fd715, %fd716; + cvt.f64.f32 %fd718, %f125; + mul.f64 %fd719, %fd717, %fd718; + cvt.rn.f32.f64 %f154, %fd719; + add.f32 %f941, %f3089, 0f3F000000; + sub.f32 %f155, %f941, %f3277; + div.rn.f32 %f156, %f155, %f101; + abs.f32 %f157, %f156; + setp.lt.f32 %p347, %f157, 0f00800000; + mul.f32 %f942, %f157, 0f4B800000; + selp.f32 %f943, %f942, %f157, %p347; + selp.f32 %f944, 0fC3170000, 0fC2FE0000, %p347; + mov.b32 %r473, %f943; + and.b32 %r474, %r473, 8388607; + or.b32 %r475, %r474, 1065353216; + mov.b32 %f945, %r475; + shr.u32 %r476, %r473, 23; + cvt.rn.f32.u32 %f946, %r476; + add.f32 %f947, %f944, %f946; + setp.gt.f32 %p348, %f945, 0f3FB504F3; + mul.f32 %f948, %f945, 0f3F000000; + add.f32 %f949, %f947, 0f3F800000; + selp.f32 %f950, %f949, %f947, %p348; + selp.f32 %f951, %f948, %f945, %p348; + add.f32 %f952, %f951, 0fBF800000; + add.f32 %f953, %f951, 0f3F800000; + rcp.approx.ftz.f32 %f954, %f953; + add.f32 %f955, %f952, %f952; + mul.f32 %f957, %f955, %f954; + mul.f32 %f958, %f957, %f957; + fma.rn.f32 %f961, %f3082, %f958, %f3083; + fma.rn.f32 %f963, %f961, %f958, %f3084; + mul.rn.f32 %f964, %f963, %f958; + mul.rn.f32 %f965, %f964, %f957; + sub.f32 %f966, %f952, %f957; + add.f32 %f967, %f966, %f966; + neg.f32 %f968, %f957; + fma.rn.f32 %f969, %f968, %f952, %f967; + mul.rn.f32 %f970, %f954, %f969; + add.f32 %f971, %f965, %f957; + sub.f32 %f972, %f957, %f971; + add.f32 %f973, %f965, %f972; + add.f32 %f974, %f970, %f973; + add.f32 %f975, %f971, %f974; + sub.f32 %f976, %f971, %f975; + add.f32 %f977, %f974, %f976; + mul.rn.f32 %f979, %f950, %f3085; + mul.rn.f32 %f981, %f950, %f3086; + add.f32 %f982, %f979, %f975; + sub.f32 %f983, %f979, %f982; + add.f32 %f984, %f975, %f983; + add.f32 %f985, %f977, %f984; + add.f32 %f986, %f981, %f985; + add.f32 %f987, %f982, %f986; + sub.f32 %f988, %f982, %f987; + add.f32 %f989, %f986, %f988; + mul.rn.f32 %f990, %f600, %f987; + neg.f32 %f991, %f990; + fma.rn.f32 %f992, %f600, %f987, %f991; + fma.rn.f32 %f993, %f600, %f989, %f992; + fma.rn.f32 %f995, %f3263, %f987, %f993; + add.rn.f32 %f996, %f990, %f995; + neg.f32 %f997, %f996; + add.rn.f32 %f998, %f990, %f997; + add.rn.f32 %f999, %f998, %f995; + mov.b32 %r477, %f996; + setp.eq.s32 %p349, %r477, 1118925336; + add.s32 %r478, %r477, -1; + mov.b32 %f1000, %r478; + add.f32 %f1001, %f999, 0f37000000; + selp.f32 %f158, %f1001, %f999, %p349; + selp.f32 %f1002, %f1000, %f996, %p349; + mul.rn.f32 %f1004, %f1002, %f800; + cvt.rzi.f32.f32 %f1005, %f1004; + abs.f32 %f1006, %f1005; + setp.gt.f32 %p350, %f1006, 0f42FC0000; + mov.b32 %r479, %f1005; + and.b32 %r480, %r479, -2147483648; + or.b32 %r481, %r480, 1123811328; + mov.b32 %f1007, %r481; + selp.f32 %f1008, %f1007, %f1005, %p350; + fma.rn.f32 %f1010, %f1008, %f3087, %f1002; + fma.rn.f32 %f1012, %f1008, %f3088, %f1010; + mul.f32 %f1013, %f1012, 0f3FB8AA3B; + add.f32 %f1014, %f1008, 0f4B40007F; + mov.b32 %r482, %f1014; + shl.b32 %r483, %r482, 23; + mov.b32 %f1015, %r483; + ex2.approx.ftz.f32 %f1016, %f1013; + mul.f32 %f159, %f1016, %f1015; + setp.eq.f32 %p351, %f159, 0f7F800000; + mov.f32 %f3238, 0f7F800000; + @%p351 bra $L__BB2_175; + + fma.rn.f32 %f3238, %f159, %f158, %f159; + +$L__BB2_175: + setp.lt.f32 %p352, %f156, 0f00000000; + and.pred %p33, %p352, %p301; + setp.eq.f32 %p354, %f156, 0f00000000; + @%p354 bra $L__BB2_179; + bra.uni $L__BB2_176; + +$L__BB2_179: + add.f32 %f1021, %f156, %f156; + selp.f32 %f3240, %f1021, 0f00000000, %p301; + bra.uni $L__BB2_180; + +$L__BB2_176: + mov.b32 %r484, %f3238; + xor.b32 %r485, %r484, -2147483648; + mov.b32 %f1017, %r485; + selp.f32 %f3240, %f1017, %f3238, %p33; + setp.geu.f32 %p355, %f156, 0f00000000; + @%p355 bra $L__BB2_180; + + cvt.rzi.f32.f32 %f1019, %f600; + setp.eq.f32 %p356, %f1019, 0f40000000; + @%p356 bra $L__BB2_180; + + mov.f32 %f3240, 0f7FFFFFFF; + +$L__BB2_180: + abs.f32 %f3137, %f156; + add.f32 %f1022, %f3137, 0f40000000; + mov.b32 %r486, %f1022; + setp.lt.s32 %p358, %r486, 2139095040; + @%p358 bra $L__BB2_185; + + abs.f32 %f3141, %f156; + setp.gtu.f32 %p359, %f3141, 0f7F800000; + @%p359 bra $L__BB2_184; + bra.uni $L__BB2_182; + +$L__BB2_184: + add.f32 %f3240, %f156, 0f40000000; + bra.uni $L__BB2_185; + +$L__BB2_182: + abs.f32 %f3142, %f156; + setp.neu.f32 %p360, %f3142, 0f7F800000; + @%p360 bra $L__BB2_185; + + selp.f32 %f3240, 0fFF800000, 0f7F800000, %p33; + +$L__BB2_185: + mov.f32 %f3101, 0f32A57060; + mov.f32 %f3100, 0f4B400001; + mov.f32 %f3099, 0f437C0000; + mov.f32 %f3098, 0f3BBB989D; + mov.f32 %f3097, 0f3102E308; + mov.f32 %f3096, 0fBF317218; + mov.f32 %f3095, 0f35BFBE8E; + mov.f32 %f3094, 0f3F317200; + mov.f32 %f3093, 0f3DAAAABD; + mov.f32 %f3092, 0f3C4CAF63; + mov.f32 %f3091, 0f3B18F0FE; + mov.f32 %f3090, 0f3F000000; + mul.f32 %f1024, %f3240, 0fBF000000; + setp.eq.f32 %p361, %f156, 0f3F800000; + selp.f32 %f1025, 0fBF000000, %f1024, %p361; + fma.rn.f32 %f1028, %f1025, %f3098, %f3090; + cvt.sat.f32.f32 %f1031, %f1028; + fma.rm.f32 %f1033, %f1031, %f3099, %f3100; + add.f32 %f1034, %f1033, 0fCB40007F; + neg.f32 %f1035, %f1034; + fma.rn.f32 %f1036, %f1025, %f800, %f1035; + fma.rn.f32 %f1038, %f1025, %f3101, %f1036; + mov.b32 %r487, %f1033; + shl.b32 %r488, %r487, 23; + mov.b32 %f1039, %r488; + ex2.approx.ftz.f32 %f1040, %f1038; + mul.f32 %f168, %f1040, %f1039; + div.rn.f32 %f169, %f120, %f101; + abs.f32 %f170, %f169; + setp.lt.f32 %p362, %f170, 0f00800000; + mul.f32 %f1041, %f170, 0f4B800000; + selp.f32 %f1042, %f1041, %f170, %p362; + selp.f32 %f1043, 0fC3170000, 0fC2FE0000, %p362; + mov.b32 %r489, %f1042; + and.b32 %r490, %r489, 8388607; + or.b32 %r491, %r490, 1065353216; + mov.b32 %f1044, %r491; + shr.u32 %r492, %r489, 23; + cvt.rn.f32.u32 %f1045, %r492; + add.f32 %f1046, %f1043, %f1045; + setp.gt.f32 %p363, %f1044, 0f3FB504F3; + mul.f32 %f1047, %f1044, 0f3F000000; + add.f32 %f1048, %f1046, 0f3F800000; + selp.f32 %f1049, %f1048, %f1046, %p363; + selp.f32 %f1050, %f1047, %f1044, %p363; + add.f32 %f1051, %f1050, 0fBF800000; + add.f32 %f1052, %f1050, 0f3F800000; + rcp.approx.ftz.f32 %f1053, %f1052; + add.f32 %f1054, %f1051, %f1051; + mul.f32 %f1056, %f1054, %f1053; + mul.f32 %f1057, %f1056, %f1056; + fma.rn.f32 %f1060, %f3091, %f1057, %f3092; + fma.rn.f32 %f1062, %f1060, %f1057, %f3093; + mul.rn.f32 %f1063, %f1062, %f1057; + mul.rn.f32 %f1064, %f1063, %f1056; + sub.f32 %f1065, %f1051, %f1056; + add.f32 %f1066, %f1065, %f1065; + neg.f32 %f1067, %f1056; + fma.rn.f32 %f1068, %f1067, %f1051, %f1066; + mul.rn.f32 %f1069, %f1053, %f1068; + add.f32 %f1070, %f1064, %f1056; + sub.f32 %f1071, %f1056, %f1070; + add.f32 %f1072, %f1064, %f1071; + add.f32 %f1073, %f1069, %f1072; + add.f32 %f1074, %f1070, %f1073; + sub.f32 %f1075, %f1070, %f1074; + add.f32 %f1076, %f1073, %f1075; + mul.rn.f32 %f1078, %f1049, %f3094; + mul.rn.f32 %f1080, %f1049, %f3095; + add.f32 %f1081, %f1078, %f1074; + sub.f32 %f1082, %f1078, %f1081; + add.f32 %f1083, %f1074, %f1082; + add.f32 %f1084, %f1076, %f1083; + add.f32 %f1085, %f1080, %f1084; + add.f32 %f1086, %f1081, %f1085; + sub.f32 %f1087, %f1081, %f1086; + add.f32 %f1088, %f1085, %f1087; + mul.rn.f32 %f1089, %f600, %f1086; + neg.f32 %f1090, %f1089; + fma.rn.f32 %f1091, %f600, %f1086, %f1090; + fma.rn.f32 %f1092, %f600, %f1088, %f1091; + fma.rn.f32 %f1094, %f3263, %f1086, %f1092; + add.rn.f32 %f1095, %f1089, %f1094; + neg.f32 %f1096, %f1095; + add.rn.f32 %f1097, %f1089, %f1096; + add.rn.f32 %f1098, %f1097, %f1094; + mov.b32 %r493, %f1095; + setp.eq.s32 %p364, %r493, 1118925336; + add.s32 %r494, %r493, -1; + mov.b32 %f1099, %r494; + add.f32 %f1100, %f1098, 0f37000000; + selp.f32 %f171, %f1100, %f1098, %p364; + selp.f32 %f1101, %f1099, %f1095, %p364; + mul.rn.f32 %f1102, %f1101, %f800; + cvt.rzi.f32.f32 %f1103, %f1102; + abs.f32 %f1104, %f1103; + setp.gt.f32 %p365, %f1104, 0f42FC0000; + mov.b32 %r495, %f1103; + and.b32 %r496, %r495, -2147483648; + or.b32 %r497, %r496, 1123811328; + mov.b32 %f1105, %r497; + selp.f32 %f1106, %f1105, %f1103, %p365; + fma.rn.f32 %f1108, %f1106, %f3096, %f1101; + fma.rn.f32 %f1110, %f1106, %f3097, %f1108; + mul.f32 %f1111, %f1110, 0f3FB8AA3B; + add.f32 %f1112, %f1106, 0f4B40007F; + mov.b32 %r498, %f1112; + shl.b32 %r499, %r498, 23; + mov.b32 %f1113, %r499; + ex2.approx.ftz.f32 %f1114, %f1111; + mul.f32 %f172, %f1114, %f1113; + setp.eq.f32 %p366, %f172, 0f7F800000; + mov.f32 %f3241, 0f7F800000; + @%p366 bra $L__BB2_187; + + fma.rn.f32 %f3241, %f172, %f171, %f172; + +$L__BB2_187: + setp.lt.f32 %p367, %f169, 0f00000000; + and.pred %p34, %p367, %p301; + setp.eq.f32 %p369, %f169, 0f00000000; + @%p369 bra $L__BB2_191; + bra.uni $L__BB2_188; + +$L__BB2_191: + add.f32 %f1119, %f169, %f169; + selp.f32 %f3243, %f1119, 0f00000000, %p301; + bra.uni $L__BB2_192; + +$L__BB2_188: + mov.b32 %r500, %f3241; + xor.b32 %r501, %r500, -2147483648; + mov.b32 %f1115, %r501; + selp.f32 %f3243, %f1115, %f3241, %p34; + setp.geu.f32 %p370, %f169, 0f00000000; + @%p370 bra $L__BB2_192; + + cvt.rzi.f32.f32 %f1117, %f600; + setp.eq.f32 %p371, %f1117, 0f40000000; + @%p371 bra $L__BB2_192; + + mov.f32 %f3243, 0f7FFFFFFF; + +$L__BB2_192: + abs.f32 %f3143, %f169; + add.f32 %f1120, %f3143, 0f40000000; + mov.b32 %r502, %f1120; + setp.lt.s32 %p373, %r502, 2139095040; + @%p373 bra $L__BB2_197; + + abs.f32 %f3144, %f169; + setp.gtu.f32 %p374, %f3144, 0f7F800000; + @%p374 bra $L__BB2_196; + bra.uni $L__BB2_194; + +$L__BB2_196: + add.f32 %f3243, %f169, 0f40000000; + bra.uni $L__BB2_197; + +$L__BB2_194: + abs.f32 %f3145, %f169; + setp.neu.f32 %p375, %f3145, 0f7F800000; + @%p375 bra $L__BB2_197; + + selp.f32 %f3243, 0fFF800000, 0f7F800000, %p34; + +$L__BB2_197: + mov.f32 %f3106, 0f32A57060; + mov.f32 %f3105, 0f4B400001; + mov.f32 %f3104, 0f437C0000; + mov.f32 %f3103, 0f3BBB989D; + mov.f32 %f3102, 0f3F000000; + mul.f32 %f1121, %f3243, 0fBF000000; + setp.eq.f32 %p376, %f169, 0f3F800000; + selp.f32 %f1122, 0fBF000000, %f1121, %p376; + fma.rn.f32 %f1125, %f1122, %f3103, %f3102; + cvt.sat.f32.f32 %f1128, %f1125; + fma.rm.f32 %f1130, %f1128, %f3104, %f3105; + add.f32 %f1131, %f1130, 0fCB40007F; + neg.f32 %f1132, %f1131; + fma.rn.f32 %f1133, %f1122, %f800, %f1132; + fma.rn.f32 %f1135, %f1122, %f3106, %f1133; + mov.b32 %r503, %f1130; + shl.b32 %r504, %r503, 23; + mov.b32 %f1136, %r504; + ex2.approx.ftz.f32 %f1137, %f1135; + mul.f32 %f181, %f1137, %f1136; + sub.f32 %f1138, %f168, %f181; + div.rn.f32 %f182, %f69, %f101; + mul.f32 %f1139, %f182, %f1138; + mul.f32 %f183, %f111, %f1139; + cvt.f64.f32 %fd130, %f101; + { + .reg .b32 %temp; + mov.b64 {%temp, %r105}, %fd130; + } + abs.f64 %fd131, %fd130; + { // callseq 41, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd131; + .param .b64 param1; + st.param.f64 [param1+0], %fd647; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1102, [retval0+0]; + } // callseq 41 + setp.lt.s32 %p377, %r105, 0; + and.pred %p35, %p377, %p149; + not.pred %p379, %p35; + @%p379 bra $L__BB2_199; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r505}, %fd1102; + } + xor.b32 %r506, %r505, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r507, %temp}, %fd1102; + } + mov.b64 %fd1102, {%r507, %r506}; + +$L__BB2_199: + setp.eq.f32 %p380, %f101, 0f00000000; + @%p380 bra $L__BB2_203; + bra.uni $L__BB2_200; + +$L__BB2_203: + mov.u32 %r508, 0; + selp.b32 %r509, %r105, 0, %p149; + or.b32 %r510, %r509, 2146435072; + selp.b32 %r511, %r510, %r509, %p152; + mov.b64 %fd1102, {%r508, %r511}; + bra.uni $L__BB2_204; + +$L__BB2_200: + setp.gt.s32 %p381, %r105, -1; + @%p381 bra $L__BB2_204; + + cvt.rzi.f64.f64 %fd722, %fd647; + setp.eq.f64 %p382, %fd722, 0d4008000000000000; + @%p382 bra $L__BB2_204; + + mov.f64 %fd1102, 0dFFF8000000000000; + +$L__BB2_204: + add.f64 %fd137, %fd130, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r512}, %fd137; + } + and.b32 %r513, %r512, 2146435072; + setp.ne.s32 %p385, %r513, 2146435072; + mov.f64 %fd1103, %fd1102; + @%p385 bra $L__BB2_210; + + setp.gtu.f64 %p386, %fd131, 0d7FF0000000000000; + mov.f64 %fd1103, %fd137; + @%p386 bra $L__BB2_210; + + setp.eq.s32 %p387, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r514, %temp}, %fd647; + } + setp.eq.s32 %p388, %r514, 0; + and.pred %p389, %p387, %p388; + @%p389 bra $L__BB2_209; + bra.uni $L__BB2_207; + +$L__BB2_209: + mov.u32 %r519, 0; + setp.gt.f64 %p396, %fd131, 0d3FF0000000000000; + selp.b32 %r520, 2146435072, 0, %p396; + xor.b32 %r521, %r520, 2146435072; + selp.b32 %r522, %r521, %r520, %p152; + setp.eq.f32 %p397, %f101, 0fBF800000; + selp.b32 %r523, 1072693248, %r522, %p397; + mov.b64 %fd1103, {%r519, %r523}; + bra.uni $L__BB2_210; + +$L__BB2_207: + { + .reg .b32 %temp; + mov.b64 {%r515, %temp}, %fd130; + } + and.b32 %r516, %r105, 2147483647; + setp.ne.s32 %p390, %r516, 2146435072; + setp.ne.s32 %p391, %r515, 0; + or.pred %p392, %p390, %p391; + mov.f64 %fd1103, %fd1102; + @%p392 bra $L__BB2_210; + + setp.ne.s32 %p393, %r58, 1071644672; + and.pred %p394, %p393, %p35; + selp.b32 %r517, %r63, %r62, %p394; + mov.u32 %r518, 0; + mov.b64 %fd1103, {%r518, %r517}; + +$L__BB2_210: + cvt.rn.f32.s32 %f3140, %r1371; + add.f32 %f3139, %f3140, 0f3F000000; + sub.f32 %f3138, %f3139, %f3277; + cvt.f64.f32 %fd1071, %f69; + cvt.rn.f32.s32 %f3114, %r1370; + mov.f32 %f3113, 0f3102E308; + mov.f32 %f3112, 0fBF317218; + mov.f32 %f3111, 0f35BFBE8E; + mov.f32 %f3110, 0f3F317200; + mov.f32 %f3109, 0f3DAAAABD; + mov.f32 %f3108, 0f3C4CAF63; + mov.f32 %f3107, 0f3B18F0FE; + setp.eq.f32 %p398, %f101, 0f3F800000; + selp.f64 %fd725, 0d3FF0000000000000, %fd1103, %p398; + div.rn.f64 %fd727, %fd1071, %fd725; + mul.f32 %f1141, %f120, %f181; + mul.f32 %f1142, %f3138, %f168; + sub.f32 %f1143, %f1142, %f1141; + cvt.f64.f32 %fd728, %f1143; + mul.f64 %fd729, %fd727, %fd728; + cvt.f64.f32 %fd730, %f111; + mul.f64 %fd731, %fd729, %fd730; + cvt.rn.f32.f64 %f184, %fd731; + add.f32 %f1144, %f3114, 0f3F800000; + sub.f32 %f1145, %f1144, %f3278; + div.rn.f32 %f185, %f1145, %f99; + abs.f32 %f186, %f185; + setp.lt.f32 %p399, %f186, 0f00800000; + mul.f32 %f1146, %f186, 0f4B800000; + selp.f32 %f1147, %f1146, %f186, %p399; + selp.f32 %f1148, 0fC3170000, 0fC2FE0000, %p399; + mov.b32 %r524, %f1147; + and.b32 %r525, %r524, 8388607; + or.b32 %r526, %r525, 1065353216; + mov.b32 %f1149, %r526; + shr.u32 %r527, %r524, 23; + cvt.rn.f32.u32 %f1150, %r527; + add.f32 %f1151, %f1148, %f1150; + setp.gt.f32 %p400, %f1149, 0f3FB504F3; + mul.f32 %f1152, %f1149, 0f3F000000; + add.f32 %f1153, %f1151, 0f3F800000; + selp.f32 %f1154, %f1153, %f1151, %p400; + selp.f32 %f1155, %f1152, %f1149, %p400; + add.f32 %f1156, %f1155, 0fBF800000; + add.f32 %f1157, %f1155, 0f3F800000; + rcp.approx.ftz.f32 %f1158, %f1157; + add.f32 %f1159, %f1156, %f1156; + mul.f32 %f1161, %f1159, %f1158; + mul.f32 %f1162, %f1161, %f1161; + fma.rn.f32 %f1165, %f3107, %f1162, %f3108; + fma.rn.f32 %f1167, %f1165, %f1162, %f3109; + mul.rn.f32 %f1168, %f1167, %f1162; + mul.rn.f32 %f1169, %f1168, %f1161; + sub.f32 %f1170, %f1156, %f1161; + add.f32 %f1171, %f1170, %f1170; + neg.f32 %f1172, %f1161; + fma.rn.f32 %f1173, %f1172, %f1156, %f1171; + mul.rn.f32 %f1174, %f1158, %f1173; + add.f32 %f1175, %f1169, %f1161; + sub.f32 %f1176, %f1161, %f1175; + add.f32 %f1177, %f1169, %f1176; + add.f32 %f1178, %f1174, %f1177; + add.f32 %f1179, %f1175, %f1178; + sub.f32 %f1180, %f1175, %f1179; add.f32 %f1181, %f1178, %f1180; - mul.rn.f32 %f1183, %f886, %f1179; - neg.f32 %f1184, %f1183; - fma.rn.f32 %f1185, %f886, %f1179, %f1184; - fma.rn.f32 %f1186, %f886, %f1181, %f1185; - mov.f32 %f3334, 0f00000000; - fma.rn.f32 %f1188, %f3334, %f1179, %f1186; - add.rn.f32 %f1189, %f1183, %f1188; - neg.f32 %f1190, %f1189; - add.rn.f32 %f1191, %f1183, %f1190; - add.rn.f32 %f1192, %f1191, %f1188; - mov.b32 %r152, %f1189; - setp.eq.s32 %p81, %r152, 1118925336; - add.s32 %r153, %r152, -1; - mov.b32 %f1193, %r153; - add.f32 %f1194, %f1192, 0f37000000; - selp.f32 %f1195, %f1193, %f1189, %p81; - selp.f32 %f225, %f1194, %f1192, %p81; - mul.f32 %f1196, %f1195, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1197, %f1196; - mov.f32 %f1198, 0fBF317200; - fma.rn.f32 %f1199, %f1197, %f1198, %f1195; - mov.f32 %f1200, 0fB5BFBE8E; - fma.rn.f32 %f1201, %f1197, %f1200, %f1199; - mul.f32 %f1202, %f1201, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1203, %f1202; - add.f32 %f1204, %f1197, 0f00000000; - ex2.approx.f32 %f1205, %f1204; - mul.f32 %f1206, %f1203, %f1205; - setp.lt.f32 %p82, %f1195, 0fC2D20000; - selp.f32 %f1207, 0f00000000, %f1206, %p82; - setp.gt.f32 %p83, %f1195, 0f42D20000; - selp.f32 %f3309, 0f7F800000, %f1207, %p83; - setp.eq.f32 %p84, %f3309, 0f7F800000; - @%p84 bra BB2_55; - - fma.rn.f32 %f3309, %f3309, %f225, %f3309; - -BB2_55: - setp.geu.f32 %p379, %f143, 0f00000000; - mov.b32 %r154, %f3309; - xor.b32 %r155, %r154, -2147483648; - mov.b32 %f1208, %r155; - selp.f32 %f229, %f1208, %f3309, %p3; - setp.eq.f32 %p85, %f143, 0f00000000; - selp.f32 %f3310, %f151, %f229, %p85; - @%p379 bra BB2_57; - - cvt.rzi.f32.f32 %f1210, %f886; - setp.neu.f32 %p86, %f1210, 0f40000000; - selp.f32 %f3310, 0f7FFFFFFF, %f229, %p86; - -BB2_57: - abs.f32 %f3186, %f143; - add.f32 %f3185, %f3186, 0f40000000; - mov.b32 %r302, %f3185; - mov.f32 %f3184, 0f3DAAAABD; - mov.f32 %f3183, 0f3C4CAF63; - mov.f32 %f3182, 0f3B18F0FE; - mov.f32 %f3181, 0fB5BFBE8E; - mov.f32 %f3180, 0fBF317200; - add.f32 %f1213, %f143, 0f40000000; - setp.gtu.f32 %p87, %f3186, 0f7F800000; - selp.f32 %f1214, %f1213, %f3310, %p87; - selp.f32 %f1215, 0fFF800000, 0f7F800000, %p3; - setp.neu.f32 %p88, %f3186, 0f7F800000; - selp.f32 %f1216, %f1214, %f1215, %p88; - setp.gt.s32 %p89, %r302, 2139095039; - selp.f32 %f1217, %f1216, %f3310, %p89; - mul.f32 %f1218, %f1217, 0fBF000000; - setp.eq.f32 %p90, %f143, 0f3F800000; - selp.f32 %f1219, 0fBF000000, %f1218, %p90; - mul.f32 %f1220, %f1219, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1221, %f1220; - fma.rn.f32 %f1223, %f1221, %f3180, %f1219; - fma.rn.f32 %f1225, %f1221, %f3181, %f1223; - mul.f32 %f1226, %f1225, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1227, %f1226; - add.f32 %f1228, %f1221, 0f00000000; - ex2.approx.f32 %f1229, %f1228; - mul.f32 %f1230, %f1227, %f1229; - setp.lt.f32 %p91, %f1219, 0fC2D20000; - selp.f32 %f1231, 0f00000000, %f1230, %p91; - setp.gt.f32 %p92, %f1219, 0f42D20000; - selp.f32 %f233, 0f7F800000, %f1231, %p92; - // inline asm - rcp.approx.ftz.f32 %f1211,%f155; - // inline asm - mul.f32 %f1232, %f1211, %f156; - mul.f32 %f1233, %f1232, %f1232; - fma.rn.f32 %f1236, %f3182, %f1233, %f3183; - fma.rn.f32 %f1238, %f1236, %f1233, %f3184; - mul.rn.f32 %f1239, %f1238, %f1233; - mul.rn.f32 %f1240, %f1239, %f1232; - sub.f32 %f1241, %f154, %f1232; - neg.f32 %f1242, %f1232; - add.f32 %f1243, %f1241, %f1241; - fma.rn.f32 %f1244, %f1242, %f154, %f1243; - mul.rn.f32 %f1245, %f1211, %f1244; - add.f32 %f1246, %f1240, %f1232; - sub.f32 %f1247, %f1232, %f1246; - add.f32 %f1248, %f1240, %f1247; - add.f32 %f1249, %f1245, %f1248; - add.f32 %f1250, %f1246, %f1249; - sub.f32 %f1251, %f1246, %f1250; - add.f32 %f1252, %f1249, %f1251; - add.f32 %f1253, %f157, %f1250; - sub.f32 %f1254, %f157, %f1253; - add.f32 %f1255, %f1250, %f1254; - add.f32 %f1256, %f1252, %f1255; - add.f32 %f1257, %f158, %f1256; - add.f32 %f1258, %f1253, %f1257; - sub.f32 %f1259, %f1253, %f1258; - add.f32 %f1260, %f1257, %f1259; - mul.rn.f32 %f1262, %f886, %f1258; - neg.f32 %f1263, %f1262; - fma.rn.f32 %f1264, %f886, %f1258, %f1263; - fma.rn.f32 %f1265, %f886, %f1260, %f1264; - fma.rn.f32 %f1267, %f3334, %f1258, %f1265; - add.rn.f32 %f1268, %f1262, %f1267; - neg.f32 %f1269, %f1268; - add.rn.f32 %f1270, %f1262, %f1269; - add.rn.f32 %f1271, %f1270, %f1267; - mov.b32 %r156, %f1268; - setp.eq.s32 %p93, %r156, 1118925336; - add.s32 %r157, %r156, -1; - mov.b32 %f1272, %r157; - add.f32 %f1273, %f1271, 0f37000000; - selp.f32 %f1274, %f1272, %f1268, %p93; - selp.f32 %f234, %f1273, %f1271, %p93; - mul.f32 %f1275, %f1274, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1276, %f1275; - fma.rn.f32 %f1277, %f1276, %f3180, %f1274; - fma.rn.f32 %f1278, %f1276, %f3181, %f1277; - mul.f32 %f1279, %f1278, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1280, %f1279; - add.f32 %f1281, %f1276, 0f00000000; - ex2.approx.f32 %f1282, %f1281; - mul.f32 %f1283, %f1280, %f1282; - setp.lt.f32 %p94, %f1274, 0fC2D20000; - selp.f32 %f1284, 0f00000000, %f1283, %p94; - setp.gt.f32 %p95, %f1274, 0f42D20000; - selp.f32 %f3311, 0f7F800000, %f1284, %p95; - setp.eq.f32 %p96, %f3311, 0f7F800000; - @%p96 bra BB2_59; - - fma.rn.f32 %f3311, %f3311, %f234, %f3311; - -BB2_59: - setp.geu.f32 %p380, %f152, 0f00000000; - mov.b32 %r158, %f3311; - xor.b32 %r159, %r158, -2147483648; - mov.b32 %f1285, %r159; - selp.f32 %f238, %f1285, %f3311, %p4; - setp.eq.f32 %p97, %f152, 0f00000000; - selp.f32 %f3312, %f159, %f238, %p97; - @%p380 bra BB2_61; - - cvt.rzi.f32.f32 %f1287, %f886; - setp.neu.f32 %p98, %f1287, 0f40000000; - selp.f32 %f3312, 0f7FFFFFFF, %f238, %p98; - -BB2_61: - abs.f32 %f3200, %f152; - add.f32 %f3199, %f3200, 0f40000000; - mov.b32 %r303, %f3199; - cvt.rn.f32.s32 %f3198, %r319; - cvt.rn.f32.s32 %f3197, %r320; - mov.f32 %f3196, 0f35BFBE8E; - mov.f32 %f3195, 0f3F317200; - add.f32 %f3194, %f3198, 0f3F800000; - sub.f32 %f3193, %f3194, %f3349; - sub.f32 %f3192, %f3198, %f3349; - mov.f32 %f3191, 0f3DAAAABD; - mov.f32 %f3190, 0f3C4CAF63; - mov.f32 %f3189, 0f3B18F0FE; - mov.f32 %f3188, 0fB5BFBE8E; - mov.f32 %f3187, 0fBF317200; - add.f32 %f1290, %f152, 0f40000000; - setp.gtu.f32 %p99, %f3200, 0f7F800000; - selp.f32 %f1291, %f1290, %f3312, %p99; - selp.f32 %f1292, 0fFF800000, 0f7F800000, %p4; - setp.neu.f32 %p100, %f3200, 0f7F800000; - selp.f32 %f1293, %f1291, %f1292, %p100; - setp.gt.s32 %p101, %r303, 2139095039; - selp.f32 %f1294, %f1293, %f3312, %p101; - mul.f32 %f1295, %f1294, 0fBF000000; - setp.eq.f32 %p102, %f152, 0f3F800000; - selp.f32 %f1296, 0fBF000000, %f1295, %p102; - mul.f32 %f1297, %f1296, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1298, %f1297; - fma.rn.f32 %f1300, %f1298, %f3187, %f1296; - fma.rn.f32 %f1302, %f1298, %f3188, %f1300; - mul.f32 %f1303, %f1302, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1304, %f1303; - add.f32 %f1305, %f1298, 0f00000000; - ex2.approx.f32 %f1306, %f1305; - mul.f32 %f1307, %f1304, %f1306; - setp.lt.f32 %p103, %f1296, 0fC2D20000; - selp.f32 %f1308, 0f00000000, %f1307, %p103; - setp.gt.f32 %p104, %f1296, 0f42D20000; - selp.f32 %f1309, 0f7F800000, %f1308, %p104; - sub.f32 %f1310, %f233, %f1309; - mul.f32 %f1311, %f160, %f1310; - mul.f32 %f242, %f224, %f1311; - mul.f32 %f1312, %f3192, %f1309; - mul.f32 %f1313, %f3193, %f233; - sub.f32 %f1314, %f1313, %f1312; - mul.f32 %f1315, %f1314, %f188; - mul.f32 %f243, %f224, %f1315; - add.f32 %f1316, %f3197, 0f3F800000; - sub.f32 %f244, %f1316, %f3348; - div.rn.f32 %f245, %f244, %f132; - abs.f32 %f246, %f245; - setp.lt.f32 %p105, %f246, 0f00800000; - mul.f32 %f1317, %f246, 0f4B800000; - selp.f32 %f1318, 0fC3170000, 0fC2FE0000, %p105; - selp.f32 %f1319, %f1317, %f246, %p105; - mov.b32 %r160, %f1319; - and.b32 %r161, %r160, 8388607; - or.b32 %r162, %r161, 1065353216; - mov.b32 %f1320, %r162; - shr.u32 %r163, %r160, 23; - cvt.rn.f32.u32 %f1321, %r163; - add.f32 %f1322, %f1318, %f1321; - setp.gt.f32 %p106, %f1320, 0f3FB504F3; - mul.f32 %f1323, %f1320, 0f3F000000; - add.f32 %f1324, %f1322, 0f3F800000; - selp.f32 %f1325, %f1323, %f1320, %p106; - selp.f32 %f1326, %f1324, %f1322, %p106; - add.f32 %f247, %f1325, 0fBF800000; - add.f32 %f1289, %f1325, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1288,%f1289; - // inline asm - add.f32 %f249, %f247, %f247; - mul.f32 %f1327, %f1288, %f249; - mul.f32 %f1328, %f1327, %f1327; - fma.rn.f32 %f1331, %f3189, %f1328, %f3190; - fma.rn.f32 %f1333, %f1331, %f1328, %f3191; - mul.rn.f32 %f1334, %f1333, %f1328; - mul.rn.f32 %f1335, %f1334, %f1327; - sub.f32 %f1336, %f247, %f1327; - neg.f32 %f1337, %f1327; - add.f32 %f1338, %f1336, %f1336; - fma.rn.f32 %f1339, %f1337, %f247, %f1338; - mul.rn.f32 %f1340, %f1288, %f1339; - add.f32 %f1341, %f1335, %f1327; - sub.f32 %f1342, %f1327, %f1341; - add.f32 %f1343, %f1335, %f1342; - add.f32 %f1344, %f1340, %f1343; - add.f32 %f1345, %f1341, %f1344; - sub.f32 %f1346, %f1341, %f1345; - add.f32 %f1347, %f1344, %f1346; - mul.rn.f32 %f250, %f1326, %f3195; - mul.rn.f32 %f251, %f1326, %f3196; - add.f32 %f1350, %f250, %f1345; - sub.f32 %f1351, %f250, %f1350; - add.f32 %f1352, %f1345, %f1351; - add.f32 %f1353, %f1347, %f1352; - add.f32 %f1354, %f251, %f1353; - add.f32 %f1355, %f1350, %f1354; - sub.f32 %f1356, %f1350, %f1355; - add.f32 %f1357, %f1354, %f1356; - mul.rn.f32 %f1359, %f886, %f1355; - neg.f32 %f1360, %f1359; - fma.rn.f32 %f1361, %f886, %f1355, %f1360; - fma.rn.f32 %f1362, %f886, %f1357, %f1361; - fma.rn.f32 %f1364, %f3334, %f1355, %f1362; - add.rn.f32 %f1365, %f1359, %f1364; - neg.f32 %f1366, %f1365; - add.rn.f32 %f1367, %f1359, %f1366; - add.rn.f32 %f1368, %f1367, %f1364; - mov.b32 %r164, %f1365; - setp.eq.s32 %p107, %r164, 1118925336; - add.s32 %r165, %r164, -1; - mov.b32 %f1369, %r165; - add.f32 %f1370, %f1368, 0f37000000; - selp.f32 %f1371, %f1369, %f1365, %p107; - selp.f32 %f252, %f1370, %f1368, %p107; - mul.f32 %f1372, %f1371, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1373, %f1372; - fma.rn.f32 %f1374, %f1373, %f3187, %f1371; - fma.rn.f32 %f1375, %f1373, %f3188, %f1374; - mul.f32 %f1376, %f1375, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1377, %f1376; - add.f32 %f1378, %f1373, 0f00000000; - ex2.approx.f32 %f1379, %f1378; - mul.f32 %f1380, %f1377, %f1379; - setp.lt.f32 %p108, %f1371, 0fC2D20000; - selp.f32 %f1381, 0f00000000, %f1380, %p108; - setp.gt.f32 %p109, %f1371, 0f42D20000; - selp.f32 %f3313, 0f7F800000, %f1381, %p109; - setp.eq.f32 %p110, %f3313, 0f7F800000; - @%p110 bra BB2_63; - - fma.rn.f32 %f3313, %f3313, %f252, %f3313; - -BB2_63: - setp.lt.f32 %p111, %f245, 0f00000000; - and.pred %p9, %p111, %p59; - mov.b32 %r166, %f3313; - xor.b32 %r167, %r166, -2147483648; - mov.b32 %f1382, %r167; - selp.f32 %f3315, %f1382, %f3313, %p9; - setp.eq.f32 %p113, %f245, 0f00000000; - @%p113 bra BB2_66; - bra.uni BB2_64; - -BB2_66: - add.f32 %f1385, %f245, %f245; - selp.f32 %f3315, %f1385, 0f00000000, %p59; - bra.uni BB2_67; - -BB2_64: - setp.geu.f32 %p114, %f245, 0f00000000; - @%p114 bra BB2_67; - - cvt.rzi.f32.f32 %f1384, %f886; - setp.neu.f32 %p115, %f1384, 0f40000000; - selp.f32 %f3315, 0f7FFFFFFF, %f3315, %p115; - -BB2_67: - abs.f32 %f3201, %f245; - add.f32 %f1386, %f3201, 0f40000000; - mov.b32 %r36, %f1386; - setp.lt.s32 %p117, %r36, 2139095040; - @%p117 bra BB2_72; - - abs.f32 %f3209, %f245; - setp.gtu.f32 %p118, %f3209, 0f7F800000; - @%p118 bra BB2_71; - bra.uni BB2_69; - -BB2_71: - add.f32 %f3315, %f245, 0f40000000; - bra.uni BB2_72; - -BB2_69: - abs.f32 %f3210, %f245; - setp.neu.f32 %p119, %f3210, 0f7F800000; - @%p119 bra BB2_72; - - selp.f32 %f3315, 0fFF800000, 0f7F800000, %p9; - -BB2_72: - cvt.rn.f32.s32 %f3222, %r320; - sub.f32 %f3221, %f3222, %f3348; - mov.f32 %f3208, 0f35BFBE8E; - mov.f32 %f3207, 0f3F317200; - mov.f32 %f3206, 0f3DAAAABD; - mov.f32 %f3205, 0f3C4CAF63; - mov.f32 %f3204, 0f3B18F0FE; - mov.f32 %f3203, 0fB5BFBE8E; - mov.f32 %f3202, 0fBF317200; - mul.f32 %f1389, %f3315, 0fBF000000; - setp.eq.f32 %p120, %f245, 0f3F800000; - selp.f32 %f1390, 0fBF000000, %f1389, %p120; - mul.f32 %f1391, %f1390, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1392, %f1391; - fma.rn.f32 %f1394, %f1392, %f3202, %f1390; - fma.rn.f32 %f1396, %f1392, %f3203, %f1394; - mul.f32 %f1397, %f1396, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1398, %f1397; - add.f32 %f1399, %f1392, 0f00000000; - ex2.approx.f32 %f1400, %f1399; - mul.f32 %f1401, %f1398, %f1400; - setp.lt.f32 %p121, %f1390, 0fC2D20000; - selp.f32 %f1402, 0f00000000, %f1401, %p121; - setp.gt.f32 %p122, %f1390, 0f42D20000; - selp.f32 %f263, 0f7F800000, %f1402, %p122; - div.rn.f32 %f264, %f3221, %f132; - abs.f32 %f265, %f264; - setp.lt.f32 %p123, %f265, 0f00800000; - mul.f32 %f1403, %f265, 0f4B800000; - selp.f32 %f1404, 0fC3170000, 0fC2FE0000, %p123; - selp.f32 %f1405, %f1403, %f265, %p123; - mov.b32 %r168, %f1405; - and.b32 %r169, %r168, 8388607; - or.b32 %r170, %r169, 1065353216; - mov.b32 %f1406, %r170; - shr.u32 %r171, %r168, 23; - cvt.rn.f32.u32 %f1407, %r171; - add.f32 %f1408, %f1404, %f1407; - setp.gt.f32 %p124, %f1406, 0f3FB504F3; - mul.f32 %f1409, %f1406, 0f3F000000; - add.f32 %f1410, %f1408, 0f3F800000; - selp.f32 %f1411, %f1409, %f1406, %p124; - selp.f32 %f1412, %f1410, %f1408, %p124; - add.f32 %f266, %f1411, 0fBF800000; - add.f32 %f1388, %f1411, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1387,%f1388; - // inline asm - add.f32 %f268, %f266, %f266; - mul.f32 %f1413, %f1387, %f268; - mul.f32 %f1414, %f1413, %f1413; - fma.rn.f32 %f1417, %f3204, %f1414, %f3205; - fma.rn.f32 %f1419, %f1417, %f1414, %f3206; - mul.rn.f32 %f1420, %f1419, %f1414; - mul.rn.f32 %f1421, %f1420, %f1413; - sub.f32 %f1422, %f266, %f1413; - neg.f32 %f1423, %f1413; - add.f32 %f1424, %f1422, %f1422; - fma.rn.f32 %f1425, %f1423, %f266, %f1424; - mul.rn.f32 %f1426, %f1387, %f1425; - add.f32 %f1427, %f1421, %f1413; - sub.f32 %f1428, %f1413, %f1427; - add.f32 %f1429, %f1421, %f1428; - add.f32 %f1430, %f1426, %f1429; - add.f32 %f1431, %f1427, %f1430; - sub.f32 %f1432, %f1427, %f1431; - add.f32 %f1433, %f1430, %f1432; - mul.rn.f32 %f269, %f1412, %f3207; - mul.rn.f32 %f270, %f1412, %f3208; - add.f32 %f1436, %f269, %f1431; - sub.f32 %f1437, %f269, %f1436; - add.f32 %f1438, %f1431, %f1437; - add.f32 %f1439, %f1433, %f1438; - add.f32 %f1440, %f270, %f1439; - add.f32 %f1441, %f1436, %f1440; - sub.f32 %f1442, %f1436, %f1441; - add.f32 %f1443, %f1440, %f1442; - mul.rn.f32 %f1445, %f886, %f1441; - neg.f32 %f1446, %f1445; - fma.rn.f32 %f1447, %f886, %f1441, %f1446; - fma.rn.f32 %f1448, %f886, %f1443, %f1447; - fma.rn.f32 %f1450, %f3334, %f1441, %f1448; - add.rn.f32 %f1451, %f1445, %f1450; - neg.f32 %f1452, %f1451; - add.rn.f32 %f1453, %f1445, %f1452; - add.rn.f32 %f1454, %f1453, %f1450; - mov.b32 %r172, %f1451; - setp.eq.s32 %p125, %r172, 1118925336; - add.s32 %r173, %r172, -1; - mov.b32 %f1455, %r173; - add.f32 %f1456, %f1454, 0f37000000; - selp.f32 %f1457, %f1455, %f1451, %p125; - selp.f32 %f271, %f1456, %f1454, %p125; - mul.f32 %f1458, %f1457, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1459, %f1458; - fma.rn.f32 %f1460, %f1459, %f3202, %f1457; - fma.rn.f32 %f1461, %f1459, %f3203, %f1460; - mul.f32 %f1462, %f1461, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1463, %f1462; - add.f32 %f1464, %f1459, 0f00000000; - ex2.approx.f32 %f1465, %f1464; - mul.f32 %f1466, %f1463, %f1465; - setp.lt.f32 %p126, %f1457, 0fC2D20000; - selp.f32 %f1467, 0f00000000, %f1466, %p126; - setp.gt.f32 %p127, %f1457, 0f42D20000; - selp.f32 %f3316, 0f7F800000, %f1467, %p127; - setp.eq.f32 %p128, %f3316, 0f7F800000; - @%p128 bra BB2_74; - - fma.rn.f32 %f3316, %f3316, %f271, %f3316; - -BB2_74: - setp.lt.f32 %p129, %f264, 0f00000000; - and.pred %p10, %p129, %p59; - mov.b32 %r174, %f3316; - xor.b32 %r175, %r174, -2147483648; - mov.b32 %f1468, %r175; - selp.f32 %f3318, %f1468, %f3316, %p10; - setp.eq.f32 %p131, %f264, 0f00000000; - @%p131 bra BB2_77; - bra.uni BB2_75; - -BB2_77: - add.f32 %f1471, %f264, %f264; - selp.f32 %f3318, %f1471, 0f00000000, %p59; - bra.uni BB2_78; - -BB2_75: - setp.geu.f32 %p132, %f264, 0f00000000; - @%p132 bra BB2_78; - - cvt.rzi.f32.f32 %f1470, %f886; - setp.neu.f32 %p133, %f1470, 0f40000000; - selp.f32 %f3318, 0f7FFFFFFF, %f3318, %p133; - -BB2_78: - abs.f32 %f3106, %f264; - add.f32 %f1472, %f3106, 0f40000000; - mov.b32 %r37, %f1472; - setp.lt.s32 %p135, %r37, 2139095040; - @%p135 bra BB2_83; - - abs.f32 %f3219, %f264; - setp.gtu.f32 %p136, %f3219, 0f7F800000; - @%p136 bra BB2_82; - bra.uni BB2_80; - -BB2_82: - add.f32 %f3318, %f264, 0f40000000; - bra.uni BB2_83; - -BB2_80: - abs.f32 %f3220, %f264; - setp.neu.f32 %p137, %f3220, 0f7F800000; - @%p137 bra BB2_83; - - selp.f32 %f3318, 0fFF800000, 0f7F800000, %p10; - -BB2_83: - cvt.rn.f32.s32 %f3224, %r320; - sub.f32 %f3223, %f3224, %f3348; - cvt.rn.f32.s32 %f3114, %r320; - add.f32 %f3113, %f3114, 0f3F800000; - sub.f32 %f3112, %f3113, %f3348; - mov.f32 %f3111, 0f3DAAAABD; - mov.f32 %f3110, 0f3C4CAF63; - mov.f32 %f3109, 0f3B18F0FE; - mov.f32 %f3108, 0fB5BFBE8E; - mov.f32 %f3107, 0fBF317200; - mul.f32 %f1475, %f3318, 0fBF000000; - setp.eq.f32 %p138, %f264, 0f3F800000; - selp.f32 %f1476, 0fBF000000, %f1475, %p138; - mul.f32 %f1477, %f1476, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1478, %f1477; - fma.rn.f32 %f1480, %f1478, %f3107, %f1476; - fma.rn.f32 %f1482, %f1478, %f3108, %f1480; - mul.f32 %f1483, %f1482, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1484, %f1483; - add.f32 %f1485, %f1478, 0f00000000; - ex2.approx.f32 %f1486, %f1485; - mul.f32 %f1487, %f1484, %f1486; - setp.lt.f32 %p139, %f1476, 0fC2D20000; - selp.f32 %f1488, 0f00000000, %f1487, %p139; - setp.gt.f32 %p140, %f1476, 0f42D20000; - selp.f32 %f1489, 0f7F800000, %f1488, %p140; - sub.f32 %f1490, %f263, %f1489; - mul.f32 %f1491, %f161, %f1490; - mul.f32 %f282, %f210, %f1491; - mul.f32 %f1492, %f3223, %f1489; - mul.f32 %f1493, %f3112, %f263; - sub.f32 %f1494, %f1493, %f1492; - mul.f32 %f1495, %f1494, %f189; - mul.f32 %f283, %f210, %f1495; - // inline asm - rcp.approx.ftz.f32 %f1473,%f147; - // inline asm - mul.f32 %f1496, %f1473, %f148; - mul.f32 %f1497, %f1496, %f1496; - fma.rn.f32 %f1500, %f3109, %f1497, %f3110; - fma.rn.f32 %f1502, %f1500, %f1497, %f3111; - mul.rn.f32 %f1503, %f1502, %f1497; - mul.rn.f32 %f1504, %f1503, %f1496; - sub.f32 %f1505, %f146, %f1496; - neg.f32 %f1506, %f1496; - add.f32 %f1507, %f1505, %f1505; - fma.rn.f32 %f1508, %f1506, %f146, %f1507; - mul.rn.f32 %f1509, %f1473, %f1508; - add.f32 %f1510, %f1504, %f1496; - sub.f32 %f1511, %f1496, %f1510; - add.f32 %f1512, %f1504, %f1511; - add.f32 %f1513, %f1509, %f1512; - add.f32 %f1514, %f1510, %f1513; - sub.f32 %f1515, %f1510, %f1514; - add.f32 %f1516, %f1513, %f1515; - add.f32 %f1517, %f149, %f1514; - sub.f32 %f1518, %f149, %f1517; - add.f32 %f1519, %f1514, %f1518; - add.f32 %f1520, %f1516, %f1519; - add.f32 %f1521, %f150, %f1520; - add.f32 %f1522, %f1517, %f1521; - sub.f32 %f1523, %f1517, %f1522; - add.f32 %f1524, %f1521, %f1523; - mul.rn.f32 %f1526, %f886, %f1522; - neg.f32 %f1527, %f1526; - fma.rn.f32 %f1528, %f886, %f1522, %f1527; - fma.rn.f32 %f1529, %f886, %f1524, %f1528; - fma.rn.f32 %f1531, %f3334, %f1522, %f1529; - add.rn.f32 %f1532, %f1526, %f1531; - neg.f32 %f1533, %f1532; - add.rn.f32 %f1534, %f1526, %f1533; - add.rn.f32 %f1535, %f1534, %f1531; - mov.b32 %r176, %f1532; - setp.eq.s32 %p141, %r176, 1118925336; - add.s32 %r177, %r176, -1; - mov.b32 %f1536, %r177; - add.f32 %f1537, %f1535, 0f37000000; - selp.f32 %f1538, %f1536, %f1532, %p141; - selp.f32 %f284, %f1537, %f1535, %p141; - mul.f32 %f1539, %f1538, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1540, %f1539; - fma.rn.f32 %f1541, %f1540, %f3107, %f1538; - fma.rn.f32 %f1542, %f1540, %f3108, %f1541; - mul.f32 %f1543, %f1542, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1544, %f1543; - add.f32 %f1545, %f1540, 0f00000000; - ex2.approx.f32 %f1546, %f1545; - mul.f32 %f1547, %f1544, %f1546; - setp.lt.f32 %p142, %f1538, 0fC2D20000; - selp.f32 %f1548, 0f00000000, %f1547, %p142; - setp.gt.f32 %p143, %f1538, 0f42D20000; - selp.f32 %f3319, 0f7F800000, %f1548, %p143; - setp.eq.f32 %p144, %f3319, 0f7F800000; - @%p144 bra BB2_85; - - fma.rn.f32 %f3319, %f3319, %f284, %f3319; - -BB2_85: - setp.eq.f32 %p363, %f143, 0f00000000; - setp.geu.f32 %p362, %f143, 0f00000000; - mov.b32 %r178, %f3319; - xor.b32 %r179, %r178, -2147483648; - mov.b32 %f1549, %r179; - selp.f32 %f288, %f1549, %f3319, %p3; - selp.f32 %f3320, %f151, %f288, %p363; - @%p362 bra BB2_87; - - cvt.rzi.f32.f32 %f1551, %f886; - setp.neu.f32 %p146, %f1551, 0f40000000; - selp.f32 %f3320, 0f7FFFFFFF, %f288, %p146; - -BB2_87: - abs.f32 %f3123, %f143; - setp.eq.f32 %p367, %f143, 0f3F800000; - add.f32 %f3122, %f3123, 0f40000000; - mov.b32 %r293, %f3122; - setp.gt.s32 %p366, %r293, 2139095039; - setp.neu.f32 %p365, %f3123, 0f7F800000; - selp.f32 %f3121, 0fFF800000, 0f7F800000, %p3; - setp.gtu.f32 %p364, %f3123, 0f7F800000; - add.f32 %f3120, %f143, 0f40000000; - mov.f32 %f3119, 0f3DAAAABD; - mov.f32 %f3118, 0f3C4CAF63; - mov.f32 %f3117, 0f3B18F0FE; - mov.f32 %f3116, 0fB5BFBE8E; - mov.f32 %f3115, 0fBF317200; - selp.f32 %f1555, %f3120, %f3320, %p364; - selp.f32 %f1557, %f1555, %f3121, %p365; - selp.f32 %f1558, %f1557, %f3320, %p366; - mul.f32 %f1559, %f1558, 0fBF000000; - selp.f32 %f1560, 0fBF000000, %f1559, %p367; - mul.f32 %f1561, %f1560, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1562, %f1561; - fma.rn.f32 %f1564, %f1562, %f3115, %f1560; - fma.rn.f32 %f1566, %f1562, %f3116, %f1564; - mul.f32 %f1567, %f1566, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1568, %f1567; - add.f32 %f1569, %f1562, 0f00000000; - ex2.approx.f32 %f1570, %f1569; - mul.f32 %f1571, %f1568, %f1570; - setp.lt.f32 %p151, %f1560, 0fC2D20000; - selp.f32 %f1572, 0f00000000, %f1571, %p151; - setp.gt.f32 %p152, %f1560, 0f42D20000; - selp.f32 %f292, 0f7F800000, %f1572, %p152; - // inline asm - rcp.approx.ftz.f32 %f1552,%f155; - // inline asm - mul.f32 %f1573, %f1552, %f156; - mul.f32 %f1574, %f1573, %f1573; - fma.rn.f32 %f1577, %f3117, %f1574, %f3118; - fma.rn.f32 %f1579, %f1577, %f1574, %f3119; - mul.rn.f32 %f1580, %f1579, %f1574; - mul.rn.f32 %f1581, %f1580, %f1573; - sub.f32 %f1582, %f154, %f1573; - neg.f32 %f1583, %f1573; - add.f32 %f1584, %f1582, %f1582; - fma.rn.f32 %f1585, %f1583, %f154, %f1584; - mul.rn.f32 %f1586, %f1552, %f1585; - add.f32 %f1587, %f1581, %f1573; - sub.f32 %f1588, %f1573, %f1587; - add.f32 %f1589, %f1581, %f1588; - add.f32 %f1590, %f1586, %f1589; - add.f32 %f1591, %f1587, %f1590; - sub.f32 %f1592, %f1587, %f1591; - add.f32 %f1593, %f1590, %f1592; - add.f32 %f1594, %f157, %f1591; - sub.f32 %f1595, %f157, %f1594; - add.f32 %f1596, %f1591, %f1595; - add.f32 %f1597, %f1593, %f1596; - add.f32 %f1598, %f158, %f1597; - add.f32 %f1599, %f1594, %f1598; - sub.f32 %f1600, %f1594, %f1599; + mul.rn.f32 %f1183, %f1154, %f3110; + mul.rn.f32 %f1185, %f1154, %f3111; + add.f32 %f1186, %f1183, %f1179; + sub.f32 %f1187, %f1183, %f1186; + add.f32 %f1188, %f1179, %f1187; + add.f32 %f1189, %f1181, %f1188; + add.f32 %f1190, %f1185, %f1189; + add.f32 %f1191, %f1186, %f1190; + sub.f32 %f1192, %f1186, %f1191; + add.f32 %f1193, %f1190, %f1192; + mul.rn.f32 %f1194, %f600, %f1191; + neg.f32 %f1195, %f1194; + fma.rn.f32 %f1196, %f600, %f1191, %f1195; + fma.rn.f32 %f1197, %f600, %f1193, %f1196; + fma.rn.f32 %f1199, %f3263, %f1191, %f1197; + add.rn.f32 %f1200, %f1194, %f1199; + neg.f32 %f1201, %f1200; + add.rn.f32 %f1202, %f1194, %f1201; + add.rn.f32 %f1203, %f1202, %f1199; + mov.b32 %r528, %f1200; + setp.eq.s32 %p401, %r528, 1118925336; + add.s32 %r529, %r528, -1; + mov.b32 %f1204, %r529; + add.f32 %f1205, %f1203, 0f37000000; + selp.f32 %f187, %f1205, %f1203, %p401; + selp.f32 %f1206, %f1204, %f1200, %p401; + mul.rn.f32 %f1208, %f1206, %f800; + cvt.rzi.f32.f32 %f1209, %f1208; + abs.f32 %f1210, %f1209; + setp.gt.f32 %p402, %f1210, 0f42FC0000; + mov.b32 %r530, %f1209; + and.b32 %r531, %r530, -2147483648; + or.b32 %r532, %r531, 1123811328; + mov.b32 %f1211, %r532; + selp.f32 %f1212, %f1211, %f1209, %p402; + fma.rn.f32 %f1214, %f1212, %f3112, %f1206; + fma.rn.f32 %f1216, %f1212, %f3113, %f1214; + mul.f32 %f1217, %f1216, 0f3FB8AA3B; + add.f32 %f1218, %f1212, 0f4B40007F; + mov.b32 %r533, %f1218; + shl.b32 %r534, %r533, 23; + mov.b32 %f1219, %r534; + ex2.approx.ftz.f32 %f1220, %f1217; + mul.f32 %f188, %f1220, %f1219; + setp.eq.f32 %p403, %f188, 0f7F800000; + mov.f32 %f3244, 0f7F800000; + @%p403 bra $L__BB2_212; + + fma.rn.f32 %f3244, %f188, %f187, %f188; + +$L__BB2_212: + setp.lt.f32 %p404, %f185, 0f00000000; + and.pred %p36, %p404, %p301; + setp.eq.f32 %p406, %f185, 0f00000000; + @%p406 bra $L__BB2_216; + bra.uni $L__BB2_213; + +$L__BB2_216: + add.f32 %f1225, %f185, %f185; + selp.f32 %f3246, %f1225, 0f00000000, %p301; + bra.uni $L__BB2_217; + +$L__BB2_213: + mov.b32 %r535, %f3244; + xor.b32 %r536, %r535, -2147483648; + mov.b32 %f1221, %r536; + selp.f32 %f3246, %f1221, %f3244, %p36; + setp.geu.f32 %p407, %f185, 0f00000000; + @%p407 bra $L__BB2_217; + + cvt.rzi.f32.f32 %f1223, %f600; + setp.eq.f32 %p408, %f1223, 0f40000000; + @%p408 bra $L__BB2_217; + + mov.f32 %f3246, 0f7FFFFFFF; + +$L__BB2_217: + abs.f32 %f3146, %f185; + add.f32 %f1226, %f3146, 0f40000000; + mov.b32 %r537, %f1226; + setp.lt.s32 %p410, %r537, 2139095040; + @%p410 bra $L__BB2_222; + + abs.f32 %f3147, %f185; + setp.gtu.f32 %p411, %f3147, 0f7F800000; + @%p411 bra $L__BB2_221; + bra.uni $L__BB2_219; + +$L__BB2_221: + add.f32 %f3246, %f185, 0f40000000; + bra.uni $L__BB2_222; + +$L__BB2_219: + abs.f32 %f3148, %f185; + setp.neu.f32 %p412, %f3148, 0f7F800000; + @%p412 bra $L__BB2_222; + + selp.f32 %f3246, 0fFF800000, 0f7F800000, %p36; + +$L__BB2_222: + cvt.rn.f32.s32 %f3128, %r1370; + sub.f32 %f3127, %f3128, %f3278; + mov.f32 %f3126, 0f32A57060; + mov.f32 %f3125, 0f4B400001; + mov.f32 %f3124, 0f437C0000; + mov.f32 %f3123, 0f3BBB989D; + mov.f32 %f3122, 0f3102E308; + mov.f32 %f3121, 0fBF317218; + mov.f32 %f3120, 0f35BFBE8E; + mov.f32 %f3119, 0f3F317200; + mov.f32 %f3118, 0f3DAAAABD; + mov.f32 %f3117, 0f3C4CAF63; + mov.f32 %f3116, 0f3B18F0FE; + mov.f32 %f3115, 0f3F000000; + mul.f32 %f1228, %f3246, 0fBF000000; + setp.eq.f32 %p413, %f185, 0f3F800000; + selp.f32 %f1229, 0fBF000000, %f1228, %p413; + fma.rn.f32 %f1232, %f1229, %f3123, %f3115; + cvt.sat.f32.f32 %f1235, %f1232; + fma.rm.f32 %f1237, %f1235, %f3124, %f3125; + add.f32 %f1238, %f1237, 0fCB40007F; + neg.f32 %f1239, %f1238; + fma.rn.f32 %f1240, %f1229, %f800, %f1239; + fma.rn.f32 %f1242, %f1229, %f3126, %f1240; + mov.b32 %r538, %f1237; + shl.b32 %r539, %r538, 23; + mov.b32 %f1243, %r539; + ex2.approx.ftz.f32 %f1244, %f1242; + mul.f32 %f197, %f1244, %f1243; + div.rn.f32 %f198, %f3127, %f99; + abs.f32 %f199, %f198; + setp.lt.f32 %p414, %f199, 0f00800000; + mul.f32 %f1246, %f199, 0f4B800000; + selp.f32 %f1247, %f1246, %f199, %p414; + selp.f32 %f1248, 0fC3170000, 0fC2FE0000, %p414; + mov.b32 %r540, %f1247; + and.b32 %r541, %r540, 8388607; + or.b32 %r542, %r541, 1065353216; + mov.b32 %f1249, %r542; + shr.u32 %r543, %r540, 23; + cvt.rn.f32.u32 %f1250, %r543; + add.f32 %f1251, %f1248, %f1250; + setp.gt.f32 %p415, %f1249, 0f3FB504F3; + mul.f32 %f1252, %f1249, 0f3F000000; + add.f32 %f1253, %f1251, 0f3F800000; + selp.f32 %f1254, %f1253, %f1251, %p415; + selp.f32 %f1255, %f1252, %f1249, %p415; + add.f32 %f1256, %f1255, 0fBF800000; + add.f32 %f1257, %f1255, 0f3F800000; + rcp.approx.ftz.f32 %f1258, %f1257; + add.f32 %f1259, %f1256, %f1256; + mul.f32 %f1261, %f1259, %f1258; + mul.f32 %f1262, %f1261, %f1261; + fma.rn.f32 %f1265, %f3116, %f1262, %f3117; + fma.rn.f32 %f1267, %f1265, %f1262, %f3118; + mul.rn.f32 %f1268, %f1267, %f1262; + mul.rn.f32 %f1269, %f1268, %f1261; + sub.f32 %f1270, %f1256, %f1261; + add.f32 %f1271, %f1270, %f1270; + neg.f32 %f1272, %f1261; + fma.rn.f32 %f1273, %f1272, %f1256, %f1271; + mul.rn.f32 %f1274, %f1258, %f1273; + add.f32 %f1275, %f1269, %f1261; + sub.f32 %f1276, %f1261, %f1275; + add.f32 %f1277, %f1269, %f1276; + add.f32 %f1278, %f1274, %f1277; + add.f32 %f1279, %f1275, %f1278; + sub.f32 %f1280, %f1275, %f1279; + add.f32 %f1281, %f1278, %f1280; + mul.rn.f32 %f1283, %f1254, %f3119; + mul.rn.f32 %f1285, %f1254, %f3120; + add.f32 %f1286, %f1283, %f1279; + sub.f32 %f1287, %f1283, %f1286; + add.f32 %f1288, %f1279, %f1287; + add.f32 %f1289, %f1281, %f1288; + add.f32 %f1290, %f1285, %f1289; + add.f32 %f1291, %f1286, %f1290; + sub.f32 %f1292, %f1286, %f1291; + add.f32 %f1293, %f1290, %f1292; + mul.rn.f32 %f1294, %f600, %f1291; + neg.f32 %f1295, %f1294; + fma.rn.f32 %f1296, %f600, %f1291, %f1295; + fma.rn.f32 %f1297, %f600, %f1293, %f1296; + fma.rn.f32 %f1299, %f3263, %f1291, %f1297; + add.rn.f32 %f1300, %f1294, %f1299; + neg.f32 %f1301, %f1300; + add.rn.f32 %f1302, %f1294, %f1301; + add.rn.f32 %f1303, %f1302, %f1299; + mov.b32 %r544, %f1300; + setp.eq.s32 %p416, %r544, 1118925336; + add.s32 %r545, %r544, -1; + mov.b32 %f1304, %r545; + add.f32 %f1305, %f1303, 0f37000000; + selp.f32 %f200, %f1305, %f1303, %p416; + selp.f32 %f1306, %f1304, %f1300, %p416; + mul.rn.f32 %f1307, %f1306, %f800; + cvt.rzi.f32.f32 %f1308, %f1307; + abs.f32 %f1309, %f1308; + setp.gt.f32 %p417, %f1309, 0f42FC0000; + mov.b32 %r546, %f1308; + and.b32 %r547, %r546, -2147483648; + or.b32 %r548, %r547, 1123811328; + mov.b32 %f1310, %r548; + selp.f32 %f1311, %f1310, %f1308, %p417; + fma.rn.f32 %f1313, %f1311, %f3121, %f1306; + fma.rn.f32 %f1315, %f1311, %f3122, %f1313; + mul.f32 %f1316, %f1315, 0f3FB8AA3B; + add.f32 %f1317, %f1311, 0f4B40007F; + mov.b32 %r549, %f1317; + shl.b32 %r550, %r549, 23; + mov.b32 %f1318, %r550; + ex2.approx.ftz.f32 %f1319, %f1316; + mul.f32 %f201, %f1319, %f1318; + setp.eq.f32 %p418, %f201, 0f7F800000; + mov.f32 %f3247, 0f7F800000; + @%p418 bra $L__BB2_224; + + fma.rn.f32 %f3247, %f201, %f200, %f201; + +$L__BB2_224: + setp.lt.f32 %p419, %f198, 0f00000000; + and.pred %p37, %p419, %p301; + setp.eq.f32 %p421, %f198, 0f00000000; + @%p421 bra $L__BB2_228; + bra.uni $L__BB2_225; + +$L__BB2_228: + add.f32 %f1324, %f198, %f198; + selp.f32 %f3249, %f1324, 0f00000000, %p301; + bra.uni $L__BB2_229; + +$L__BB2_225: + mov.b32 %r551, %f3247; + xor.b32 %r552, %r551, -2147483648; + mov.b32 %f1320, %r552; + selp.f32 %f3249, %f1320, %f3247, %p37; + setp.geu.f32 %p422, %f198, 0f00000000; + @%p422 bra $L__BB2_229; + + cvt.rzi.f32.f32 %f1322, %f600; + setp.eq.f32 %p423, %f1322, 0f40000000; + @%p423 bra $L__BB2_229; + + mov.f32 %f3249, 0f7FFFFFFF; + +$L__BB2_229: + abs.f32 %f2984, %f198; + add.f32 %f1325, %f2984, 0f40000000; + mov.b32 %r553, %f1325; + setp.lt.s32 %p425, %r553, 2139095040; + @%p425 bra $L__BB2_234; + + abs.f32 %f3135, %f198; + setp.gtu.f32 %p426, %f3135, 0f7F800000; + @%p426 bra $L__BB2_233; + bra.uni $L__BB2_231; + +$L__BB2_233: + add.f32 %f3249, %f198, 0f40000000; + bra.uni $L__BB2_234; + +$L__BB2_231: + abs.f32 %f3136, %f198; + setp.neu.f32 %p427, %f3136, 0f7F800000; + @%p427 bra $L__BB2_234; + + selp.f32 %f3249, 0fFF800000, 0f7F800000, %p37; + +$L__BB2_234: + cvt.f64.f32 %fd1046, %f99; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1345}, %fd1046; + } + setp.lt.s32 %p1283, %r1345, 0; + mov.f64 %fd1045, 0d4014000000000000; + cvt.rn.f32.s32 %f2991, %r1370; + sub.f32 %f2990, %f2991, %f3278; + mov.f32 %f2989, 0f32A57060; + mov.f32 %f2988, 0f4B400001; + mov.f32 %f2987, 0f437C0000; + mov.f32 %f2986, 0f3BBB989D; + mov.f32 %f2985, 0f3F000000; + and.b32 %r554, %r78, 2146435072; + setp.eq.s32 %p428, %r554, 1074790400; + mul.f32 %f1326, %f3249, 0fBF000000; + setp.eq.f32 %p429, %f198, 0f3F800000; + selp.f32 %f1327, 0fBF000000, %f1326, %p429; + fma.rn.f32 %f1330, %f1327, %f2986, %f2985; + cvt.sat.f32.f32 %f1333, %f1330; + fma.rm.f32 %f1335, %f1333, %f2987, %f2988; + add.f32 %f1336, %f1335, 0fCB40007F; + neg.f32 %f1337, %f1336; + fma.rn.f32 %f1338, %f1327, %f800, %f1337; + fma.rn.f32 %f1340, %f1327, %f2989, %f1338; + mov.b32 %r555, %f1335; + shl.b32 %r556, %r555, 23; + mov.b32 %f1341, %r556; + ex2.approx.ftz.f32 %f1342, %f1340; + mul.f32 %f210, %f1342, %f1341; + add.f32 %f1344, %f2990, 0f3F800000; + mul.f32 %f1345, %f1344, %f197; + mul.f32 %f1346, %f2990, %f210; + sub.f32 %f1347, %f1345, %f1346; + div.rn.f32 %f1348, %f152, %f99; + mul.f32 %f1349, %f1348, %f1347; + mul.f32 %f211, %f125, %f1349; + { // callseq 42, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd120; + .param .b64 param1; + st.param.f64 [param1+0], %fd1045; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1105, [retval0+0]; + } // callseq 42 + and.pred %p38, %p1283, %p428; + not.pred %p431, %p38; + @%p431 bra $L__BB2_236; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r557}, %fd1105; + } + xor.b32 %r558, %r557, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r559, %temp}, %fd1105; + } + mov.b64 %fd1105, {%r559, %r558}; + +$L__BB2_236: + setp.eq.f32 %p1284, %f99, 0f00000000; + @%p1284 bra $L__BB2_240; + bra.uni $L__BB2_237; + +$L__BB2_240: + setp.lt.s32 %p435, %r78, 0; + mov.u32 %r560, 0; + selp.b32 %r562, %r104, 0, %p428; + or.b32 %r563, %r562, 2146435072; + selp.b32 %r564, %r563, %r562, %p435; + mov.b64 %fd1105, {%r560, %r564}; + bra.uni $L__BB2_241; + +$L__BB2_237: + setp.gt.s32 %p433, %r104, -1; + @%p433 bra $L__BB2_241; + + mov.f64 %fd1070, 0d4014000000000000; + cvt.rzi.f64.f64 %fd734, %fd1070; + setp.eq.f64 %p434, %fd734, 0d4014000000000000; + @%p434 bra $L__BB2_241; + + mov.f64 %fd1105, 0dFFF8000000000000; + +$L__BB2_241: + add.f64 %fd146, %fd119, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r565}, %fd146; + } + and.b32 %r566, %r565, 2146435072; + setp.ne.s32 %p437, %r566, 2146435072; + mov.f64 %fd1106, %fd1105; + @%p437 bra $L__BB2_247; + + setp.gtu.f64 %p438, %fd120, 0d7FF0000000000000; + mov.f64 %fd1106, %fd146; + @%p438 bra $L__BB2_247; + + mov.f64 %fd1069, 0d4014000000000000; + and.b32 %r567, %r78, 2147483647; + setp.eq.s32 %p439, %r567, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r568, %temp}, %fd1069; + } + setp.eq.s32 %p440, %r568, 0; + and.pred %p441, %p439, %p440; + @%p441 bra $L__BB2_246; + bra.uni $L__BB2_244; + +$L__BB2_246: + setp.lt.s32 %p448, %r78, 0; + mov.u32 %r576, 0; + setp.gt.f64 %p449, %fd120, 0d3FF0000000000000; + selp.b32 %r577, 2146435072, 0, %p449; + xor.b32 %r578, %r577, 2146435072; + selp.b32 %r579, %r578, %r577, %p448; + setp.eq.f32 %p450, %f99, 0fBF800000; + selp.b32 %r580, 1072693248, %r579, %p450; + mov.b64 %fd1106, {%r576, %r580}; + bra.uni $L__BB2_247; + +$L__BB2_244: + { + .reg .b32 %temp; + mov.b64 {%r569, %temp}, %fd119; + } + and.b32 %r570, %r104, 2147483647; + setp.ne.s32 %p442, %r570, 2146435072; + setp.ne.s32 %p443, %r569, 0; + or.pred %p444, %p442, %p443; + mov.f64 %fd1106, %fd1105; + @%p444 bra $L__BB2_247; + + setp.ne.s32 %p445, %r567, 1071644672; + and.pred %p446, %p445, %p38; + setp.gt.s32 %p447, %r78, -1; + selp.b32 %r572, 2146435072, 0, %p447; + mov.u32 %r573, 0; + or.b32 %r574, %r572, -2147483648; + selp.b32 %r575, %r574, %r572, %p446; + mov.b64 %fd1106, {%r573, %r575}; + +$L__BB2_247: + not.pred %p451, %p10; + mov.f64 %fd1108, %fd60; + @%p451 bra $L__BB2_249; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r581}, %fd60; + } + xor.b32 %r582, %r581, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r583, %temp}, %fd60; + } + mov.b64 %fd1108, {%r583, %r582}; + +$L__BB2_249: + setp.eq.f32 %p1285, %f99, 0f3F800000; + setp.eq.f32 %p452, %f81, 0f00000000; + selp.f64 %fd152, 0d3FF0000000000000, %fd1106, %p1285; + @%p452 bra $L__BB2_253; + bra.uni $L__BB2_250; + +$L__BB2_253: + mov.u32 %r584, 0; + selp.b32 %r586, %r76, 0, %p149; + or.b32 %r587, %r586, 2146435072; + selp.b32 %r588, %r587, %r586, %p152; + mov.b64 %fd1108, {%r584, %r588}; + bra.uni $L__BB2_254; + +$L__BB2_250: + setp.gt.s32 %p454, %r76, -1; + @%p454 bra $L__BB2_254; + + cvt.rzi.f64.f64 %fd738, %fd647; + setp.eq.f64 %p455, %fd738, 0d4008000000000000; + @%p455 bra $L__BB2_254; + + mov.f64 %fd1108, 0dFFF8000000000000; + +$L__BB2_254: + selp.f64 %fd1109, %fd1108, %fd61, %p172; + @%p23 bra $L__BB2_259; + + setp.eq.s32 %p459, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r589, %temp}, %fd647; + } + setp.eq.s32 %p460, %r589, 0; + and.pred %p461, %p459, %p460; + @%p461 bra $L__BB2_258; + bra.uni $L__BB2_256; + +$L__BB2_258: + mov.u32 %r596, 0; + mov.b64 %fd1109, {%r596, %r80}; + bra.uni $L__BB2_259; + +$L__BB2_256: + cvt.rn.f32.s32 %f2994, %r1370; + sub.f32 %f2993, %f2994, %f3278; + add.f32 %f2992, %f2993, 0f3F000000; + cvt.f64.f32 %fd1047, %f2992; + and.b32 %r590, %r76, 2147483647; + setp.ne.s32 %p462, %r590, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r591, %temp}, %fd1047; + } + setp.ne.s32 %p463, %r591, 0; + or.pred %p464, %p462, %p463; + mov.f64 %fd1109, %fd1108; + @%p464 bra $L__BB2_259; + + setp.ne.s32 %p465, %r58, 1071644672; + and.pred %p466, %p465, %p10; + selp.b32 %r594, %r63, %r62, %p466; + mov.u32 %r595, 0; + mov.b64 %fd1109, {%r595, %r594}; + +$L__BB2_259: + setp.eq.f32 %p467, %f81, 0f3F800000; + selp.f64 %fd742, 0d3FF0000000000000, %fd1109, %p467; + cvt.f64.f32 %fd743, %f197; + mul.f64 %fd159, %fd742, %fd743; + not.pred %p468, %p11; + mov.f64 %fd1111, %fd62; + @%p468 bra $L__BB2_261; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r597}, %fd62; + } + xor.b32 %r598, %r597, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r599, %temp}, %fd62; + } + mov.b64 %fd1111, {%r599, %r598}; + +$L__BB2_261: + setp.eq.f32 %p469, %f82, 0f00000000; + @%p469 bra $L__BB2_265; + bra.uni $L__BB2_262; + +$L__BB2_265: + mov.u32 %r600, 0; + selp.b32 %r602, %r79, 0, %p149; + or.b32 %r603, %r602, 2146435072; + selp.b32 %r604, %r603, %r602, %p152; + mov.b64 %fd1111, {%r600, %r604}; + bra.uni $L__BB2_266; + +$L__BB2_262: + setp.gt.s32 %p470, %r79, -1; + @%p470 bra $L__BB2_266; + + cvt.rzi.f64.f64 %fd745, %fd647; + setp.eq.f64 %p471, %fd745, 0d4008000000000000; + @%p471 bra $L__BB2_266; + + mov.f64 %fd1111, 0dFFF8000000000000; + +$L__BB2_266: + selp.f64 %fd1112, %fd1111, %fd63, %p177; + @%p24 bra $L__BB2_271; + + setp.eq.s32 %p475, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r605, %temp}, %fd647; + } + setp.eq.s32 %p476, %r605, 0; + and.pred %p477, %p475, %p476; + @%p477 bra $L__BB2_270; + bra.uni $L__BB2_268; + +$L__BB2_270: + mov.u32 %r612, 0; + mov.b64 %fd1112, {%r612, %r82}; + bra.uni $L__BB2_271; + +$L__BB2_268: + cvt.rn.f32.s32 %f2997, %r1370; + sub.f32 %f2996, %f2997, %f3278; + add.f32 %f2995, %f2996, 0fBF000000; + cvt.f64.f32 %fd1048, %f2995; + and.b32 %r606, %r79, 2147483647; + setp.ne.s32 %p478, %r606, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r607, %temp}, %fd1048; + } + setp.ne.s32 %p479, %r607, 0; + or.pred %p480, %p478, %p479; + mov.f64 %fd1112, %fd1111; + @%p480 bra $L__BB2_271; + + setp.ne.s32 %p481, %r58, 1071644672; + and.pred %p482, %p481, %p11; + selp.b32 %r610, %r63, %r62, %p482; + mov.u32 %r611, 0; + mov.b64 %fd1112, {%r611, %r610}; + +$L__BB2_271: + cvt.f64.f32 %fd1049, %f125; + cvt.rn.f32.s32 %f3005, %r1371; + mov.f32 %f3004, 0f3102E308; + mov.f32 %f3003, 0fBF317218; + mov.f32 %f3002, 0f35BFBE8E; + mov.f32 %f3001, 0f3F317200; + mov.f32 %f3000, 0f3DAAAABD; + mov.f32 %f2999, 0f3C4CAF63; + mov.f32 %f2998, 0f3B18F0FE; + setp.eq.f32 %p483, %f82, 0f3F800000; + selp.f64 %fd749, 0d3FF0000000000000, %fd1112, %p483; + cvt.f64.f32 %fd750, %f210; + mul.f64 %fd751, %fd749, %fd750; + sub.f64 %fd752, %fd159, %fd751; + div.rn.f64 %fd753, %fd43, %fd152; + mul.f64 %fd754, %fd753, %fd752; + mul.f64 %fd756, %fd754, %fd1049; + mov.f32 %f1355, 0fC0000000; + div.rn.f32 %f1356, %f1355, %f99; + mul.f32 %f1357, %f1356, %f211; + cvt.f64.f32 %fd757, %f1357; + sub.f64 %fd758, %fd757, %fd756; + cvt.rn.f32.f64 %f212, %fd758; + add.f32 %f1358, %f3005, 0f3F800000; + sub.f32 %f1359, %f1358, %f3277; + div.rn.f32 %f213, %f1359, %f101; + abs.f32 %f214, %f213; + setp.lt.f32 %p484, %f214, 0f00800000; + mul.f32 %f1360, %f214, 0f4B800000; + selp.f32 %f1361, %f1360, %f214, %p484; + selp.f32 %f1362, 0fC3170000, 0fC2FE0000, %p484; + mov.b32 %r613, %f1361; + and.b32 %r614, %r613, 8388607; + or.b32 %r615, %r614, 1065353216; + mov.b32 %f1363, %r615; + shr.u32 %r616, %r613, 23; + cvt.rn.f32.u32 %f1364, %r616; + add.f32 %f1365, %f1362, %f1364; + setp.gt.f32 %p485, %f1363, 0f3FB504F3; + mul.f32 %f1366, %f1363, 0f3F000000; + add.f32 %f1367, %f1365, 0f3F800000; + selp.f32 %f1368, %f1367, %f1365, %p485; + selp.f32 %f1369, %f1366, %f1363, %p485; + add.f32 %f1370, %f1369, 0fBF800000; + add.f32 %f1371, %f1369, 0f3F800000; + rcp.approx.ftz.f32 %f1372, %f1371; + add.f32 %f1373, %f1370, %f1370; + mul.f32 %f1375, %f1373, %f1372; + mul.f32 %f1376, %f1375, %f1375; + fma.rn.f32 %f1379, %f2998, %f1376, %f2999; + fma.rn.f32 %f1381, %f1379, %f1376, %f3000; + mul.rn.f32 %f1382, %f1381, %f1376; + mul.rn.f32 %f1383, %f1382, %f1375; + sub.f32 %f1384, %f1370, %f1375; + add.f32 %f1385, %f1384, %f1384; + neg.f32 %f1386, %f1375; + fma.rn.f32 %f1387, %f1386, %f1370, %f1385; + mul.rn.f32 %f1388, %f1372, %f1387; + add.f32 %f1389, %f1383, %f1375; + sub.f32 %f1390, %f1375, %f1389; + add.f32 %f1391, %f1383, %f1390; + add.f32 %f1392, %f1388, %f1391; + add.f32 %f1393, %f1389, %f1392; + sub.f32 %f1394, %f1389, %f1393; + add.f32 %f1395, %f1392, %f1394; + mul.rn.f32 %f1397, %f1368, %f3001; + mul.rn.f32 %f1399, %f1368, %f3002; + add.f32 %f1400, %f1397, %f1393; + sub.f32 %f1401, %f1397, %f1400; + add.f32 %f1402, %f1393, %f1401; + add.f32 %f1403, %f1395, %f1402; + add.f32 %f1404, %f1399, %f1403; + add.f32 %f1405, %f1400, %f1404; + sub.f32 %f1406, %f1400, %f1405; + add.f32 %f1407, %f1404, %f1406; + mul.rn.f32 %f1408, %f600, %f1405; + neg.f32 %f1409, %f1408; + fma.rn.f32 %f1410, %f600, %f1405, %f1409; + fma.rn.f32 %f1411, %f600, %f1407, %f1410; + fma.rn.f32 %f1413, %f3263, %f1405, %f1411; + add.rn.f32 %f1414, %f1408, %f1413; + neg.f32 %f1415, %f1414; + add.rn.f32 %f1416, %f1408, %f1415; + add.rn.f32 %f1417, %f1416, %f1413; + mov.b32 %r617, %f1414; + setp.eq.s32 %p486, %r617, 1118925336; + add.s32 %r618, %r617, -1; + mov.b32 %f1418, %r618; + add.f32 %f1419, %f1417, 0f37000000; + selp.f32 %f215, %f1419, %f1417, %p486; + selp.f32 %f1420, %f1418, %f1414, %p486; + mul.rn.f32 %f1422, %f1420, %f800; + cvt.rzi.f32.f32 %f1423, %f1422; + abs.f32 %f1424, %f1423; + setp.gt.f32 %p487, %f1424, 0f42FC0000; + mov.b32 %r619, %f1423; + and.b32 %r620, %r619, -2147483648; + or.b32 %r621, %r620, 1123811328; + mov.b32 %f1425, %r621; + selp.f32 %f1426, %f1425, %f1423, %p487; + fma.rn.f32 %f1428, %f1426, %f3003, %f1420; + fma.rn.f32 %f1430, %f1426, %f3004, %f1428; + mul.f32 %f1431, %f1430, 0f3FB8AA3B; + add.f32 %f1432, %f1426, 0f4B40007F; + mov.b32 %r622, %f1432; + shl.b32 %r623, %r622, 23; + mov.b32 %f1433, %r623; + ex2.approx.ftz.f32 %f1434, %f1431; + mul.f32 %f216, %f1434, %f1433; + setp.eq.f32 %p488, %f216, 0f7F800000; + mov.f32 %f3250, 0f7F800000; + @%p488 bra $L__BB2_273; + + fma.rn.f32 %f3250, %f216, %f215, %f216; + +$L__BB2_273: + setp.lt.f32 %p489, %f213, 0f00000000; + and.pred %p39, %p489, %p301; + setp.eq.f32 %p491, %f213, 0f00000000; + @%p491 bra $L__BB2_277; + bra.uni $L__BB2_274; + +$L__BB2_277: + add.f32 %f1439, %f213, %f213; + selp.f32 %f3252, %f1439, 0f00000000, %p301; + bra.uni $L__BB2_278; + +$L__BB2_274: + mov.b32 %r624, %f3250; + xor.b32 %r625, %r624, -2147483648; + mov.b32 %f1435, %r625; + selp.f32 %f3252, %f1435, %f3250, %p39; + setp.geu.f32 %p492, %f213, 0f00000000; + @%p492 bra $L__BB2_278; + + cvt.rzi.f32.f32 %f1437, %f600; + setp.eq.f32 %p493, %f1437, 0f40000000; + @%p493 bra $L__BB2_278; + + mov.f32 %f3252, 0f7FFFFFFF; + +$L__BB2_278: + abs.f32 %f3149, %f213; + add.f32 %f1440, %f3149, 0f40000000; + mov.b32 %r626, %f1440; + setp.lt.s32 %p495, %r626, 2139095040; + @%p495 bra $L__BB2_283; + + abs.f32 %f3151, %f213; + setp.gtu.f32 %p496, %f3151, 0f7F800000; + @%p496 bra $L__BB2_282; + bra.uni $L__BB2_280; + +$L__BB2_282: + add.f32 %f3252, %f213, 0f40000000; + bra.uni $L__BB2_283; + +$L__BB2_280: + abs.f32 %f3152, %f213; + setp.neu.f32 %p497, %f3152, 0f7F800000; + @%p497 bra $L__BB2_283; + + selp.f32 %f3252, 0fFF800000, 0f7F800000, %p39; + +$L__BB2_283: + mov.f32 %f3019, 0f32A57060; + mov.f32 %f3018, 0f4B400001; + mov.f32 %f3017, 0f437C0000; + mov.f32 %f3016, 0f3BBB989D; + mov.f32 %f3015, 0f3102E308; + mov.f32 %f3014, 0fBF317218; + mov.f32 %f3013, 0f35BFBE8E; + mov.f32 %f3012, 0f3F317200; + mov.f32 %f3011, 0f3DAAAABD; + mov.f32 %f3010, 0f3C4CAF63; + mov.f32 %f3009, 0f3B18F0FE; + cvt.rn.f32.s32 %f3008, %r1371; + sub.f32 %f3007, %f3008, %f3277; + mov.f32 %f3006, 0f3F000000; + mul.f32 %f1442, %f3252, 0fBF000000; + setp.eq.f32 %p498, %f213, 0f3F800000; + selp.f32 %f1443, 0fBF000000, %f1442, %p498; + fma.rn.f32 %f1446, %f1443, %f3016, %f3006; + cvt.sat.f32.f32 %f1449, %f1446; + fma.rm.f32 %f1451, %f1449, %f3017, %f3018; + add.f32 %f1452, %f1451, 0fCB40007F; + neg.f32 %f1453, %f1452; + fma.rn.f32 %f1454, %f1443, %f800, %f1453; + fma.rn.f32 %f1456, %f1443, %f3019, %f1454; + mov.b32 %r627, %f1451; + shl.b32 %r628, %r627, 23; + mov.b32 %f1457, %r628; + ex2.approx.ftz.f32 %f1458, %f1456; + mul.f32 %f225, %f1458, %f1457; + div.rn.f32 %f226, %f3007, %f101; + abs.f32 %f227, %f226; + setp.lt.f32 %p499, %f227, 0f00800000; + mul.f32 %f1459, %f227, 0f4B800000; + selp.f32 %f1460, %f1459, %f227, %p499; + selp.f32 %f1461, 0fC3170000, 0fC2FE0000, %p499; + mov.b32 %r629, %f1460; + and.b32 %r630, %r629, 8388607; + or.b32 %r631, %r630, 1065353216; + mov.b32 %f1462, %r631; + shr.u32 %r632, %r629, 23; + cvt.rn.f32.u32 %f1463, %r632; + add.f32 %f1464, %f1461, %f1463; + setp.gt.f32 %p500, %f1462, 0f3FB504F3; + mul.f32 %f1465, %f1462, 0f3F000000; + add.f32 %f1466, %f1464, 0f3F800000; + selp.f32 %f1467, %f1466, %f1464, %p500; + selp.f32 %f1468, %f1465, %f1462, %p500; + add.f32 %f1469, %f1468, 0fBF800000; + add.f32 %f1470, %f1468, 0f3F800000; + rcp.approx.ftz.f32 %f1471, %f1470; + add.f32 %f1472, %f1469, %f1469; + mul.f32 %f1474, %f1472, %f1471; + mul.f32 %f1475, %f1474, %f1474; + fma.rn.f32 %f1478, %f3009, %f1475, %f3010; + fma.rn.f32 %f1480, %f1478, %f1475, %f3011; + mul.rn.f32 %f1481, %f1480, %f1475; + mul.rn.f32 %f1482, %f1481, %f1474; + sub.f32 %f1483, %f1469, %f1474; + add.f32 %f1484, %f1483, %f1483; + neg.f32 %f1485, %f1474; + fma.rn.f32 %f1486, %f1485, %f1469, %f1484; + mul.rn.f32 %f1487, %f1471, %f1486; + add.f32 %f1488, %f1482, %f1474; + sub.f32 %f1489, %f1474, %f1488; + add.f32 %f1490, %f1482, %f1489; + add.f32 %f1491, %f1487, %f1490; + add.f32 %f1492, %f1488, %f1491; + sub.f32 %f1493, %f1488, %f1492; + add.f32 %f1494, %f1491, %f1493; + mul.rn.f32 %f1496, %f1467, %f3012; + mul.rn.f32 %f1498, %f1467, %f3013; + add.f32 %f1499, %f1496, %f1492; + sub.f32 %f1500, %f1496, %f1499; + add.f32 %f1501, %f1492, %f1500; + add.f32 %f1502, %f1494, %f1501; + add.f32 %f1503, %f1498, %f1502; + add.f32 %f1504, %f1499, %f1503; + sub.f32 %f1505, %f1499, %f1504; + add.f32 %f1506, %f1503, %f1505; + mul.rn.f32 %f1507, %f600, %f1504; + neg.f32 %f1508, %f1507; + fma.rn.f32 %f1509, %f600, %f1504, %f1508; + fma.rn.f32 %f1510, %f600, %f1506, %f1509; + fma.rn.f32 %f1512, %f3263, %f1504, %f1510; + add.rn.f32 %f1513, %f1507, %f1512; + neg.f32 %f1514, %f1513; + add.rn.f32 %f1515, %f1507, %f1514; + add.rn.f32 %f1516, %f1515, %f1512; + mov.b32 %r633, %f1513; + setp.eq.s32 %p501, %r633, 1118925336; + add.s32 %r634, %r633, -1; + mov.b32 %f1517, %r634; + add.f32 %f1518, %f1516, 0f37000000; + selp.f32 %f228, %f1518, %f1516, %p501; + selp.f32 %f1519, %f1517, %f1513, %p501; + mul.rn.f32 %f1520, %f1519, %f800; + cvt.rzi.f32.f32 %f1521, %f1520; + abs.f32 %f1522, %f1521; + setp.gt.f32 %p502, %f1522, 0f42FC0000; + mov.b32 %r635, %f1521; + and.b32 %r636, %r635, -2147483648; + or.b32 %r637, %r636, 1123811328; + mov.b32 %f1523, %r637; + selp.f32 %f1524, %f1523, %f1521, %p502; + fma.rn.f32 %f1526, %f1524, %f3014, %f1519; + fma.rn.f32 %f1528, %f1524, %f3015, %f1526; + mul.f32 %f1529, %f1528, 0f3FB8AA3B; + add.f32 %f1530, %f1524, 0f4B40007F; + mov.b32 %r638, %f1530; + shl.b32 %r639, %r638, 23; + mov.b32 %f1531, %r639; + ex2.approx.ftz.f32 %f1532, %f1529; + mul.f32 %f229, %f1532, %f1531; + setp.eq.f32 %p503, %f229, 0f7F800000; + mov.f32 %f3253, 0f7F800000; + @%p503 bra $L__BB2_285; + + fma.rn.f32 %f3253, %f229, %f228, %f229; + +$L__BB2_285: + setp.lt.f32 %p504, %f226, 0f00000000; + and.pred %p40, %p504, %p301; + setp.eq.f32 %p506, %f226, 0f00000000; + @%p506 bra $L__BB2_289; + bra.uni $L__BB2_286; + +$L__BB2_289: + add.f32 %f1537, %f226, %f226; + selp.f32 %f3255, %f1537, 0f00000000, %p301; + bra.uni $L__BB2_290; + +$L__BB2_286: + mov.b32 %r640, %f3253; + xor.b32 %r641, %r640, -2147483648; + mov.b32 %f1533, %r641; + selp.f32 %f3255, %f1533, %f3253, %p40; + setp.geu.f32 %p507, %f226, 0f00000000; + @%p507 bra $L__BB2_290; + + cvt.rzi.f32.f32 %f1535, %f600; + setp.eq.f32 %p508, %f1535, 0f40000000; + @%p508 bra $L__BB2_290; + + mov.f32 %f3255, 0f7FFFFFFF; + +$L__BB2_290: + abs.f32 %f3153, %f226; + add.f32 %f1538, %f3153, 0f40000000; + mov.b32 %r642, %f1538; + setp.lt.s32 %p510, %r642, 2139095040; + @%p510 bra $L__BB2_295; + + abs.f32 %f3154, %f226; + setp.gtu.f32 %p511, %f3154, 0f7F800000; + @%p511 bra $L__BB2_294; + bra.uni $L__BB2_292; + +$L__BB2_294: + add.f32 %f3255, %f226, 0f40000000; + bra.uni $L__BB2_295; + +$L__BB2_292: + abs.f32 %f3155, %f226; + setp.neu.f32 %p512, %f3155, 0f7F800000; + @%p512 bra $L__BB2_295; + + selp.f32 %f3255, 0fFF800000, 0f7F800000, %p40; + +$L__BB2_295: + cvt.f64.f32 %fd1051, %f101; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1346}, %fd1051; + } + setp.lt.s32 %p1286, %r1346, 0; + mov.f64 %fd1050, 0d4014000000000000; + mov.f32 %f3026, 0f32A57060; + mov.f32 %f3025, 0f4B400001; + mov.f32 %f3024, 0f437C0000; + mov.f32 %f3023, 0f3BBB989D; + cvt.rn.f32.s32 %f3022, %r1371; + sub.f32 %f3021, %f3022, %f3277; + mov.f32 %f3020, 0f3F000000; + mul.f32 %f1539, %f3255, 0fBF000000; + setp.eq.f32 %p514, %f226, 0f3F800000; + selp.f32 %f1540, 0fBF000000, %f1539, %p514; + fma.rn.f32 %f1543, %f1540, %f3023, %f3020; + cvt.sat.f32.f32 %f1546, %f1543; + fma.rm.f32 %f1548, %f1546, %f3024, %f3025; + add.f32 %f1549, %f1548, 0fCB40007F; + neg.f32 %f1550, %f1549; + fma.rn.f32 %f1551, %f1540, %f800, %f1550; + fma.rn.f32 %f1553, %f1540, %f3026, %f1551; + mov.b32 %r644, %f1548; + shl.b32 %r645, %r644, 23; + mov.b32 %f1554, %r645; + ex2.approx.ftz.f32 %f1555, %f1553; + mul.f32 %f238, %f1555, %f1554; + add.f32 %f1556, %f3021, 0f3F800000; + mul.f32 %f1557, %f1556, %f225; + mul.f32 %f1558, %f3021, %f238; + sub.f32 %f1559, %f1557, %f1558; + div.rn.f32 %f1560, %f182, %f101; + mul.f32 %f1561, %f1560, %f1559; + mul.f32 %f239, %f111, %f1561; + { // callseq 43, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd131; + .param .b64 param1; + st.param.f64 [param1+0], %fd1050; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1114, [retval0+0]; + } // callseq 43 + and.pred %p41, %p1286, %p428; + not.pred %p516, %p41; + @%p516 bra $L__BB2_297; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r646}, %fd1114; + } + xor.b32 %r647, %r646, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r648, %temp}, %fd1114; + } + mov.b64 %fd1114, {%r648, %r647}; + +$L__BB2_297: + setp.eq.f32 %p1287, %f101, 0f00000000; + @%p1287 bra $L__BB2_301; + bra.uni $L__BB2_298; + +$L__BB2_301: + setp.lt.s32 %p520, %r78, 0; + mov.u32 %r649, 0; + selp.b32 %r651, %r105, 0, %p428; + or.b32 %r652, %r651, 2146435072; + selp.b32 %r653, %r652, %r651, %p520; + mov.b64 %fd1114, {%r649, %r653}; + bra.uni $L__BB2_302; + +$L__BB2_298: + setp.gt.s32 %p518, %r105, -1; + @%p518 bra $L__BB2_302; + + mov.f64 %fd1068, 0d4014000000000000; + cvt.rzi.f64.f64 %fd761, %fd1068; + setp.eq.f64 %p519, %fd761, 0d4014000000000000; + @%p519 bra $L__BB2_302; + + mov.f64 %fd1114, 0dFFF8000000000000; + +$L__BB2_302: + add.f64 %fd173, %fd130, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r654}, %fd173; + } + and.b32 %r655, %r654, 2146435072; + setp.ne.s32 %p522, %r655, 2146435072; + mov.f64 %fd1115, %fd1114; + @%p522 bra $L__BB2_308; + + setp.gtu.f64 %p523, %fd131, 0d7FF0000000000000; + mov.f64 %fd1115, %fd173; + @%p523 bra $L__BB2_308; + + mov.f64 %fd1067, 0d4014000000000000; + and.b32 %r656, %r78, 2147483647; + setp.eq.s32 %p524, %r656, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r657, %temp}, %fd1067; + } + setp.eq.s32 %p525, %r657, 0; + and.pred %p526, %p524, %p525; + @%p526 bra $L__BB2_307; + bra.uni $L__BB2_305; + +$L__BB2_307: + setp.lt.s32 %p533, %r78, 0; + mov.u32 %r665, 0; + setp.gt.f64 %p534, %fd131, 0d3FF0000000000000; + selp.b32 %r666, 2146435072, 0, %p534; + xor.b32 %r667, %r666, 2146435072; + selp.b32 %r668, %r667, %r666, %p533; + setp.eq.f32 %p535, %f101, 0fBF800000; + selp.b32 %r669, 1072693248, %r668, %p535; + mov.b64 %fd1115, {%r665, %r669}; + bra.uni $L__BB2_308; + +$L__BB2_305: + { + .reg .b32 %temp; + mov.b64 {%r658, %temp}, %fd130; + } + and.b32 %r659, %r105, 2147483647; + setp.ne.s32 %p527, %r659, 2146435072; + setp.ne.s32 %p528, %r658, 0; + or.pred %p529, %p527, %p528; + mov.f64 %fd1115, %fd1114; + @%p529 bra $L__BB2_308; + + setp.ne.s32 %p530, %r656, 1071644672; + and.pred %p531, %p530, %p41; + setp.gt.s32 %p532, %r78, -1; + selp.b32 %r661, 2146435072, 0, %p532; + mov.u32 %r662, 0; + or.b32 %r663, %r661, -2147483648; + selp.b32 %r664, %r663, %r661, %p531; + mov.b64 %fd1115, {%r662, %r664}; + +$L__BB2_308: + cvt.f64.f32 %fd177, %f114; + { + .reg .b32 %temp; + mov.b64 {%temp, %r106}, %fd177; + } + abs.f64 %fd178, %fd177; + { // callseq 44, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd178; + .param .b64 param1; + st.param.f64 [param1+0], %fd647; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1117, [retval0+0]; + } // callseq 44 + setp.lt.s32 %p536, %r106, 0; + and.pred %p42, %p536, %p149; + not.pred %p538, %p42; + @%p538 bra $L__BB2_310; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r670}, %fd1117; + } + xor.b32 %r671, %r670, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r672, %temp}, %fd1117; + } + mov.b64 %fd1117, {%r672, %r671}; + +$L__BB2_310: + setp.eq.f32 %p1288, %f101, 0f3F800000; + setp.eq.f32 %p539, %f114, 0f00000000; + selp.f64 %fd182, 0d3FF0000000000000, %fd1115, %p1288; + @%p539 bra $L__BB2_314; + bra.uni $L__BB2_311; + +$L__BB2_314: + mov.u32 %r673, 0; + selp.b32 %r674, %r106, 0, %p149; + or.b32 %r675, %r674, 2146435072; + selp.b32 %r676, %r675, %r674, %p152; + mov.b64 %fd1117, {%r673, %r676}; + bra.uni $L__BB2_315; + +$L__BB2_311: + setp.gt.s32 %p541, %r106, -1; + @%p541 bra $L__BB2_315; + + cvt.rzi.f64.f64 %fd766, %fd647; + setp.eq.f64 %p542, %fd766, 0d4008000000000000; + @%p542 bra $L__BB2_315; + + mov.f64 %fd1117, 0dFFF8000000000000; + +$L__BB2_315: + add.f64 %fd185, %fd177, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r677}, %fd185; + } + and.b32 %r678, %r677, 2146435072; + setp.ne.s32 %p545, %r678, 2146435072; + mov.f64 %fd1118, %fd1117; + @%p545 bra $L__BB2_321; + + setp.gtu.f64 %p546, %fd178, 0d7FF0000000000000; + mov.f64 %fd1118, %fd185; + @%p546 bra $L__BB2_321; + + setp.eq.s32 %p547, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r679, %temp}, %fd647; + } + setp.eq.s32 %p548, %r679, 0; + and.pred %p549, %p547, %p548; + @%p549 bra $L__BB2_320; + bra.uni $L__BB2_318; + +$L__BB2_320: + mov.u32 %r684, 0; + setp.gt.f64 %p556, %fd178, 0d3FF0000000000000; + selp.b32 %r685, 2146435072, 0, %p556; + xor.b32 %r686, %r685, 2146435072; + selp.b32 %r687, %r686, %r685, %p152; + setp.eq.f32 %p557, %f114, 0fBF800000; + selp.b32 %r688, 1072693248, %r687, %p557; + mov.b64 %fd1118, {%r684, %r688}; + bra.uni $L__BB2_321; + +$L__BB2_318: + { + .reg .b32 %temp; + mov.b64 {%r680, %temp}, %fd177; + } + and.b32 %r681, %r106, 2147483647; + setp.ne.s32 %p550, %r681, 2146435072; + setp.ne.s32 %p551, %r680, 0; + or.pred %p552, %p550, %p551; + mov.f64 %fd1118, %fd1117; + @%p552 bra $L__BB2_321; + + setp.ne.s32 %p553, %r58, 1071644672; + and.pred %p554, %p553, %p42; + selp.b32 %r682, %r63, %r62, %p554; + mov.u32 %r683, 0; + mov.b64 %fd1118, {%r683, %r682}; + +$L__BB2_321: + setp.eq.f32 %p558, %f114, 0f3F800000; + selp.f64 %fd769, 0d3FF0000000000000, %fd1118, %p558; + cvt.f64.f32 %fd770, %f225; + mul.f64 %fd189, %fd769, %fd770; + cvt.f64.f32 %fd190, %f120; + { + .reg .b32 %temp; + mov.b64 {%temp, %r107}, %fd190; + } + abs.f64 %fd191, %fd190; + { // callseq 45, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd191; + .param .b64 param1; + st.param.f64 [param1+0], %fd647; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1120, [retval0+0]; + } // callseq 45 + setp.lt.s32 %p559, %r107, 0; + and.pred %p43, %p559, %p149; + not.pred %p561, %p43; + @%p561 bra $L__BB2_323; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r689}, %fd1120; + } + xor.b32 %r690, %r689, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r691, %temp}, %fd1120; + } + mov.b64 %fd1120, {%r691, %r690}; + +$L__BB2_323: + setp.eq.f32 %p562, %f120, 0f00000000; + @%p562 bra $L__BB2_327; + bra.uni $L__BB2_324; + +$L__BB2_327: + mov.u32 %r692, 0; + selp.b32 %r693, %r107, 0, %p149; + or.b32 %r694, %r693, 2146435072; + selp.b32 %r695, %r694, %r693, %p152; + mov.b64 %fd1120, {%r692, %r695}; + bra.uni $L__BB2_328; + +$L__BB2_324: + setp.gt.s32 %p563, %r107, -1; + @%p563 bra $L__BB2_328; + + cvt.rzi.f64.f64 %fd773, %fd647; + setp.eq.f64 %p564, %fd773, 0d4008000000000000; + @%p564 bra $L__BB2_328; + + mov.f64 %fd1120, 0dFFF8000000000000; + +$L__BB2_328: + add.f64 %fd197, %fd190, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r696}, %fd197; + } + and.b32 %r697, %r696, 2146435072; + setp.ne.s32 %p567, %r697, 2146435072; + mov.f64 %fd1121, %fd1120; + @%p567 bra $L__BB2_334; + + setp.gtu.f64 %p568, %fd191, 0d7FF0000000000000; + mov.f64 %fd1121, %fd197; + @%p568 bra $L__BB2_334; + + setp.eq.s32 %p569, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r698, %temp}, %fd647; + } + setp.eq.s32 %p570, %r698, 0; + and.pred %p571, %p569, %p570; + @%p571 bra $L__BB2_333; + bra.uni $L__BB2_331; + +$L__BB2_333: + mov.u32 %r703, 0; + setp.gt.f64 %p578, %fd191, 0d3FF0000000000000; + selp.b32 %r704, 2146435072, 0, %p578; + xor.b32 %r705, %r704, 2146435072; + selp.b32 %r706, %r705, %r704, %p152; + setp.eq.f32 %p579, %f120, 0fBF800000; + selp.b32 %r707, 1072693248, %r706, %p579; + mov.b64 %fd1121, {%r703, %r707}; + bra.uni $L__BB2_334; + +$L__BB2_331: + { + .reg .b32 %temp; + mov.b64 {%r699, %temp}, %fd190; + } + and.b32 %r700, %r107, 2147483647; + setp.ne.s32 %p572, %r700, 2146435072; + setp.ne.s32 %p573, %r699, 0; + or.pred %p574, %p572, %p573; + mov.f64 %fd1121, %fd1120; + @%p574 bra $L__BB2_334; + + setp.ne.s32 %p575, %r58, 1071644672; + and.pred %p576, %p575, %p43; + selp.b32 %r701, %r63, %r62, %p576; + mov.u32 %r702, 0; + mov.b64 %fd1121, {%r702, %r701}; + +$L__BB2_334: + mov.f32 %f3150, 0fC0000000; + cvt.f64.f32 %fd1052, %f111; + setp.eq.f32 %p580, %f120, 0f3F800000; + selp.f64 %fd776, 0d3FF0000000000000, %fd1121, %p580; + cvt.f64.f32 %fd777, %f238; + mul.f64 %fd778, %fd776, %fd777; + sub.f64 %fd779, %fd189, %fd778; + div.rn.f64 %fd780, %fd43, %fd182; + mul.f64 %fd781, %fd780, %fd779; + mul.f64 %fd783, %fd781, %fd1052; + div.rn.f32 %f1563, %f3150, %f101; + mul.f32 %f1564, %f1563, %f239; + cvt.f64.f32 %fd784, %f1564; + sub.f64 %fd201, %fd784, %fd783; + div.rn.f32 %f240, %f51, %f98; + div.rn.f32 %f241, %f52, %f100; + not.pred %p581, %p12; + mov.f64 %fd1123, %fd64; + @%p581 bra $L__BB2_336; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r708}, %fd64; + } + xor.b32 %r709, %r708, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r710, %temp}, %fd64; + } + mov.b64 %fd1123, {%r710, %r709}; + +$L__BB2_336: + sub.f32 %f3027, %f3274, %f540; + setp.eq.f32 %p582, %f3027, 0f00000000; + @%p582 bra $L__BB2_340; + bra.uni $L__BB2_337; + +$L__BB2_340: + mov.u32 %r711, 0; + mov.b64 %fd1123, {%r711, %r84}; + bra.uni $L__BB2_341; + +$L__BB2_337: + setp.gt.s32 %p583, %r83, -1; + @%p583 bra $L__BB2_341; + + cvt.rzi.f64.f64 %fd786, %fd644; + setp.eq.f64 %p584, %fd786, 0d4000000000000000; + @%p584 bra $L__BB2_341; + + mov.f64 %fd1123, 0dFFF8000000000000; + +$L__BB2_341: + selp.f64 %fd1124, %fd1123, %fd46, %p182; + @%p25 bra $L__BB2_346; + + setp.eq.s32 %p586, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r712, %temp}, %fd644; + } + setp.eq.s32 %p587, %r712, 0; + and.pred %p588, %p586, %p587; + @%p588 bra $L__BB2_345; + bra.uni $L__BB2_343; + +$L__BB2_345: + mov.u32 %r716, 0; + mov.b64 %fd1124, {%r716, %r86}; + bra.uni $L__BB2_346; + +$L__BB2_343: + and.b32 %r713, %r83, 2147483647; + setp.ne.s32 %p589, %r713, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r714, %temp}, %fd45; + } + setp.ne.s32 %p590, %r714, 0; + or.pred %p591, %p589, %p590; + mov.f64 %fd1124, %fd1123; + @%p591 bra $L__BB2_346; + + mov.u32 %r715, 0; + mov.b64 %fd1124, {%r715, %r88}; + +$L__BB2_346: + cvt.f64.f32 %fd210, %f54; + not.pred %p592, %p13; + mov.f64 %fd1126, %fd65; + @%p592 bra $L__BB2_348; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r717}, %fd65; + } + xor.b32 %r718, %r717, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r719, %temp}, %fd65; + } + mov.b64 %fd1126, {%r719, %r718}; + +$L__BB2_348: + @%p582 bra $L__BB2_352; + bra.uni $L__BB2_349; + +$L__BB2_352: + mov.u32 %r720, 0; + selp.b32 %r722, %r83, 0, %p149; + or.b32 %r723, %r722, 2146435072; + selp.b32 %r724, %r723, %r722, %p152; + mov.b64 %fd1126, {%r720, %r724}; + bra.uni $L__BB2_353; + +$L__BB2_349: + setp.gt.s32 %p594, %r83, -1; + @%p594 bra $L__BB2_353; + + cvt.rzi.f64.f64 %fd790, %fd647; + setp.eq.f64 %p595, %fd790, 0d4008000000000000; + @%p595 bra $L__BB2_353; + + mov.f64 %fd1126, 0dFFF8000000000000; + +$L__BB2_353: + selp.f64 %fd1127, %fd1126, %fd47, %p186; + @%p26 bra $L__BB2_358; + + setp.eq.s32 %p599, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r725, %temp}, %fd647; + } + setp.eq.s32 %p600, %r725, 0; + and.pred %p601, %p599, %p600; + @%p601 bra $L__BB2_357; + bra.uni $L__BB2_355; + +$L__BB2_357: + mov.u32 %r732, 0; + mov.b64 %fd1127, {%r732, %r90}; + bra.uni $L__BB2_358; + +$L__BB2_355: + and.b32 %r726, %r83, 2147483647; + setp.ne.s32 %p602, %r726, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r727, %temp}, %fd45; + } + setp.ne.s32 %p603, %r727, 0; + or.pred %p604, %p602, %p603; + mov.f64 %fd1127, %fd1126; + @%p604 bra $L__BB2_358; + + setp.ne.s32 %p605, %r58, 1071644672; + and.pred %p606, %p605, %p13; + selp.b32 %r730, %r63, %r62, %p606; + mov.u32 %r731, 0; + mov.b64 %fd1127, {%r731, %r730}; + +$L__BB2_358: + not.pred %p607, %p14; + mov.f64 %fd1129, %fd66; + @%p607 bra $L__BB2_360; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r733}, %fd66; + } + xor.b32 %r734, %r733, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r735, %temp}, %fd66; + } + mov.b64 %fd1129, {%r735, %r734}; + +$L__BB2_360: + setp.eq.f32 %p608, %f541, 0f00000000; + @%p608 bra $L__BB2_364; + bra.uni $L__BB2_361; + +$L__BB2_364: + mov.u32 %r736, 0; + mov.b64 %fd1129, {%r736, %r91}; + bra.uni $L__BB2_365; + +$L__BB2_361: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1347}, %fd643; + } + setp.gt.s32 %p609, %r1347, -1; + @%p609 bra $L__BB2_365; + + cvt.rzi.f64.f64 %fd794, %fd649; + setp.eq.f64 %p610, %fd794, 0d4010000000000000; + @%p610 bra $L__BB2_365; + + mov.f64 %fd1129, 0dFFF8000000000000; + +$L__BB2_365: + selp.f64 %fd1130, %fd1129, %fd36, %p189; + @%p27 bra $L__BB2_370; + + setp.eq.s32 %p612, %r65, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r737, %temp}, %fd649; + } + setp.eq.s32 %p613, %r737, 0; + and.pred %p614, %p612, %p613; + @%p614 bra $L__BB2_369; + bra.uni $L__BB2_367; + +$L__BB2_369: + mov.u32 %r741, 0; + mov.b64 %fd1130, {%r741, %r94}; + bra.uni $L__BB2_370; + +$L__BB2_367: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1348}, %fd643; + } + and.b32 %r738, %r1348, 2147483647; + setp.ne.s32 %p615, %r738, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r739, %temp}, %fd643; + } + setp.ne.s32 %p616, %r739, 0; + or.pred %p617, %p615, %p616; + mov.f64 %fd1130, %fd1129; + @%p617 bra $L__BB2_370; + + mov.u32 %r740, 0; + mov.b64 %fd1130, {%r740, %r97}; + +$L__BB2_370: + sub.f32 %f3028, %f3274, %f540; + setp.eq.f32 %p618, %f541, 0f3F800000; + selp.f64 %fd798, 0d3FF0000000000000, %fd1130, %p618; + setp.eq.f32 %p619, %f3028, 0f3F800000; + selp.f64 %fd799, 0d3FF0000000000000, %fd1127, %p619; + mul.f64 %fd800, %fd799, %fd35; + div.rn.f64 %fd801, %fd800, %fd798; + selp.f64 %fd802, 0d3FF0000000000000, %fd1124, %p619; + mul.f64 %fd803, %fd802, %fd34; + div.rn.f64 %fd804, %fd803, %fd210; + add.f64 %fd805, %fd804, %fd44; + add.f64 %fd806, %fd805, %fd801; + cvt.rn.f32.f64 %f242, %fd806; + mul.f32 %f243, %f240, %f242; + not.pred %p620, %p15; + mov.f64 %fd1132, %fd67; + @%p620 bra $L__BB2_372; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r742}, %fd67; + } + xor.b32 %r743, %r742, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r744, %temp}, %fd67; + } + mov.b64 %fd1132, {%r744, %r743}; + +$L__BB2_372: + add.f32 %f3029, %f3274, %f540; + setp.eq.f32 %p621, %f3029, 0f00000000; + @%p621 bra $L__BB2_376; + bra.uni $L__BB2_373; + +$L__BB2_376: + mov.u32 %r745, 0; + mov.b64 %fd1132, {%r745, %r95}; + bra.uni $L__BB2_377; + +$L__BB2_373: + setp.gt.s32 %p622, %r93, -1; + @%p622 bra $L__BB2_377; + + cvt.rzi.f64.f64 %fd808, %fd644; + setp.eq.f64 %p623, %fd808, 0d4000000000000000; + @%p623 bra $L__BB2_377; + + mov.f64 %fd1132, 0dFFF8000000000000; + +$L__BB2_377: + selp.f64 %fd1133, %fd1132, %fd50, %p194; + @%p28 bra $L__BB2_382; + + setp.eq.s32 %p625, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r746, %temp}, %fd644; + } + setp.eq.s32 %p626, %r746, 0; + and.pred %p627, %p625, %p626; + @%p627 bra $L__BB2_381; + bra.uni $L__BB2_379; + +$L__BB2_381: + mov.u32 %r750, 0; + mov.b64 %fd1133, {%r750, %r98}; + bra.uni $L__BB2_382; + +$L__BB2_379: + and.b32 %r747, %r93, 2147483647; + setp.ne.s32 %p628, %r747, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r748, %temp}, %fd49; + } + setp.ne.s32 %p629, %r748, 0; + or.pred %p630, %p628, %p629; + mov.f64 %fd1133, %fd1132; + @%p630 bra $L__BB2_382; + + mov.u32 %r749, 0; + mov.b64 %fd1133, {%r749, %r100}; + +$L__BB2_382: + add.f32 %f3030, %f3274, %f540; + setp.eq.f32 %p631, %f3030, 0f3F800000; + selp.f64 %fd811, 0d3FF0000000000000, %fd1133, %p631; + mul.f64 %fd812, %fd811, %fd37; + div.rn.f64 %fd235, %fd812, %fd210; + not.pred %p632, %p16; + mov.f64 %fd1135, %fd68; + @%p632 bra $L__BB2_384; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r751}, %fd68; + } + xor.b32 %r752, %r751, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r753, %temp}, %fd68; + } + mov.b64 %fd1135, {%r753, %r752}; + +$L__BB2_384: + @%p621 bra $L__BB2_388; + bra.uni $L__BB2_385; + +$L__BB2_388: + mov.u32 %r754, 0; + selp.b32 %r756, %r93, 0, %p149; + or.b32 %r757, %r756, 2146435072; + selp.b32 %r758, %r757, %r756, %p152; + mov.b64 %fd1135, {%r754, %r758}; + bra.uni $L__BB2_389; + +$L__BB2_385: + setp.gt.s32 %p634, %r93, -1; + @%p634 bra $L__BB2_389; + + cvt.rzi.f64.f64 %fd814, %fd647; + setp.eq.f64 %p635, %fd814, 0d4008000000000000; + @%p635 bra $L__BB2_389; + + mov.f64 %fd1135, 0dFFF8000000000000; + +$L__BB2_389: + selp.f64 %fd1136, %fd1135, %fd51, %p199; + @%p29 bra $L__BB2_394; + + setp.eq.s32 %p639, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r759, %temp}, %fd647; + } + setp.eq.s32 %p640, %r759, 0; + and.pred %p641, %p639, %p640; + @%p641 bra $L__BB2_393; + bra.uni $L__BB2_391; + +$L__BB2_393: + mov.u32 %r766, 0; + mov.b64 %fd1136, {%r766, %r101}; + bra.uni $L__BB2_394; + +$L__BB2_391: + and.b32 %r760, %r93, 2147483647; + setp.ne.s32 %p642, %r760, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r761, %temp}, %fd49; + } + setp.ne.s32 %p643, %r761, 0; + or.pred %p644, %p642, %p643; + mov.f64 %fd1136, %fd1135; + @%p644 bra $L__BB2_394; + + setp.ne.s32 %p645, %r58, 1071644672; + and.pred %p646, %p645, %p16; + selp.b32 %r764, %r63, %r62, %p646; + mov.u32 %r765, 0; + mov.b64 %fd1136, {%r765, %r764}; + +$L__BB2_394: + mov.f64 %fd1138, %fd66; + @%p607 bra $L__BB2_396; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r767}, %fd66; + } + xor.b32 %r768, %r767, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r769, %temp}, %fd66; + } + mov.b64 %fd1138, {%r769, %r768}; + +$L__BB2_396: + @%p608 bra $L__BB2_400; + bra.uni $L__BB2_397; + +$L__BB2_400: + mov.u32 %r770, 0; + mov.b64 %fd1138, {%r770, %r91}; + bra.uni $L__BB2_401; + +$L__BB2_397: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1349}, %fd643; + } + setp.gt.s32 %p649, %r1349, -1; + @%p649 bra $L__BB2_401; + + cvt.rzi.f64.f64 %fd818, %fd649; + setp.eq.f64 %p650, %fd818, 0d4010000000000000; + @%p650 bra $L__BB2_401; + + mov.f64 %fd1138, 0dFFF8000000000000; + +$L__BB2_401: + selp.f64 %fd1139, %fd1138, %fd36, %p189; + @%p27 bra $L__BB2_406; + + setp.eq.s32 %p652, %r65, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r771, %temp}, %fd649; + } + setp.eq.s32 %p653, %r771, 0; + and.pred %p654, %p652, %p653; + @%p654 bra $L__BB2_405; + bra.uni $L__BB2_403; + +$L__BB2_405: + mov.u32 %r775, 0; + mov.b64 %fd1139, {%r775, %r94}; + bra.uni $L__BB2_406; + +$L__BB2_403: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1350}, %fd643; + } + and.b32 %r772, %r1350, 2147483647; + setp.ne.s32 %p655, %r772, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r773, %temp}, %fd643; + } + setp.ne.s32 %p656, %r773, 0; + or.pred %p657, %p655, %p656; + mov.f64 %fd1139, %fd1138; + @%p657 bra $L__BB2_406; + + mov.u32 %r774, 0; + mov.b64 %fd1139, {%r774, %r97}; + +$L__BB2_406: + mov.f32 %f3038, 0f3FC00000; + mov.f32 %f3037, 0f3102E308; + mov.f32 %f3036, 0fBF317218; + mov.f32 %f3035, 0f35BFBE8E; + mov.f32 %f3034, 0f3F317200; + mov.f32 %f3033, 0f3DAAAABD; + mov.f32 %f3032, 0f3C4CAF63; + mov.f32 %f3031, 0f3B18F0FE; + selp.f64 %fd822, 0d3FF0000000000000, %fd1139, %p618; + selp.f64 %fd823, 0d3FF0000000000000, %fd1136, %p631; + mul.f64 %fd824, %fd823, %fd38; + div.rn.f64 %fd825, %fd824, %fd822; + add.f64 %fd826, %fd235, %fd48; + add.f64 %fd827, %fd826, %fd825; + cvt.rn.f32.f64 %f244, %fd827; + mul.f32 %f245, %f241, %f244; + mul.f32 %f1566, %f239, %f245; + fma.rn.f32 %f246, %f211, %f243, %f1566; + abs.f32 %f247, %f96; + setp.lt.f32 %p660, %f247, 0f00800000; + mul.f32 %f1567, %f247, 0f4B800000; + selp.f32 %f1568, %f1567, %f247, %p660; + selp.f32 %f1569, 0fC3170000, 0fC2FE0000, %p660; + mov.b32 %r776, %f1568; + and.b32 %r777, %r776, 8388607; + or.b32 %r778, %r777, 1065353216; + mov.b32 %f1570, %r778; + shr.u32 %r779, %r776, 23; + cvt.rn.f32.u32 %f1571, %r779; + add.f32 %f1572, %f1569, %f1571; + setp.gt.f32 %p661, %f1570, 0f3FB504F3; + mul.f32 %f1573, %f1570, 0f3F000000; + add.f32 %f1574, %f1572, 0f3F800000; + selp.f32 %f1575, %f1574, %f1572, %p661; + selp.f32 %f1576, %f1573, %f1570, %p661; + add.f32 %f1577, %f1576, 0fBF800000; + add.f32 %f1578, %f1576, 0f3F800000; + rcp.approx.ftz.f32 %f1579, %f1578; + add.f32 %f1580, %f1577, %f1577; + mul.f32 %f1581, %f1580, %f1579; + mul.f32 %f1582, %f1581, %f1581; + fma.rn.f32 %f1585, %f3031, %f1582, %f3032; + fma.rn.f32 %f1587, %f1585, %f1582, %f3033; + mul.rn.f32 %f1588, %f1587, %f1582; + mul.rn.f32 %f1589, %f1588, %f1581; + sub.f32 %f1590, %f1577, %f1581; + add.f32 %f1591, %f1590, %f1590; + neg.f32 %f1592, %f1581; + fma.rn.f32 %f1593, %f1592, %f1577, %f1591; + mul.rn.f32 %f1594, %f1579, %f1593; + add.f32 %f1595, %f1589, %f1581; + sub.f32 %f1596, %f1581, %f1595; + add.f32 %f1597, %f1589, %f1596; + add.f32 %f1598, %f1594, %f1597; + add.f32 %f1599, %f1595, %f1598; + sub.f32 %f1600, %f1595, %f1599; add.f32 %f1601, %f1598, %f1600; - mul.rn.f32 %f1603, %f886, %f1599; - neg.f32 %f1604, %f1603; - fma.rn.f32 %f1605, %f886, %f1599, %f1604; - fma.rn.f32 %f1606, %f886, %f1601, %f1605; - fma.rn.f32 %f1608, %f3334, %f1599, %f1606; - add.rn.f32 %f1609, %f1603, %f1608; - neg.f32 %f1610, %f1609; - add.rn.f32 %f1611, %f1603, %f1610; - add.rn.f32 %f1612, %f1611, %f1608; - mov.b32 %r180, %f1609; - setp.eq.s32 %p153, %r180, 1118925336; - add.s32 %r181, %r180, -1; - mov.b32 %f1613, %r181; - add.f32 %f1614, %f1612, 0f37000000; - selp.f32 %f1615, %f1613, %f1609, %p153; - selp.f32 %f293, %f1614, %f1612, %p153; - mul.f32 %f1616, %f1615, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1617, %f1616; - fma.rn.f32 %f1618, %f1617, %f3115, %f1615; - fma.rn.f32 %f1619, %f1617, %f3116, %f1618; - mul.f32 %f1620, %f1619, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1621, %f1620; - add.f32 %f1622, %f1617, 0f00000000; - ex2.approx.f32 %f1623, %f1622; - mul.f32 %f1624, %f1621, %f1623; - setp.lt.f32 %p154, %f1615, 0fC2D20000; - selp.f32 %f1625, 0f00000000, %f1624, %p154; - setp.gt.f32 %p155, %f1615, 0f42D20000; - selp.f32 %f3321, 0f7F800000, %f1625, %p155; - setp.eq.f32 %p156, %f3321, 0f7F800000; - @%p156 bra BB2_89; - - fma.rn.f32 %f3321, %f3321, %f293, %f3321; - -BB2_89: - setp.eq.f32 %p369, %f152, 0f00000000; - setp.geu.f32 %p368, %f152, 0f00000000; - mov.b32 %r182, %f3321; - xor.b32 %r183, %r182, -2147483648; - mov.b32 %f1626, %r183; - selp.f32 %f297, %f1626, %f3321, %p4; - selp.f32 %f3322, %f159, %f297, %p369; - @%p368 bra BB2_91; - - cvt.rzi.f32.f32 %f1628, %f886; - setp.neu.f32 %p158, %f1628, 0f40000000; - selp.f32 %f3322, 0f7FFFFFFF, %f297, %p158; - -BB2_91: - abs.f32 %f3137, %f152; - cvt.rn.f32.s32 %f3136, %r319; - sub.f32 %f3135, %f3136, %f3349; - mul.f32 %f3134, %f3135, %f3135; - mul.f32 %f3133, %f3135, %f3134; - add.f32 %f3132, %f3135, 0f3F800000; - setp.eq.f32 %p373, %f152, 0f3F800000; - add.f32 %f3131, %f3137, 0f40000000; - mov.b32 %r294, %f3131; - setp.gt.s32 %p372, %r294, 2139095039; - setp.neu.f32 %p371, %f3137, 0f7F800000; - selp.f32 %f3130, 0fFF800000, 0f7F800000, %p4; - setp.gtu.f32 %p370, %f3137, 0f7F800000; - add.f32 %f3129, %f152, 0f40000000; - mov.f32 %f3128, 0f3DAAAABD; - mov.f32 %f3127, 0f3C4CAF63; - mov.f32 %f3126, 0f3B18F0FE; - mov.f32 %f3125, 0fB5BFBE8E; - mov.f32 %f3124, 0fBF317200; - selp.f32 %f1632, %f3129, %f3322, %p370; - selp.f32 %f1634, %f1632, %f3130, %p371; - selp.f32 %f1635, %f1634, %f3322, %p372; - mul.f32 %f1636, %f1635, 0fBF000000; - selp.f32 %f1637, 0fBF000000, %f1636, %p373; + mul.rn.f32 %f1603, %f1575, %f3034; + mul.rn.f32 %f1605, %f1575, %f3035; + add.f32 %f1606, %f1603, %f1599; + sub.f32 %f1607, %f1603, %f1606; + add.f32 %f1608, %f1599, %f1607; + add.f32 %f1609, %f1601, %f1608; + add.f32 %f1610, %f1605, %f1609; + add.f32 %f1611, %f1606, %f1610; + sub.f32 %f1612, %f1606, %f1611; + add.f32 %f1613, %f1610, %f1612; + mul.rn.f32 %f1615, %f3038, %f1611; + neg.f32 %f1616, %f1615; + fma.rn.f32 %f1617, %f3038, %f1611, %f1616; + fma.rn.f32 %f1618, %f3038, %f1613, %f1617; + fma.rn.f32 %f1620, %f3263, %f1611, %f1618; + add.rn.f32 %f1621, %f1615, %f1620; + neg.f32 %f1622, %f1621; + add.rn.f32 %f1623, %f1615, %f1622; + add.rn.f32 %f1624, %f1623, %f1620; + mov.b32 %r780, %f1621; + setp.eq.s32 %p662, %r780, 1118925336; + add.s32 %r781, %r780, -1; + mov.b32 %f1625, %r781; + add.f32 %f1626, %f1624, 0f37000000; + selp.f32 %f248, %f1626, %f1624, %p662; + selp.f32 %f1627, %f1625, %f1621, %p662; + mul.rn.f32 %f1629, %f1627, %f800; + cvt.rzi.f32.f32 %f1630, %f1629; + abs.f32 %f1631, %f1630; + setp.gt.f32 %p663, %f1631, 0f42FC0000; + mov.b32 %r782, %f1630; + and.b32 %r783, %r782, -2147483648; + or.b32 %r784, %r783, 1123811328; + mov.b32 %f1632, %r784; + selp.f32 %f1633, %f1632, %f1630, %p663; + fma.rn.f32 %f1635, %f1633, %f3036, %f1627; + fma.rn.f32 %f1637, %f1633, %f3037, %f1635; mul.f32 %f1638, %f1637, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1639, %f1638; - fma.rn.f32 %f1641, %f1639, %f3124, %f1637; - fma.rn.f32 %f1643, %f1639, %f3125, %f1641; - mul.f32 %f1644, %f1643, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1645, %f1644; - add.f32 %f1646, %f1639, 0f00000000; - ex2.approx.f32 %f1647, %f1646; - mul.f32 %f1648, %f1645, %f1647; - setp.lt.f32 %p163, %f1637, 0fC2D20000; - selp.f32 %f1649, 0f00000000, %f1648, %p163; - setp.gt.f32 %p164, %f1637, 0f42D20000; - selp.f32 %f1650, 0f7F800000, %f1649, %p164; - mul.f32 %f1651, %f3135, %f1650; - mul.f32 %f1652, %f3132, %f292; - sub.f32 %f1653, %f1652, %f1651; - mul.f32 %f1654, %f162, %f1653; - mul.f32 %f301, %f224, %f1654; - mul.f32 %f1655, %f163, %f301; - mul.f32 %f1656, %f1650, %f3133; - mul.f32 %f1657, %f292, %f191; - sub.f32 %f1658, %f1657, %f1656; - mul.f32 %f1659, %f190, %f1658; - mul.f32 %f1660, %f224, %f1659; - sub.f32 %f302, %f1655, %f1660; - // inline asm - rcp.approx.ftz.f32 %f1629,%f1289; - // inline asm - mul.f32 %f1661, %f1629, %f249; - mul.f32 %f1662, %f1661, %f1661; - fma.rn.f32 %f1665, %f3126, %f1662, %f3127; - fma.rn.f32 %f1667, %f1665, %f1662, %f3128; - mul.rn.f32 %f1668, %f1667, %f1662; - mul.rn.f32 %f1669, %f1668, %f1661; - sub.f32 %f1670, %f247, %f1661; - neg.f32 %f1671, %f1661; - add.f32 %f1672, %f1670, %f1670; - fma.rn.f32 %f1673, %f1671, %f247, %f1672; - mul.rn.f32 %f1674, %f1629, %f1673; - add.f32 %f1675, %f1669, %f1661; - sub.f32 %f1676, %f1661, %f1675; - add.f32 %f1677, %f1669, %f1676; - add.f32 %f1678, %f1674, %f1677; - add.f32 %f1679, %f1675, %f1678; - sub.f32 %f1680, %f1675, %f1679; - add.f32 %f1681, %f1678, %f1680; - add.f32 %f1682, %f250, %f1679; - sub.f32 %f1683, %f250, %f1682; - add.f32 %f1684, %f1679, %f1683; - add.f32 %f1685, %f1681, %f1684; - add.f32 %f1686, %f251, %f1685; - add.f32 %f1687, %f1682, %f1686; - sub.f32 %f1688, %f1682, %f1687; - add.f32 %f1689, %f1686, %f1688; - mul.rn.f32 %f1691, %f886, %f1687; - neg.f32 %f1692, %f1691; - fma.rn.f32 %f1693, %f886, %f1687, %f1692; - fma.rn.f32 %f1694, %f886, %f1689, %f1693; - fma.rn.f32 %f1696, %f3334, %f1687, %f1694; - add.rn.f32 %f1697, %f1691, %f1696; - neg.f32 %f1698, %f1697; - add.rn.f32 %f1699, %f1691, %f1698; - add.rn.f32 %f1700, %f1699, %f1696; - mov.b32 %r184, %f1697; - setp.eq.s32 %p165, %r184, 1118925336; - add.s32 %r185, %r184, -1; - mov.b32 %f1701, %r185; - add.f32 %f1702, %f1700, 0f37000000; - selp.f32 %f1703, %f1701, %f1697, %p165; - selp.f32 %f303, %f1702, %f1700, %p165; - mul.f32 %f1704, %f1703, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1705, %f1704; - fma.rn.f32 %f1706, %f1705, %f3124, %f1703; - fma.rn.f32 %f1707, %f1705, %f3125, %f1706; - mul.f32 %f1708, %f1707, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1709, %f1708; - add.f32 %f1710, %f1705, 0f00000000; - ex2.approx.f32 %f1711, %f1710; - mul.f32 %f1712, %f1709, %f1711; - setp.lt.f32 %p166, %f1703, 0fC2D20000; - selp.f32 %f1713, 0f00000000, %f1712, %p166; - setp.gt.f32 %p167, %f1703, 0f42D20000; - selp.f32 %f3323, 0f7F800000, %f1713, %p167; - setp.eq.f32 %p168, %f3323, 0f7F800000; - @%p168 bra BB2_93; - - fma.rn.f32 %f3323, %f3323, %f303, %f3323; - -BB2_93: - setp.eq.f32 %p374, %f245, 0f00000000; - mov.b32 %r186, %f3323; - xor.b32 %r187, %r186, -2147483648; - mov.b32 %f1714, %r187; - selp.f32 %f3325, %f1714, %f3323, %p9; - @%p374 bra BB2_96; - bra.uni BB2_94; - -BB2_96: - add.f32 %f1717, %f245, %f245; - selp.f32 %f3325, %f1717, 0f00000000, %p59; - bra.uni BB2_97; - -BB2_94: - setp.geu.f32 %p170, %f245, 0f00000000; - @%p170 bra BB2_97; - - cvt.rzi.f32.f32 %f1716, %f886; - setp.neu.f32 %p171, %f1716, 0f40000000; - selp.f32 %f3325, 0f7FFFFFFF, %f3325, %p171; - -BB2_97: - abs.f32 %f3139, %f245; - add.f32 %f3138, %f3139, 0f40000000; - mov.b32 %r295, %f3138; - setp.lt.s32 %p375, %r295, 2139095040; - @%p375 bra BB2_102; - - abs.f32 %f3217, %f245; - setp.gtu.f32 %p174, %f3217, 0f7F800000; - @%p174 bra BB2_101; - bra.uni BB2_99; - -BB2_101: - add.f32 %f3325, %f245, 0f40000000; - bra.uni BB2_102; - -BB2_99: - abs.f32 %f3218, %f245; - setp.neu.f32 %p175, %f3218, 0f7F800000; - @%p175 bra BB2_102; - - selp.f32 %f3325, 0fFF800000, 0f7F800000, %p9; - -BB2_102: - setp.eq.f32 %p376, %f245, 0f3F800000; - mov.f32 %f3144, 0f3DAAAABD; - mov.f32 %f3143, 0f3C4CAF63; - mov.f32 %f3142, 0f3B18F0FE; - mov.f32 %f3141, 0fB5BFBE8E; - mov.f32 %f3140, 0fBF317200; - mul.f32 %f1720, %f3325, 0fBF000000; - selp.f32 %f1721, 0fBF000000, %f1720, %p376; - mul.f32 %f1722, %f1721, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1723, %f1722; - fma.rn.f32 %f1725, %f1723, %f3140, %f1721; - fma.rn.f32 %f1727, %f1723, %f3141, %f1725; - mul.f32 %f1728, %f1727, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1729, %f1728; - add.f32 %f1730, %f1723, 0f00000000; - ex2.approx.f32 %f1731, %f1730; - mul.f32 %f1732, %f1729, %f1731; - setp.lt.f32 %p177, %f1721, 0fC2D20000; - selp.f32 %f1733, 0f00000000, %f1732, %p177; - setp.gt.f32 %p178, %f1721, 0f42D20000; - selp.f32 %f314, 0f7F800000, %f1733, %p178; - // inline asm - rcp.approx.ftz.f32 %f1718,%f1388; - // inline asm - mul.f32 %f1734, %f1718, %f268; - mul.f32 %f1735, %f1734, %f1734; - fma.rn.f32 %f1738, %f3142, %f1735, %f3143; - fma.rn.f32 %f1740, %f1738, %f1735, %f3144; - mul.rn.f32 %f1741, %f1740, %f1735; - mul.rn.f32 %f1742, %f1741, %f1734; - sub.f32 %f1743, %f266, %f1734; - neg.f32 %f1744, %f1734; - add.f32 %f1745, %f1743, %f1743; - fma.rn.f32 %f1746, %f1744, %f266, %f1745; - mul.rn.f32 %f1747, %f1718, %f1746; - add.f32 %f1748, %f1742, %f1734; - sub.f32 %f1749, %f1734, %f1748; - add.f32 %f1750, %f1742, %f1749; - add.f32 %f1751, %f1747, %f1750; - add.f32 %f1752, %f1748, %f1751; - sub.f32 %f1753, %f1748, %f1752; - add.f32 %f1754, %f1751, %f1753; - add.f32 %f1755, %f269, %f1752; - sub.f32 %f1756, %f269, %f1755; - add.f32 %f1757, %f1752, %f1756; - add.f32 %f1758, %f1754, %f1757; - add.f32 %f1759, %f270, %f1758; - add.f32 %f1760, %f1755, %f1759; - sub.f32 %f1761, %f1755, %f1760; - add.f32 %f1762, %f1759, %f1761; - mul.rn.f32 %f1764, %f886, %f1760; - neg.f32 %f1765, %f1764; - fma.rn.f32 %f1766, %f886, %f1760, %f1765; - fma.rn.f32 %f1767, %f886, %f1762, %f1766; - fma.rn.f32 %f1769, %f3334, %f1760, %f1767; - add.rn.f32 %f1770, %f1764, %f1769; - neg.f32 %f1771, %f1770; - add.rn.f32 %f1772, %f1764, %f1771; - add.rn.f32 %f1773, %f1772, %f1769; - mov.b32 %r188, %f1770; - setp.eq.s32 %p179, %r188, 1118925336; - add.s32 %r189, %r188, -1; - mov.b32 %f1774, %r189; - add.f32 %f1775, %f1773, 0f37000000; - selp.f32 %f1776, %f1774, %f1770, %p179; - selp.f32 %f315, %f1775, %f1773, %p179; - mul.f32 %f1777, %f1776, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1778, %f1777; - fma.rn.f32 %f1779, %f1778, %f3140, %f1776; - fma.rn.f32 %f1780, %f1778, %f3141, %f1779; - mul.f32 %f1781, %f1780, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1782, %f1781; - add.f32 %f1783, %f1778, 0f00000000; - ex2.approx.f32 %f1784, %f1783; - mul.f32 %f1785, %f1782, %f1784; - setp.lt.f32 %p180, %f1776, 0fC2D20000; - selp.f32 %f1786, 0f00000000, %f1785, %p180; - setp.gt.f32 %p181, %f1776, 0f42D20000; - selp.f32 %f3326, 0f7F800000, %f1786, %p181; - setp.eq.f32 %p182, %f3326, 0f7F800000; - @%p182 bra BB2_104; - - fma.rn.f32 %f3326, %f3326, %f315, %f3326; - -BB2_104: - setp.eq.f32 %p381, %f264, 0f00000000; - mov.b32 %r190, %f3326; - xor.b32 %r191, %r190, -2147483648; - mov.b32 %f1787, %r191; - selp.f32 %f3328, %f1787, %f3326, %p10; - @%p381 bra BB2_107; - bra.uni BB2_105; - -BB2_107: - add.f32 %f1790, %f264, %f264; - selp.f32 %f3328, %f1790, 0f00000000, %p59; - bra.uni BB2_108; - -BB2_105: - setp.geu.f32 %p184, %f264, 0f00000000; - @%p184 bra BB2_108; - - cvt.rzi.f32.f32 %f1789, %f886; - setp.neu.f32 %p185, %f1789, 0f40000000; - selp.f32 %f3328, 0f7FFFFFFF, %f3328, %p185; - -BB2_108: - abs.f32 %f3231, %f264; - add.f32 %f3230, %f3231, 0f40000000; - mov.b32 %r304, %f3230; - setp.lt.s32 %p382, %r304, 2139095040; - @%p382 bra BB2_113; - - abs.f32 %f3215, %f264; - setp.gtu.f32 %p188, %f3215, 0f7F800000; - @%p188 bra BB2_112; - bra.uni BB2_110; - -BB2_112: - add.f32 %f3328, %f264, 0f40000000; - bra.uni BB2_113; - -BB2_110: - abs.f32 %f3216, %f264; - setp.neu.f32 %p189, %f3216, 0f7F800000; - @%p189 bra BB2_113; - - selp.f32 %f3328, 0fFF800000, 0f7F800000, %p10; - -BB2_113: - setp.eq.f32 %p383, %f264, 0f3F800000; - mov.f32 %f3153, 0f3FC00000; - cvt.rn.f32.s32 %f3152, %r320; - sub.f32 %f3151, %f3152, %f3348; - add.f32 %f3150, %f3151, 0f3F800000; - mov.f32 %f3149, 0f3DAAAABD; - mov.f32 %f3148, 0f3C4CAF63; - mov.f32 %f3147, 0f3B18F0FE; - mov.f32 %f3146, 0fB5BFBE8E; - mov.f32 %f3145, 0fBF317200; - mul.f32 %f1793, %f3328, 0fBF000000; - selp.f32 %f1794, 0fBF000000, %f1793, %p383; - mul.f32 %f1795, %f1794, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1796, %f1795; - fma.rn.f32 %f1798, %f1796, %f3145, %f1794; - fma.rn.f32 %f1800, %f1796, %f3146, %f1798; - mul.f32 %f1801, %f1800, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1802, %f1801; - add.f32 %f1803, %f1796, 0f00000000; - ex2.approx.f32 %f1804, %f1803; - mul.f32 %f1805, %f1802, %f1804; - setp.lt.f32 %p191, %f1794, 0fC2D20000; - selp.f32 %f1806, 0f00000000, %f1805, %p191; - setp.gt.f32 %p192, %f1794, 0f42D20000; - selp.f32 %f1807, 0f7F800000, %f1806, %p192; - mul.f32 %f1808, %f3151, %f1807; - mul.f32 %f1809, %f3150, %f314; - sub.f32 %f1810, %f1809, %f1808; - mul.f32 %f1811, %f164, %f1810; - mul.f32 %f326, %f210, %f1811; - mul.f32 %f1812, %f165, %f326; - mul.f32 %f1813, %f3150, %f3150; - mul.f32 %f1814, %f3150, %f1813; - mul.f32 %f1815, %f3151, %f3151; - mul.f32 %f1816, %f3151, %f1815; - mul.f32 %f1817, %f314, %f1814; - mul.f32 %f1818, %f1807, %f1816; - sub.f32 %f1819, %f1817, %f1818; - mul.f32 %f1820, %f193, %f1819; - mul.f32 %f1821, %f210, %f1820; - sub.f32 %f327, %f1812, %f1821; - mul.f32 %f1822, %f326, %f167; - fma.rn.f32 %f328, %f301, %f166, %f1822; - // inline asm - rcp.approx.ftz.f32 %f1791,%f170; - // inline asm - mul.f32 %f1823, %f1791, %f171; - mul.f32 %f1824, %f1823, %f1823; - fma.rn.f32 %f1827, %f3147, %f1824, %f3148; - fma.rn.f32 %f1829, %f1827, %f1824, %f3149; - mul.rn.f32 %f1830, %f1829, %f1824; - mul.rn.f32 %f1831, %f1830, %f1823; - sub.f32 %f1832, %f169, %f1823; - neg.f32 %f1833, %f1823; - add.f32 %f1834, %f1832, %f1832; - fma.rn.f32 %f1835, %f1833, %f169, %f1834; - mul.rn.f32 %f1836, %f1791, %f1835; - add.f32 %f1837, %f1831, %f1823; - sub.f32 %f1838, %f1823, %f1837; - add.f32 %f1839, %f1831, %f1838; - add.f32 %f1840, %f1836, %f1839; - add.f32 %f1841, %f1837, %f1840; - sub.f32 %f1842, %f1837, %f1841; - add.f32 %f1843, %f1840, %f1842; - add.f32 %f1844, %f172, %f1841; - sub.f32 %f1845, %f172, %f1844; - add.f32 %f1846, %f1841, %f1845; - add.f32 %f1847, %f1843, %f1846; - add.f32 %f1848, %f173, %f1847; - add.f32 %f1849, %f1844, %f1848; - sub.f32 %f1850, %f1844, %f1849; - add.f32 %f1851, %f1848, %f1850; - mul.rn.f32 %f1853, %f3153, %f1849; - neg.f32 %f1854, %f1853; - fma.rn.f32 %f1855, %f3153, %f1849, %f1854; - fma.rn.f32 %f1856, %f3153, %f1851, %f1855; - fma.rn.f32 %f1858, %f3334, %f1849, %f1856; - add.rn.f32 %f1859, %f1853, %f1858; - neg.f32 %f1860, %f1859; - add.rn.f32 %f1861, %f1853, %f1860; - add.rn.f32 %f1862, %f1861, %f1858; - mov.b32 %r192, %f1859; - setp.eq.s32 %p193, %r192, 1118925336; - add.s32 %r193, %r192, -1; - mov.b32 %f1863, %r193; - add.f32 %f1864, %f1862, 0f37000000; - selp.f32 %f1865, %f1863, %f1859, %p193; - selp.f32 %f329, %f1864, %f1862, %p193; - mul.f32 %f1866, %f1865, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1867, %f1866; - fma.rn.f32 %f1868, %f1867, %f3145, %f1865; - fma.rn.f32 %f1869, %f1867, %f3146, %f1868; - mul.f32 %f1870, %f1869, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1871, %f1870; - add.f32 %f1872, %f1867, 0f00000000; - ex2.approx.f32 %f1873, %f1872; - mul.f32 %f1874, %f1871, %f1873; - setp.lt.f32 %p194, %f1865, 0fC2D20000; - selp.f32 %f1875, 0f00000000, %f1874, %p194; - setp.gt.f32 %p195, %f1865, 0f42D20000; - selp.f32 %f3329, 0f7F800000, %f1875, %p195; - setp.eq.f32 %p196, %f3329, 0f7F800000; - @%p196 bra BB2_115; - - fma.rn.f32 %f3329, %f3329, %f329, %f3329; - -BB2_115: - setp.geu.f32 %p377, %f108, 0f00000000; - mov.b32 %r194, %f3329; - xor.b32 %r195, %r194, -2147483648; - mov.b32 %f1876, %r195; - selp.f32 %f333, %f1876, %f3329, %p5; - setp.eq.f32 %p197, %f108, 0f00000000; - selp.f32 %f3330, %f174, %f333, %p197; - @%p377 bra BB2_117; - - mov.f32 %f3154, 0f3FC00000; - cvt.rzi.f32.f32 %f1878, %f3154; - setp.neu.f32 %p198, %f1878, 0f3FC00000; - selp.f32 %f3330, 0f7FFFFFFF, %f333, %p198; - -BB2_117: - selp.f32 %f3163, 0fFF800000, 0f7F800000, %p5; - add.f32 %f3162, %f108, 0f3FC00000; - abs.f32 %f3161, %f108; - mov.f32 %f3160, 0f3FC00000; - mov.f32 %f3159, 0f3DAAAABD; - mov.f32 %f3158, 0f3C4CAF63; - mov.f32 %f3157, 0f3B18F0FE; - mov.f32 %f3156, 0fB5BFBE8E; - mov.f32 %f3155, 0fBF317200; - add.f32 %f1881, %f3161, 0f3FC00000; - mov.b32 %r196, %f1881; - setp.gt.s32 %p199, %r196, 2139095039; - setp.gtu.f32 %p200, %f3161, 0f7F800000; - selp.f32 %f1882, %f3162, %f3330, %p200; - setp.neu.f32 %p201, %f3161, 0f7F800000; - selp.f32 %f1883, %f1882, %f3163, %p201; - selp.f32 %f1884, %f1883, %f3330, %p199; - setp.eq.f32 %p202, %f108, 0f3F800000; - selp.f32 %f1885, 0f3F800000, %f1884, %p202; - div.rn.f32 %f337, %f96, %f1885; - // inline asm - rcp.approx.ftz.f32 %f1879,%f178; - // inline asm - mul.f32 %f1886, %f1879, %f179; - mul.f32 %f1887, %f1886, %f1886; - fma.rn.f32 %f1890, %f3157, %f1887, %f3158; - fma.rn.f32 %f1892, %f1890, %f1887, %f3159; - mul.rn.f32 %f1893, %f1892, %f1887; - mul.rn.f32 %f1894, %f1893, %f1886; - sub.f32 %f1895, %f177, %f1886; - neg.f32 %f1896, %f1886; - add.f32 %f1897, %f1895, %f1895; - fma.rn.f32 %f1898, %f1896, %f177, %f1897; - mul.rn.f32 %f1899, %f1879, %f1898; - add.f32 %f1900, %f1894, %f1886; - sub.f32 %f1901, %f1886, %f1900; - add.f32 %f1902, %f1894, %f1901; - add.f32 %f1903, %f1899, %f1902; - add.f32 %f1904, %f1900, %f1903; - sub.f32 %f1905, %f1900, %f1904; - add.f32 %f1906, %f1903, %f1905; - add.f32 %f1907, %f180, %f1904; - sub.f32 %f1908, %f180, %f1907; - add.f32 %f1909, %f1904, %f1908; - add.f32 %f1910, %f1906, %f1909; - add.f32 %f1911, %f181, %f1910; - add.f32 %f1912, %f1907, %f1911; - sub.f32 %f1913, %f1907, %f1912; - add.f32 %f1914, %f1911, %f1913; - mul.rn.f32 %f1916, %f3160, %f1912; - neg.f32 %f1917, %f1916; - fma.rn.f32 %f1918, %f3160, %f1912, %f1917; - fma.rn.f32 %f1919, %f3160, %f1914, %f1918; - fma.rn.f32 %f1921, %f3334, %f1912, %f1919; - add.rn.f32 %f1922, %f1916, %f1921; - neg.f32 %f1923, %f1922; - add.rn.f32 %f1924, %f1916, %f1923; - add.rn.f32 %f1925, %f1924, %f1921; - mov.b32 %r197, %f1922; - setp.eq.s32 %p203, %r197, 1118925336; - add.s32 %r198, %r197, -1; - mov.b32 %f1926, %r198; - add.f32 %f1927, %f1925, 0f37000000; - selp.f32 %f1928, %f1926, %f1922, %p203; - selp.f32 %f338, %f1927, %f1925, %p203; - mul.f32 %f1929, %f1928, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1930, %f1929; - fma.rn.f32 %f1932, %f1930, %f3155, %f1928; - fma.rn.f32 %f1934, %f1930, %f3156, %f1932; - mul.f32 %f1935, %f1934, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1936, %f1935; - add.f32 %f1937, %f1930, 0f00000000; - ex2.approx.f32 %f1938, %f1937; - mul.f32 %f1939, %f1936, %f1938; - setp.lt.f32 %p204, %f1928, 0fC2D20000; - selp.f32 %f1940, 0f00000000, %f1939, %p204; - setp.gt.f32 %p205, %f1928, 0f42D20000; - selp.f32 %f3331, 0f7F800000, %f1940, %p205; - setp.eq.f32 %p206, %f3331, 0f7F800000; - @%p206 bra BB2_119; - - fma.rn.f32 %f3331, %f3331, %f338, %f3331; - -BB2_119: - setp.geu.f32 %p378, %f109, 0f00000000; - mov.b32 %r199, %f3331; - xor.b32 %r200, %r199, -2147483648; - mov.b32 %f1941, %r200; - selp.f32 %f342, %f1941, %f3331, %p6; - setp.eq.f32 %p207, %f109, 0f00000000; - selp.f32 %f3332, %f182, %f342, %p207; - @%p378 bra BB2_121; - - mov.f32 %f3164, 0f3FC00000; - cvt.rzi.f32.f32 %f1943, %f3164; - setp.neu.f32 %p208, %f1943, 0f3FC00000; - selp.f32 %f3332, 0f7FFFFFFF, %f342, %p208; - -BB2_121: - mul.f32 %f3171, %f113, %f113; - mul.f32 %f3170, %f112, %f112; - mul.f32 %f3169, %f167, %f167; - mul.f32 %f3168, %f166, %f166; - selp.f32 %f3167, 0fFF800000, 0f7F800000, %p6; - add.f32 %f3166, %f109, 0f3FC00000; - abs.f32 %f3165, %f109; - add.f32 %f1945, %f3165, 0f3FC00000; - mov.b32 %r201, %f1945; - setp.gt.s32 %p209, %r201, 2139095039; - setp.gtu.f32 %p210, %f3165, 0f7F800000; - selp.f32 %f1946, %f3166, %f3332, %p210; - setp.neu.f32 %p211, %f3165, 0f7F800000; - selp.f32 %f1947, %f1946, %f3167, %p211; - selp.f32 %f1948, %f1947, %f3332, %p209; - setp.eq.f32 %p212, %f109, 0f3F800000; - selp.f32 %f1949, 0f3F800000, %f1948, %p212; - div.rn.f32 %f1950, %f97, %f1949; - fma.rn.f32 %f1951, %f337, %f3170, %f184; - fma.rn.f32 %f1952, %f1950, %f3171, %f185; - mul.f32 %f1953, %f301, %f1951; - fma.rn.f32 %f1954, %f302, %f3168, %f1953; - fma.rn.f32 %f1955, %f327, %f3169, %f1954; - fma.rn.f32 %f346, %f326, %f1952, %f1955; - mul.f32 %f1956, %f210, %f3347; - fma.rn.f32 %f347, %f224, %f1956, %f3258; - mad.lo.s32 %r202, %r320, %r63, %r319; - add.s32 %r203, %r202, %r4; - mul.wide.s32 %rd71, %r203, 4; - add.s64 %rd72, %rd1, %rd71; - ld.global.f32 %f348, [%rd72]; - mul.f32 %f349, %f210, %f224; - setp.leu.f32 %p213, %f347, 0f3C23D70A; - mov.f32 %f3333, %f3334; - @%p213 bra BB2_123; - - div.rn.f32 %f1957, %f348, %f347; - add.f32 %f3333, %f1957, 0fBF800000; - -BB2_123: - @%p213 bra BB2_125; - - mul.f32 %f1959, %f347, %f347; - div.rn.f32 %f3334, %f348, %f1959; - -BB2_125: - mov.f32 %f1960, 0f47C35000; - min.f32 %f1961, %f3333, %f1960; - fma.rn.f32 %f3299, %f1961, %f242, %f3299; - mul.f32 %f1962, %f1961, %f243; - mul.f32 %f1963, %f242, %f242; - min.f32 %f1964, %f3334, %f1960; - mul.f32 %f1965, %f1964, %f1963; - sub.f32 %f1966, %f1962, %f1965; - add.f32 %f3304, %f1966, %f3304; - fma.rn.f32 %f3298, %f1961, %f282, %f3298; - mul.f32 %f1967, %f1961, %f283; - mul.f32 %f1968, %f282, %f282; - mul.f32 %f1969, %f1964, %f1968; - sub.f32 %f1970, %f1967, %f1969; - add.f32 %f3303, %f1970, %f3303; - fma.rn.f32 %f3297, %f1961, %f349, %f3297; - mul.f32 %f1971, %f1961, 0f00000000; - mul.f32 %f1972, %f349, %f349; - mul.f32 %f1973, %f1964, %f1972; - sub.f32 %f1974, %f1971, %f1973; - add.f32 %f3302, %f1974, %f3302; - add.f32 %f3296, %f3296, %f1961; - sub.f32 %f1975, %f1971, %f1964; - add.f32 %f3301, %f1975, %f3301; - fma.rn.f32 %f3295, %f1961, %f328, %f3295; - mul.f32 %f1976, %f1961, %f346; - mul.f32 %f1977, %f328, %f328; - mul.f32 %f1978, %f1964, %f1977; - sub.f32 %f1979, %f1976, %f1978; - add.f32 %f3300, %f1979, %f3300; - add.s32 %r320, %r320, 1; - setp.lt.s32 %p215, %r320, %r63; - @%p215 bra BB2_41; - - add.s32 %r319, %r319, 1; - setp.lt.s32 %p216, %r319, %r63; - @%p216 bra BB2_40; - -BB2_127: - div.rn.f32 %f1980, %f3299, %f3304; - mov.f32 %f1981, 0fBF800000; - max.f32 %f1982, %f1980, %f1981; - mov.f32 %f1983, 0f3F800000; - min.f32 %f1984, %f1982, %f1983; - sub.f32 %f3349, %f3349, %f1984; - div.rn.f32 %f1985, %f3298, %f3303; - max.f32 %f1986, %f1985, %f1981; - min.f32 %f1987, %f1986, %f1983; - sub.f32 %f3348, %f3348, %f1987; - neg.f32 %f1988, %f3347; - div.rn.f32 %f1989, %f3297, %f3302; - max.f32 %f1990, %f1989, %f1988; - min.f32 %f1991, %f1990, %f3347; - sub.f32 %f1992, %f3347, %f1991; - neg.f32 %f1993, %f3258; - div.rn.f32 %f1994, %f3296, %f3301; - max.f32 %f1995, %f1994, %f1993; - min.f32 %f1996, %f1995, %f3258; - sub.f32 %f1997, %f3258, %f1996; - div.rn.f32 %f1998, %f3295, %f3300; - mov.f32 %f1999, 0fBDCCCCCD; - max.f32 %f2000, %f1998, %f1999; - mov.f32 %f2001, 0f3DCCCCCD; - min.f32 %f2002, %f2000, %f2001; - sub.f32 %f3345, %f3345, %f2002; - max.f32 %f3347, %f1992, %f1983; - mov.f32 %f2003, 0f3C23D70A; - max.f32 %f3258, %f1997, %f2003; - add.s32 %r318, %r318, 1; - setp.lt.s32 %p217, %r318, %r64; - @%p217 bra BB2_38; - -BB2_128: - mov.f32 %f3393, 0f00000000; - @%p19 bra BB2_213; - - mul.f32 %f384, %f643, 0f3F000000; - mul.f32 %f385, %f650, 0f3F000000; - mul.f32 %f2006, %f649, %f649; - mul.f32 %f2007, %f2006, %f649; - mul.f32 %f2008, %f2006, %f2006; - sub.f32 %f2009, %f3345, %f648; - div.rn.f32 %f2010, %f2009, %f649; - mul.f32 %f2011, %f2010, %f2010; - mul.f32 %f2012, %f2010, %f2011; - add.f32 %f2013, %f2011, 0f3F800000; - fma.rn.f32 %f2014, %f2012, %f644, %f2013; - mul.f32 %f2015, %f2011, %f2011; - fma.rn.f32 %f386, %f2015, %f646, %f2014; - add.f32 %f2016, %f3345, %f648; - div.rn.f32 %f2017, %f2016, %f649; - mul.f32 %f2018, %f2017, %f2017; - mul.f32 %f2019, %f2017, %f2018; - add.f32 %f2020, %f2018, 0f3F800000; - fma.rn.f32 %f2021, %f2019, %f645, %f2020; - mul.f32 %f2022, %f2018, %f2018; - fma.rn.f32 %f387, %f2022, %f647, %f2021; - div.rn.f32 %f388, %f3347, 0fC0206C98; - add.f32 %f2023, %f2009, %f2009; - div.rn.f32 %f2024, %f2023, %f2006; - mul.f32 %f2025, %f2009, %f2009; - mul.f32 %f2026, %f644, 0f40400000; - mul.f32 %f2027, %f2026, %f2025; - div.rn.f32 %f2028, %f2027, %f2007; - add.f32 %f2029, %f2024, %f2028; - mul.f32 %f2030, %f2009, %f2025; - mul.f32 %f2031, %f646, 0f40800000; - mul.f32 %f2032, %f2031, %f2030; - div.rn.f32 %f2033, %f2032, %f2008; - add.f32 %f389, %f2029, %f2033; - add.f32 %f2034, %f2016, %f2016; - div.rn.f32 %f2035, %f2034, %f2006; - mul.f32 %f2036, %f2016, %f2016; - mul.f32 %f2037, %f645, 0f40400000; - mul.f32 %f2038, %f2037, %f2036; - div.rn.f32 %f2039, %f2038, %f2007; - add.f32 %f2040, %f2035, %f2039; - mul.f32 %f2041, %f2016, %f2036; - mul.f32 %f2042, %f647, 0f40800000; - mul.f32 %f2043, %f2042, %f2041; - div.rn.f32 %f2044, %f2043, %f2008; - add.f32 %f390, %f2040, %f2044; - mov.u32 %r204, 0; - mov.f32 %f3393, 0f00000000; - sqrt.rn.f32 %f2045, %f386; - sqrt.rn.f32 %f2047, %f387; - div.rn.f32 %f2088, %f384, %f2045; - div.rn.f32 %f2089, %f385, %f2047; - mul.f32 %f427, %f2088, %f389; - mul.f32 %f428, %f2089, %f390; - mov.u32 %r321, %r204; - -BB2_130: - mul.f32 %f2046, %f2045, %f643; - mul.f32 %f392, %f2047, %f650; - div.rn.f32 %f2049, %f684, %f2046; - div.rn.f32 %f2050, %f2049, %f2046; - cvt.rn.f32.s32 %f2051, %r321; - sub.f32 %f393, %f2051, %f3349; - add.f32 %f394, %f393, 0f3F800000; - sqrt.rn.f32 %f2052, %f2050; - mul.f32 %f395, %f394, %f2052; - abs.f32 %f396, %f395; - mul.f32 %f397, %f395, %f395; - mul.f32 %f398, %f393, %f2052; - abs.f32 %f399, %f398; - mul.f32 %f400, %f398, %f398; - div.rn.f32 %f2053, %f684, %f392; - div.rn.f32 %f2054, %f2053, %f392; - sqrt.rn.f32 %f401, %f2054; - add.f32 %f2055, %f2051, 0f3F800000; - sub.f32 %f2056, %f2055, %f3349; - div.rn.f32 %f402, %f2056, %f2046; - mov.f32 %f2057, 0f3F800000; - cvt.rzi.f32.f32 %f2058, %f2057; - add.f32 %f2059, %f2058, %f2058; - mov.f32 %f2060, 0f40000000; - sub.f32 %f2061, %f2060, %f2059; - abs.f32 %f403, %f2061; - setp.eq.f32 %p219, %f403, 0f3F800000; - abs.f32 %f404, %f402; - setp.lt.f32 %p220, %f404, 0f00800000; - mul.f32 %f2062, %f404, 0f4B800000; - selp.f32 %f2063, 0fC3170000, 0fC2FE0000, %p220; - selp.f32 %f2064, %f2062, %f404, %p220; - mov.b32 %r206, %f2064; - and.b32 %r207, %r206, 8388607; - or.b32 %r208, %r207, 1065353216; - mov.b32 %f2065, %r208; - shr.u32 %r209, %r206, 23; - cvt.rn.f32.u32 %f2066, %r209; - add.f32 %f2067, %f2063, %f2066; - setp.gt.f32 %p221, %f2065, 0f3FB504F3; - mul.f32 %f2068, %f2065, 0f3F000000; - add.f32 %f2069, %f2067, 0f3F800000; - selp.f32 %f2070, %f2068, %f2065, %p221; - selp.f32 %f2071, %f2069, %f2067, %p221; - add.f32 %f405, %f2070, 0fBF800000; - add.f32 %f406, %f2070, 0f3F800000; - add.f32 %f407, %f405, %f405; - mov.f32 %f2072, 0f3F317200; - mul.rn.f32 %f408, %f2071, %f2072; - mov.f32 %f2073, 0f35BFBE8E; - mul.rn.f32 %f409, %f2071, %f2073; - setp.lt.f32 %p222, %f402, 0f00000000; - and.pred %p11, %p222, %p219; - add.f32 %f2074, %f402, %f402; - selp.f32 %f410, %f2074, 0f00000000, %p219; - add.f32 %f2075, %f404, 0f40000000; - mov.b32 %r42, %f2075; - add.f32 %f411, %f402, 0f40000000; - selp.f32 %f412, 0fFF800000, 0f7F800000, %p11; - div.rn.f32 %f413, %f393, %f2046; - abs.f32 %f414, %f413; - setp.lt.f32 %p223, %f414, 0f00800000; - mul.f32 %f2076, %f414, 0f4B800000; - selp.f32 %f2077, 0fC3170000, 0fC2FE0000, %p223; - selp.f32 %f2078, %f2076, %f414, %p223; - mov.b32 %r210, %f2078; - and.b32 %r211, %r210, 8388607; - or.b32 %r212, %r211, 1065353216; - mov.b32 %f2079, %r212; - shr.u32 %r213, %r210, 23; - cvt.rn.f32.u32 %f2080, %r213; - add.f32 %f2081, %f2077, %f2080; - setp.gt.f32 %p224, %f2079, 0f3FB504F3; - mul.f32 %f2082, %f2079, 0f3F000000; - add.f32 %f2083, %f2081, 0f3F800000; - selp.f32 %f2084, %f2082, %f2079, %p224; - selp.f32 %f2085, %f2083, %f2081, %p224; - add.f32 %f415, %f2084, 0fBF800000; - add.f32 %f416, %f2084, 0f3F800000; - add.f32 %f417, %f415, %f415; - mul.rn.f32 %f418, %f2085, %f2072; - mul.rn.f32 %f419, %f2085, %f2073; - setp.lt.f32 %p225, %f413, 0f00000000; - and.pred %p12, %p225, %p219; - add.f32 %f2086, %f413, %f413; - selp.f32 %f420, %f2086, 0f00000000, %p219; - add.f32 %f2087, %f414, 0f40000000; - mov.b32 %r43, %f2087; - add.f32 %f421, %f413, 0f40000000; - selp.f32 %f422, 0fFF800000, 0f7F800000, %p12; - div.rn.f32 %f423, %f388, %f2046; - div.rn.f32 %f424, %f388, %f392; - div.rn.f32 %f425, %f423, %f2046; - div.rn.f32 %f426, %f424, %f392; - mov.b32 %r214, %f398; - and.b32 %r44, %r214, -2147483648; - mov.b32 %r215, %f395; - and.b32 %r45, %r215, -2147483648; - setp.geu.f32 %p13, %f402, 0f00000000; - setp.geu.f32 %p14, %f413, 0f00000000; - ld.local.f32 %f3365, [%rd2]; - ld.local.f32 %f3364, [%rd2+4]; - ld.local.f32 %f3363, [%rd2+8]; - ld.local.f32 %f3362, [%rd2+12]; - ld.local.f32 %f3361, [%rd2+16]; - ld.local.f32 %f3360, [%rd2+24]; - ld.local.f32 %f3359, [%rd2+28]; - ld.local.f32 %f3358, [%rd2+32]; - ld.local.f32 %f3357, [%rd2+36]; - ld.local.f32 %f3356, [%rd2+48]; - ld.local.f32 %f3355, [%rd2+52]; - ld.local.f32 %f3354, [%rd2+56]; - ld.local.f32 %f3353, [%rd2+72]; - ld.local.f32 %f3352, [%rd2+76]; - ld.local.f32 %f3351, [%rd2+96]; - mov.u32 %r322, %r204; - -BB2_131: - setp.ltu.f32 %p226, %f396, 0f3F800000; - @%p226 bra BB2_133; - bra.uni BB2_132; - -BB2_133: - mov.f32 %f2108, 0f3BA0C9F8; - mov.f32 %f2109, 0fBA1268FB; - fma.rn.f32 %f2110, %f2109, %f397, %f2108; - mov.f32 %f2111, 0fBCDABFD4; - fma.rn.f32 %f2112, %f2110, %f397, %f2111; - mov.f32 %f2113, 0f3DE70331; - fma.rn.f32 %f2114, %f2112, %f397, %f2113; - mov.f32 %f2115, 0fBEC09330; - fma.rn.f32 %f2116, %f2114, %f397, %f2115; - mov.f32 %f2117, 0f3F906EBA; - fma.rn.f32 %f2118, %f2116, %f397, %f2117; - mul.f32 %f3367, %f395, %f2118; - bra.uni BB2_134; - -BB2_132: - setp.ltu.f32 %p227, %f396, 0f407AD445; - mov.f32 %f2090, 0f3A03BB71; - mov.f32 %f2091, 0fB7B730FB; - fma.rn.f32 %f2092, %f2091, %f396, %f2090; - mov.f32 %f2093, 0fBBACA3B3; - fma.rn.f32 %f2094, %f2092, %f396, %f2093; - mov.f32 %f2095, 0f3D0A7445; - fma.rn.f32 %f2096, %f2094, %f396, %f2095; - mov.f32 %f2097, 0fBE1B3B75; - fma.rn.f32 %f2098, %f2096, %f396, %f2097; - mov.f32 %f2099, 0fBF6B385A; - fma.rn.f32 %f2100, %f2098, %f396, %f2099; - mov.f32 %f2101, 0fBFD0316E; - fma.rn.f32 %f2102, %f2100, %f396, %f2101; - mov.f32 %f2103, 0fBA031CCE; - fma.rn.f32 %f2104, %f2102, %f396, %f2103; - ex2.approx.ftz.f32 %f2105, %f2104; - sub.f32 %f2107, %f2057, %f2105; - mov.b32 %r216, %f2107; - selp.b32 %r217, %r216, 1065353216, %p227; - or.b32 %r218, %r217, %r45; - mov.b32 %f3367, %r218; - -BB2_134: - setp.ltu.f32 %p228, %f399, 0f3F800000; - @%p228 bra BB2_136; - bra.uni BB2_135; - -BB2_136: - mov.f32 %f2137, 0f3BA0C9F8; - mov.f32 %f2138, 0fBA1268FB; - fma.rn.f32 %f2139, %f2138, %f400, %f2137; - mov.f32 %f2140, 0fBCDABFD4; - fma.rn.f32 %f2141, %f2139, %f400, %f2140; - mov.f32 %f2142, 0f3DE70331; - fma.rn.f32 %f2143, %f2141, %f400, %f2142; - mov.f32 %f2144, 0fBEC09330; - fma.rn.f32 %f2145, %f2143, %f400, %f2144; - mov.f32 %f2146, 0f3F906EBA; - fma.rn.f32 %f2147, %f2145, %f400, %f2146; - mul.f32 %f3368, %f398, %f2147; - bra.uni BB2_137; - -BB2_135: - setp.ltu.f32 %p229, %f399, 0f407AD445; - mov.f32 %f2119, 0f3A03BB71; - mov.f32 %f2120, 0fB7B730FB; - fma.rn.f32 %f2121, %f2120, %f399, %f2119; - mov.f32 %f2122, 0fBBACA3B3; - fma.rn.f32 %f2123, %f2121, %f399, %f2122; - mov.f32 %f2124, 0f3D0A7445; - fma.rn.f32 %f2125, %f2123, %f399, %f2124; - mov.f32 %f2126, 0fBE1B3B75; - fma.rn.f32 %f2127, %f2125, %f399, %f2126; - mov.f32 %f2128, 0fBF6B385A; - fma.rn.f32 %f2129, %f2127, %f399, %f2128; - mov.f32 %f2130, 0fBFD0316E; - fma.rn.f32 %f2131, %f2129, %f399, %f2130; - mov.f32 %f2132, 0fBA031CCE; - fma.rn.f32 %f2133, %f2131, %f399, %f2132; - ex2.approx.ftz.f32 %f2134, %f2133; - sub.f32 %f2136, %f2057, %f2134; - mov.b32 %r219, %f2136; - selp.b32 %r220, %r219, 1065353216, %p229; - or.b32 %r221, %r220, %r44; - mov.b32 %f3368, %r221; - -BB2_137: - sub.f32 %f2148, %f3367, %f3368; - mul.f32 %f466, %f2148, 0f3F000000; - cvt.rn.f32.s32 %f467, %r322; - sub.f32 %f468, %f467, %f3348; - add.f32 %f469, %f468, 0f3F800000; - mul.f32 %f470, %f469, %f401; - abs.f32 %f471, %f470; - setp.ltu.f32 %p230, %f471, 0f3F800000; - @%p230 bra BB2_139; - bra.uni BB2_138; - -BB2_139: - mul.f32 %f2167, %f470, %f470; - mov.f32 %f2168, 0f3BA0C9F8; - mov.f32 %f2169, 0fBA1268FB; - fma.rn.f32 %f2170, %f2169, %f2167, %f2168; - mov.f32 %f2171, 0fBCDABFD4; - fma.rn.f32 %f2172, %f2170, %f2167, %f2171; - mov.f32 %f2173, 0f3DE70331; - fma.rn.f32 %f2174, %f2172, %f2167, %f2173; - mov.f32 %f2175, 0fBEC09330; - fma.rn.f32 %f2176, %f2174, %f2167, %f2175; - mov.f32 %f2177, 0f3F906EBA; - fma.rn.f32 %f2178, %f2176, %f2167, %f2177; - mul.f32 %f3369, %f470, %f2178; - bra.uni BB2_140; - -BB2_138: - mov.f32 %f2149, 0f3A03BB71; - mov.f32 %f2150, 0fB7B730FB; - fma.rn.f32 %f2151, %f2150, %f471, %f2149; - mov.f32 %f2152, 0fBBACA3B3; - fma.rn.f32 %f2153, %f2151, %f471, %f2152; - mov.f32 %f2154, 0f3D0A7445; - fma.rn.f32 %f2155, %f2153, %f471, %f2154; - mov.f32 %f2156, 0fBE1B3B75; - fma.rn.f32 %f2157, %f2155, %f471, %f2156; - mov.f32 %f2158, 0fBF6B385A; - fma.rn.f32 %f2159, %f2157, %f471, %f2158; - mov.f32 %f2160, 0fBFD0316E; - fma.rn.f32 %f2161, %f2159, %f471, %f2160; - mov.f32 %f2162, 0fBA031CCE; - fma.rn.f32 %f2163, %f2161, %f471, %f2162; - ex2.approx.ftz.f32 %f2164, %f2163; - sub.f32 %f2166, %f2057, %f2164; - mov.b32 %r222, %f2166; - setp.ltu.f32 %p231, %f471, 0f407AD445; - selp.b32 %r223, %r222, 1065353216, %p231; - mov.b32 %r224, %f470; - and.b32 %r225, %r224, -2147483648; - or.b32 %r226, %r223, %r225; - mov.b32 %f3369, %r226; - -BB2_140: - mul.f32 %f475, %f468, %f401; - abs.f32 %f476, %f475; - setp.ltu.f32 %p232, %f476, 0f3F800000; - @%p232 bra BB2_142; - bra.uni BB2_141; - -BB2_142: - mul.f32 %f2197, %f475, %f475; - mov.f32 %f2198, 0f3BA0C9F8; - mov.f32 %f2199, 0fBA1268FB; - fma.rn.f32 %f2200, %f2199, %f2197, %f2198; - mov.f32 %f2201, 0fBCDABFD4; - fma.rn.f32 %f2202, %f2200, %f2197, %f2201; - mov.f32 %f2203, 0f3DE70331; - fma.rn.f32 %f2204, %f2202, %f2197, %f2203; - mov.f32 %f2205, 0fBEC09330; - fma.rn.f32 %f2206, %f2204, %f2197, %f2205; - mov.f32 %f2207, 0f3F906EBA; - fma.rn.f32 %f2208, %f2206, %f2197, %f2207; - mul.f32 %f3370, %f475, %f2208; - bra.uni BB2_143; - -BB2_141: - mov.f32 %f2179, 0f3A03BB71; - mov.f32 %f2180, 0fB7B730FB; - fma.rn.f32 %f2181, %f2180, %f476, %f2179; - mov.f32 %f2182, 0fBBACA3B3; - fma.rn.f32 %f2183, %f2181, %f476, %f2182; - mov.f32 %f2184, 0f3D0A7445; - fma.rn.f32 %f2185, %f2183, %f476, %f2184; - mov.f32 %f2186, 0fBE1B3B75; - fma.rn.f32 %f2187, %f2185, %f476, %f2186; - mov.f32 %f2188, 0fBF6B385A; - fma.rn.f32 %f2189, %f2187, %f476, %f2188; - mov.f32 %f2190, 0fBFD0316E; - fma.rn.f32 %f2191, %f2189, %f476, %f2190; - mov.f32 %f2192, 0fBA031CCE; - fma.rn.f32 %f2193, %f2191, %f476, %f2192; - ex2.approx.ftz.f32 %f2194, %f2193; - sub.f32 %f2196, %f2057, %f2194; - mov.b32 %r227, %f2196; - setp.ltu.f32 %p233, %f476, 0f407AD445; - selp.b32 %r228, %r227, 1065353216, %p233; - mov.b32 %r229, %f475; - and.b32 %r230, %r229, -2147483648; - or.b32 %r231, %r228, %r230; - mov.b32 %f3370, %r231; - -BB2_143: - sub.f32 %f2211, %f3369, %f3370; - mul.f32 %f480, %f2211, 0f3F000000; - // inline asm - rcp.approx.ftz.f32 %f2209,%f406; - // inline asm - mul.f32 %f2212, %f2209, %f407; - mul.f32 %f2213, %f2212, %f2212; - mov.f32 %f2214, 0f3C4CAF63; - mov.f32 %f2215, 0f3B18F0FE; - fma.rn.f32 %f2216, %f2215, %f2213, %f2214; - mov.f32 %f2217, 0f3DAAAABD; - fma.rn.f32 %f2218, %f2216, %f2213, %f2217; - mul.rn.f32 %f2219, %f2218, %f2213; - mul.rn.f32 %f2220, %f2219, %f2212; - sub.f32 %f2221, %f405, %f2212; - neg.f32 %f2222, %f2212; - add.f32 %f2223, %f2221, %f2221; - fma.rn.f32 %f2224, %f2222, %f405, %f2223; - mul.rn.f32 %f2225, %f2209, %f2224; - add.f32 %f2226, %f2220, %f2212; - sub.f32 %f2227, %f2212, %f2226; - add.f32 %f2228, %f2220, %f2227; - add.f32 %f2229, %f2225, %f2228; - add.f32 %f2230, %f2226, %f2229; - sub.f32 %f2231, %f2226, %f2230; - add.f32 %f2232, %f2229, %f2231; - add.f32 %f2233, %f408, %f2230; - sub.f32 %f2234, %f408, %f2233; - add.f32 %f2235, %f2230, %f2234; - add.f32 %f2236, %f2232, %f2235; - add.f32 %f2237, %f409, %f2236; - add.f32 %f2238, %f2233, %f2237; - sub.f32 %f2239, %f2233, %f2238; - add.f32 %f2240, %f2237, %f2239; - mul.rn.f32 %f2242, %f2060, %f2238; - neg.f32 %f2243, %f2242; - fma.rn.f32 %f2244, %f2060, %f2238, %f2243; - fma.rn.f32 %f2245, %f2060, %f2240, %f2244; - mov.f32 %f2246, 0f00000000; - fma.rn.f32 %f2247, %f2246, %f2238, %f2245; - add.rn.f32 %f2248, %f2242, %f2247; - neg.f32 %f2249, %f2248; - add.rn.f32 %f2250, %f2242, %f2249; - add.rn.f32 %f2251, %f2250, %f2247; - mov.b32 %r232, %f2248; - setp.eq.s32 %p234, %r232, 1118925336; - add.s32 %r233, %r232, -1; - mov.b32 %f2252, %r233; - add.f32 %f2253, %f2251, 0f37000000; - selp.f32 %f2254, %f2252, %f2248, %p234; - selp.f32 %f481, %f2253, %f2251, %p234; - mul.f32 %f2255, %f2254, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2256, %f2255; - mov.f32 %f2257, 0fBF317200; - fma.rn.f32 %f2258, %f2256, %f2257, %f2254; - mov.f32 %f2259, 0fB5BFBE8E; - fma.rn.f32 %f2260, %f2256, %f2259, %f2258; - mul.f32 %f2261, %f2260, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2262, %f2261; - add.f32 %f2263, %f2256, 0f00000000; - ex2.approx.f32 %f2264, %f2263; - mul.f32 %f2265, %f2262, %f2264; - setp.lt.f32 %p235, %f2254, 0fC2D20000; - selp.f32 %f2266, 0f00000000, %f2265, %p235; - setp.gt.f32 %p236, %f2254, 0f42D20000; - selp.f32 %f3371, 0f7F800000, %f2266, %p236; - setp.eq.f32 %p237, %f3371, 0f7F800000; - @%p237 bra BB2_145; - - fma.rn.f32 %f3371, %f3371, %f481, %f3371; - -BB2_145: - mov.b32 %r234, %f3371; - xor.b32 %r235, %r234, -2147483648; - mov.b32 %f2267, %r235; - selp.f32 %f485, %f2267, %f3371, %p11; - setp.eq.f32 %p238, %f402, 0f00000000; - selp.f32 %f3372, %f410, %f485, %p238; - @%p13 bra BB2_147; - - cvt.rzi.f32.f32 %f2269, %f2060; - setp.neu.f32 %p239, %f2269, 0f40000000; - selp.f32 %f3372, 0f7FFFFFFF, %f485, %p239; - -BB2_147: - setp.gtu.f32 %p240, %f404, 0f7F800000; - selp.f32 %f2272, %f411, %f3372, %p240; - setp.neu.f32 %p241, %f404, 0f7F800000; - selp.f32 %f2273, %f2272, %f412, %p241; - setp.gt.s32 %p242, %r42, 2139095039; - selp.f32 %f2274, %f2273, %f3372, %p242; - mul.f32 %f2275, %f2274, 0fBF000000; - setp.eq.f32 %p243, %f402, 0f3F800000; - selp.f32 %f2276, 0fBF000000, %f2275, %p243; - mul.f32 %f2277, %f2276, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2278, %f2277; - fma.rn.f32 %f2280, %f2278, %f2257, %f2276; - fma.rn.f32 %f2282, %f2278, %f2259, %f2280; - mul.f32 %f2283, %f2282, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2284, %f2283; - add.f32 %f2285, %f2278, 0f00000000; - ex2.approx.f32 %f2286, %f2285; - mul.f32 %f2287, %f2284, %f2286; - setp.lt.f32 %p244, %f2276, 0fC2D20000; - selp.f32 %f2288, 0f00000000, %f2287, %p244; - setp.gt.f32 %p245, %f2276, 0f42D20000; - selp.f32 %f489, 0f7F800000, %f2288, %p245; - // inline asm - rcp.approx.ftz.f32 %f2270,%f416; - // inline asm - mul.f32 %f2289, %f2270, %f417; - mul.f32 %f2290, %f2289, %f2289; - fma.rn.f32 %f2293, %f2215, %f2290, %f2214; - fma.rn.f32 %f2295, %f2293, %f2290, %f2217; - mul.rn.f32 %f2296, %f2295, %f2290; - mul.rn.f32 %f2297, %f2296, %f2289; - sub.f32 %f2298, %f415, %f2289; - neg.f32 %f2299, %f2289; - add.f32 %f2300, %f2298, %f2298; - fma.rn.f32 %f2301, %f2299, %f415, %f2300; - mul.rn.f32 %f2302, %f2270, %f2301; - add.f32 %f2303, %f2297, %f2289; - sub.f32 %f2304, %f2289, %f2303; - add.f32 %f2305, %f2297, %f2304; - add.f32 %f2306, %f2302, %f2305; - add.f32 %f2307, %f2303, %f2306; - sub.f32 %f2308, %f2303, %f2307; - add.f32 %f2309, %f2306, %f2308; - add.f32 %f2310, %f418, %f2307; - sub.f32 %f2311, %f418, %f2310; - add.f32 %f2312, %f2307, %f2311; - add.f32 %f2313, %f2309, %f2312; - add.f32 %f2314, %f419, %f2313; - add.f32 %f2315, %f2310, %f2314; - sub.f32 %f2316, %f2310, %f2315; - add.f32 %f2317, %f2314, %f2316; - mul.rn.f32 %f2319, %f2060, %f2315; - neg.f32 %f2320, %f2319; - fma.rn.f32 %f2321, %f2060, %f2315, %f2320; - fma.rn.f32 %f2322, %f2060, %f2317, %f2321; - fma.rn.f32 %f2324, %f2246, %f2315, %f2322; - add.rn.f32 %f2325, %f2319, %f2324; - neg.f32 %f2326, %f2325; - add.rn.f32 %f2327, %f2319, %f2326; - add.rn.f32 %f2328, %f2327, %f2324; - mov.b32 %r236, %f2325; - setp.eq.s32 %p246, %r236, 1118925336; - add.s32 %r237, %r236, -1; - mov.b32 %f2329, %r237; - add.f32 %f2330, %f2328, 0f37000000; - selp.f32 %f2331, %f2329, %f2325, %p246; - selp.f32 %f490, %f2330, %f2328, %p246; - mul.f32 %f2332, %f2331, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2333, %f2332; - fma.rn.f32 %f2334, %f2333, %f2257, %f2331; - fma.rn.f32 %f2335, %f2333, %f2259, %f2334; - mul.f32 %f2336, %f2335, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2337, %f2336; - add.f32 %f2338, %f2333, 0f00000000; - ex2.approx.f32 %f2339, %f2338; - mul.f32 %f2340, %f2337, %f2339; - setp.lt.f32 %p247, %f2331, 0fC2D20000; - selp.f32 %f2341, 0f00000000, %f2340, %p247; - setp.gt.f32 %p248, %f2331, 0f42D20000; - selp.f32 %f3373, 0f7F800000, %f2341, %p248; - setp.eq.f32 %p249, %f3373, 0f7F800000; - @%p249 bra BB2_149; - - fma.rn.f32 %f3373, %f3373, %f490, %f3373; - -BB2_149: - mov.b32 %r238, %f3373; - xor.b32 %r239, %r238, -2147483648; - mov.b32 %f2342, %r239; - selp.f32 %f494, %f2342, %f3373, %p12; - setp.eq.f32 %p250, %f413, 0f00000000; - selp.f32 %f3374, %f420, %f494, %p250; - @%p14 bra BB2_151; - - cvt.rzi.f32.f32 %f2344, %f2060; - setp.neu.f32 %p251, %f2344, 0f40000000; - selp.f32 %f3374, 0f7FFFFFFF, %f494, %p251; - -BB2_151: - setp.gtu.f32 %p252, %f414, 0f7F800000; - selp.f32 %f2347, %f421, %f3374, %p252; - setp.neu.f32 %p253, %f414, 0f7F800000; - selp.f32 %f2348, %f2347, %f422, %p253; - setp.gt.s32 %p254, %r43, 2139095039; - selp.f32 %f2349, %f2348, %f3374, %p254; - mul.f32 %f2350, %f2349, 0fBF000000; - setp.eq.f32 %p255, %f413, 0f3F800000; - selp.f32 %f2351, 0fBF000000, %f2350, %p255; - mul.f32 %f2352, %f2351, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2353, %f2352; - fma.rn.f32 %f2355, %f2353, %f2257, %f2351; - fma.rn.f32 %f2357, %f2353, %f2259, %f2355; - mul.f32 %f2358, %f2357, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2359, %f2358; - add.f32 %f2360, %f2353, 0f00000000; - ex2.approx.f32 %f2361, %f2360; - mul.f32 %f2362, %f2359, %f2361; - setp.lt.f32 %p256, %f2351, 0fC2D20000; - selp.f32 %f2363, 0f00000000, %f2362, %p256; - setp.gt.f32 %p257, %f2351, 0f42D20000; - selp.f32 %f2364, 0f7F800000, %f2363, %p257; - sub.f32 %f2365, %f489, %f2364; - mul.f32 %f2366, %f423, %f2365; - mul.f32 %f498, %f480, %f2366; - add.f32 %f2367, %f467, 0f3F800000; - sub.f32 %f2368, %f2367, %f3348; - div.rn.f32 %f499, %f2368, %f392; - abs.f32 %f500, %f499; - setp.lt.f32 %p258, %f500, 0f00800000; - mul.f32 %f2369, %f500, 0f4B800000; - selp.f32 %f2370, 0fC3170000, 0fC2FE0000, %p258; - selp.f32 %f2371, %f2369, %f500, %p258; - mov.b32 %r240, %f2371; - and.b32 %r241, %r240, 8388607; - or.b32 %r242, %r241, 1065353216; - mov.b32 %f2372, %r242; - shr.u32 %r243, %r240, 23; - cvt.rn.f32.u32 %f2373, %r243; - add.f32 %f2374, %f2370, %f2373; - setp.gt.f32 %p259, %f2372, 0f3FB504F3; - mul.f32 %f2375, %f2372, 0f3F000000; - add.f32 %f2376, %f2374, 0f3F800000; - selp.f32 %f2377, %f2375, %f2372, %p259; - selp.f32 %f2378, %f2376, %f2374, %p259; - add.f32 %f501, %f2377, 0fBF800000; - add.f32 %f2346, %f2377, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f2345,%f2346; - // inline asm - add.f32 %f503, %f501, %f501; - mul.f32 %f2379, %f2345, %f503; - mul.f32 %f2380, %f2379, %f2379; - fma.rn.f32 %f2383, %f2215, %f2380, %f2214; - fma.rn.f32 %f2385, %f2383, %f2380, %f2217; - mul.rn.f32 %f2386, %f2385, %f2380; - mul.rn.f32 %f2387, %f2386, %f2379; - sub.f32 %f2388, %f501, %f2379; - neg.f32 %f2389, %f2379; - add.f32 %f2390, %f2388, %f2388; - fma.rn.f32 %f2391, %f2389, %f501, %f2390; - mul.rn.f32 %f2392, %f2345, %f2391; - add.f32 %f2393, %f2387, %f2379; - sub.f32 %f2394, %f2379, %f2393; - add.f32 %f2395, %f2387, %f2394; - add.f32 %f2396, %f2392, %f2395; - add.f32 %f2397, %f2393, %f2396; - sub.f32 %f2398, %f2393, %f2397; - add.f32 %f2399, %f2396, %f2398; - mul.rn.f32 %f504, %f2378, %f2072; - mul.rn.f32 %f505, %f2378, %f2073; - add.f32 %f2402, %f504, %f2397; - sub.f32 %f2403, %f504, %f2402; - add.f32 %f2404, %f2397, %f2403; - add.f32 %f2405, %f2399, %f2404; - add.f32 %f2406, %f505, %f2405; - add.f32 %f2407, %f2402, %f2406; - sub.f32 %f2408, %f2402, %f2407; - add.f32 %f2409, %f2406, %f2408; - mul.rn.f32 %f2411, %f2060, %f2407; - neg.f32 %f2412, %f2411; - fma.rn.f32 %f2413, %f2060, %f2407, %f2412; - fma.rn.f32 %f2414, %f2060, %f2409, %f2413; - fma.rn.f32 %f2416, %f2246, %f2407, %f2414; - add.rn.f32 %f2417, %f2411, %f2416; - neg.f32 %f2418, %f2417; - add.rn.f32 %f2419, %f2411, %f2418; - add.rn.f32 %f2420, %f2419, %f2416; - mov.b32 %r244, %f2417; - setp.eq.s32 %p260, %r244, 1118925336; - add.s32 %r245, %r244, -1; - mov.b32 %f2421, %r245; - add.f32 %f2422, %f2420, 0f37000000; - selp.f32 %f2423, %f2421, %f2417, %p260; - selp.f32 %f506, %f2422, %f2420, %p260; - mul.f32 %f2424, %f2423, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2425, %f2424; - fma.rn.f32 %f2426, %f2425, %f2257, %f2423; - fma.rn.f32 %f2427, %f2425, %f2259, %f2426; - mul.f32 %f2428, %f2427, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2429, %f2428; - add.f32 %f2430, %f2425, 0f00000000; - ex2.approx.f32 %f2431, %f2430; - mul.f32 %f2432, %f2429, %f2431; - setp.lt.f32 %p261, %f2423, 0fC2D20000; - selp.f32 %f2433, 0f00000000, %f2432, %p261; - setp.gt.f32 %p262, %f2423, 0f42D20000; - selp.f32 %f3375, 0f7F800000, %f2433, %p262; - setp.eq.f32 %p263, %f3375, 0f7F800000; - @%p263 bra BB2_153; - - fma.rn.f32 %f3375, %f3375, %f506, %f3375; - -BB2_153: - setp.lt.f32 %p264, %f499, 0f00000000; - and.pred %p15, %p264, %p219; - mov.b32 %r246, %f3375; - xor.b32 %r247, %r246, -2147483648; - mov.b32 %f2434, %r247; - selp.f32 %f3377, %f2434, %f3375, %p15; - setp.eq.f32 %p266, %f499, 0f00000000; - @%p266 bra BB2_156; - bra.uni BB2_154; - -BB2_156: - add.f32 %f2437, %f499, %f499; - selp.f32 %f3377, %f2437, 0f00000000, %p219; - bra.uni BB2_157; - -BB2_154: - setp.geu.f32 %p267, %f499, 0f00000000; - @%p267 bra BB2_157; - - cvt.rzi.f32.f32 %f2436, %f2060; - setp.neu.f32 %p268, %f2436, 0f40000000; - selp.f32 %f3377, 0f7FFFFFFF, %f3377, %p268; - -BB2_157: - add.f32 %f2438, %f500, 0f40000000; - mov.b32 %r47, %f2438; - setp.lt.s32 %p270, %r47, 2139095040; - @%p270 bra BB2_162; - - setp.gtu.f32 %p271, %f500, 0f7F800000; - @%p271 bra BB2_161; - bra.uni BB2_159; - -BB2_161: - add.f32 %f3377, %f499, 0f40000000; - bra.uni BB2_162; - -BB2_159: - setp.neu.f32 %p272, %f500, 0f7F800000; - @%p272 bra BB2_162; - - selp.f32 %f3377, 0fFF800000, 0f7F800000, %p15; - -BB2_162: - mul.f32 %f2441, %f3377, 0fBF000000; - setp.eq.f32 %p273, %f499, 0f3F800000; - selp.f32 %f2442, 0fBF000000, %f2441, %p273; - mul.f32 %f2443, %f2442, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2444, %f2443; - fma.rn.f32 %f2446, %f2444, %f2257, %f2442; - fma.rn.f32 %f2448, %f2444, %f2259, %f2446; - mul.f32 %f2449, %f2448, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2450, %f2449; - add.f32 %f2451, %f2444, 0f00000000; - ex2.approx.f32 %f2452, %f2451; - mul.f32 %f2453, %f2450, %f2452; - setp.lt.f32 %p274, %f2442, 0fC2D20000; - selp.f32 %f2454, 0f00000000, %f2453, %p274; - setp.gt.f32 %p275, %f2442, 0f42D20000; - selp.f32 %f517, 0f7F800000, %f2454, %p275; - div.rn.f32 %f518, %f468, %f392; - abs.f32 %f519, %f518; - setp.lt.f32 %p276, %f519, 0f00800000; - mul.f32 %f2455, %f519, 0f4B800000; - selp.f32 %f2456, 0fC3170000, 0fC2FE0000, %p276; - selp.f32 %f2457, %f2455, %f519, %p276; - mov.b32 %r248, %f2457; - and.b32 %r249, %r248, 8388607; - or.b32 %r250, %r249, 1065353216; - mov.b32 %f2458, %r250; - shr.u32 %r251, %r248, 23; - cvt.rn.f32.u32 %f2459, %r251; - add.f32 %f2460, %f2456, %f2459; - setp.gt.f32 %p277, %f2458, 0f3FB504F3; - mul.f32 %f2461, %f2458, 0f3F000000; - add.f32 %f2462, %f2460, 0f3F800000; - selp.f32 %f2463, %f2461, %f2458, %p277; - selp.f32 %f2464, %f2462, %f2460, %p277; - add.f32 %f520, %f2463, 0fBF800000; - add.f32 %f2440, %f2463, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f2439,%f2440; - // inline asm - add.f32 %f522, %f520, %f520; - mul.f32 %f2465, %f2439, %f522; - mul.f32 %f2466, %f2465, %f2465; - fma.rn.f32 %f2469, %f2215, %f2466, %f2214; - fma.rn.f32 %f2471, %f2469, %f2466, %f2217; - mul.rn.f32 %f2472, %f2471, %f2466; - mul.rn.f32 %f2473, %f2472, %f2465; - sub.f32 %f2474, %f520, %f2465; - neg.f32 %f2475, %f2465; - add.f32 %f2476, %f2474, %f2474; - fma.rn.f32 %f2477, %f2475, %f520, %f2476; - mul.rn.f32 %f2478, %f2439, %f2477; - add.f32 %f2479, %f2473, %f2465; - sub.f32 %f2480, %f2465, %f2479; - add.f32 %f2481, %f2473, %f2480; - add.f32 %f2482, %f2478, %f2481; - add.f32 %f2483, %f2479, %f2482; - sub.f32 %f2484, %f2479, %f2483; - add.f32 %f2485, %f2482, %f2484; - mul.rn.f32 %f523, %f2464, %f2072; - mul.rn.f32 %f524, %f2464, %f2073; - add.f32 %f2488, %f523, %f2483; - sub.f32 %f2489, %f523, %f2488; - add.f32 %f2490, %f2483, %f2489; - add.f32 %f2491, %f2485, %f2490; - add.f32 %f2492, %f524, %f2491; - add.f32 %f2493, %f2488, %f2492; - sub.f32 %f2494, %f2488, %f2493; - add.f32 %f2495, %f2492, %f2494; - mul.rn.f32 %f2497, %f2060, %f2493; - neg.f32 %f2498, %f2497; - fma.rn.f32 %f2499, %f2060, %f2493, %f2498; - fma.rn.f32 %f2500, %f2060, %f2495, %f2499; - fma.rn.f32 %f2502, %f2246, %f2493, %f2500; - add.rn.f32 %f2503, %f2497, %f2502; - neg.f32 %f2504, %f2503; - add.rn.f32 %f2505, %f2497, %f2504; - add.rn.f32 %f2506, %f2505, %f2502; - mov.b32 %r252, %f2503; - setp.eq.s32 %p278, %r252, 1118925336; - add.s32 %r253, %r252, -1; - mov.b32 %f2507, %r253; - add.f32 %f2508, %f2506, 0f37000000; - selp.f32 %f2509, %f2507, %f2503, %p278; - selp.f32 %f525, %f2508, %f2506, %p278; - mul.f32 %f2510, %f2509, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2511, %f2510; - fma.rn.f32 %f2512, %f2511, %f2257, %f2509; - fma.rn.f32 %f2513, %f2511, %f2259, %f2512; - mul.f32 %f2514, %f2513, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2515, %f2514; - add.f32 %f2516, %f2511, 0f00000000; - ex2.approx.f32 %f2517, %f2516; - mul.f32 %f2518, %f2515, %f2517; - setp.lt.f32 %p279, %f2509, 0fC2D20000; - selp.f32 %f2519, 0f00000000, %f2518, %p279; - setp.gt.f32 %p280, %f2509, 0f42D20000; - selp.f32 %f3378, 0f7F800000, %f2519, %p280; - setp.eq.f32 %p281, %f3378, 0f7F800000; - @%p281 bra BB2_164; - - fma.rn.f32 %f3378, %f3378, %f525, %f3378; - -BB2_164: - setp.lt.f32 %p282, %f518, 0f00000000; - and.pred %p16, %p282, %p219; - mov.b32 %r254, %f3378; - xor.b32 %r255, %r254, -2147483648; - mov.b32 %f2520, %r255; - selp.f32 %f3380, %f2520, %f3378, %p16; - setp.eq.f32 %p284, %f518, 0f00000000; - @%p284 bra BB2_167; - bra.uni BB2_165; - -BB2_167: - add.f32 %f2523, %f518, %f518; - selp.f32 %f3380, %f2523, 0f00000000, %p219; - bra.uni BB2_168; - -BB2_165: - setp.geu.f32 %p285, %f518, 0f00000000; - @%p285 bra BB2_168; - - cvt.rzi.f32.f32 %f2522, %f2060; - setp.neu.f32 %p286, %f2522, 0f40000000; - selp.f32 %f3380, 0f7FFFFFFF, %f3380, %p286; - -BB2_168: - add.f32 %f2524, %f519, 0f40000000; - mov.b32 %r48, %f2524; - setp.lt.s32 %p288, %r48, 2139095040; - @%p288 bra BB2_173; - - setp.gtu.f32 %p289, %f519, 0f7F800000; - @%p289 bra BB2_172; - bra.uni BB2_170; - -BB2_172: - add.f32 %f3380, %f518, 0f40000000; - bra.uni BB2_173; - -BB2_170: - setp.neu.f32 %p290, %f519, 0f7F800000; - @%p290 bra BB2_173; - - selp.f32 %f3380, 0fFF800000, 0f7F800000, %p16; - -BB2_173: - mul.f32 %f2527, %f3380, 0fBF000000; - setp.eq.f32 %p291, %f518, 0f3F800000; - selp.f32 %f2528, 0fBF000000, %f2527, %p291; - mul.f32 %f2529, %f2528, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2530, %f2529; - fma.rn.f32 %f2532, %f2530, %f2257, %f2528; - fma.rn.f32 %f2534, %f2530, %f2259, %f2532; - mul.f32 %f2535, %f2534, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2536, %f2535; - add.f32 %f2537, %f2530, 0f00000000; - ex2.approx.f32 %f2538, %f2537; - mul.f32 %f2539, %f2536, %f2538; - setp.lt.f32 %p292, %f2528, 0fC2D20000; - selp.f32 %f2540, 0f00000000, %f2539, %p292; - setp.gt.f32 %p293, %f2528, 0f42D20000; - selp.f32 %f2541, 0f7F800000, %f2540, %p293; - sub.f32 %f2542, %f517, %f2541; - mul.f32 %f2543, %f424, %f2542; - mul.f32 %f536, %f466, %f2543; - // inline asm - rcp.approx.ftz.f32 %f2525,%f406; - // inline asm - mul.f32 %f2544, %f2525, %f407; - mul.f32 %f2545, %f2544, %f2544; - fma.rn.f32 %f2548, %f2215, %f2545, %f2214; - fma.rn.f32 %f2550, %f2548, %f2545, %f2217; - mul.rn.f32 %f2551, %f2550, %f2545; - mul.rn.f32 %f2552, %f2551, %f2544; - sub.f32 %f2553, %f405, %f2544; - neg.f32 %f2554, %f2544; - add.f32 %f2555, %f2553, %f2553; - fma.rn.f32 %f2556, %f2554, %f405, %f2555; - mul.rn.f32 %f2557, %f2525, %f2556; - add.f32 %f2558, %f2552, %f2544; - sub.f32 %f2559, %f2544, %f2558; - add.f32 %f2560, %f2552, %f2559; - add.f32 %f2561, %f2557, %f2560; - add.f32 %f2562, %f2558, %f2561; - sub.f32 %f2563, %f2558, %f2562; - add.f32 %f2564, %f2561, %f2563; - add.f32 %f2565, %f408, %f2562; - sub.f32 %f2566, %f408, %f2565; - add.f32 %f2567, %f2562, %f2566; - add.f32 %f2568, %f2564, %f2567; - add.f32 %f2569, %f409, %f2568; - add.f32 %f2570, %f2565, %f2569; - sub.f32 %f2571, %f2565, %f2570; - add.f32 %f2572, %f2569, %f2571; - mul.rn.f32 %f2574, %f2060, %f2570; - neg.f32 %f2575, %f2574; - fma.rn.f32 %f2576, %f2060, %f2570, %f2575; - fma.rn.f32 %f2577, %f2060, %f2572, %f2576; - fma.rn.f32 %f2579, %f2246, %f2570, %f2577; - add.rn.f32 %f2580, %f2574, %f2579; - neg.f32 %f2581, %f2580; - add.rn.f32 %f2582, %f2574, %f2581; - add.rn.f32 %f2583, %f2582, %f2579; - mov.b32 %r256, %f2580; - setp.eq.s32 %p294, %r256, 1118925336; - add.s32 %r257, %r256, -1; - mov.b32 %f2584, %r257; - add.f32 %f2585, %f2583, 0f37000000; - selp.f32 %f2586, %f2584, %f2580, %p294; - selp.f32 %f537, %f2585, %f2583, %p294; - mul.f32 %f2587, %f2586, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2588, %f2587; - fma.rn.f32 %f2589, %f2588, %f2257, %f2586; - fma.rn.f32 %f2590, %f2588, %f2259, %f2589; - mul.f32 %f2591, %f2590, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2592, %f2591; - add.f32 %f2593, %f2588, 0f00000000; - ex2.approx.f32 %f2594, %f2593; - mul.f32 %f2595, %f2592, %f2594; - setp.lt.f32 %p295, %f2586, 0fC2D20000; - selp.f32 %f2596, 0f00000000, %f2595, %p295; - setp.gt.f32 %p296, %f2586, 0f42D20000; - selp.f32 %f3381, 0f7F800000, %f2596, %p296; - setp.eq.f32 %p297, %f3381, 0f7F800000; - @%p297 bra BB2_175; - - fma.rn.f32 %f3381, %f3381, %f537, %f3381; - -BB2_175: - mov.b32 %r258, %f3381; - xor.b32 %r259, %r258, -2147483648; - mov.b32 %f2597, %r259; - selp.f32 %f541, %f2597, %f3381, %p11; - selp.f32 %f3382, %f410, %f541, %p238; - @%p13 bra BB2_177; - - cvt.rzi.f32.f32 %f2599, %f2060; - setp.neu.f32 %p299, %f2599, 0f40000000; - selp.f32 %f3382, 0f7FFFFFFF, %f541, %p299; - -BB2_177: - selp.f32 %f2602, %f411, %f3382, %p240; - selp.f32 %f2603, %f2602, %f412, %p241; - selp.f32 %f2604, %f2603, %f3382, %p242; - mul.f32 %f2605, %f2604, 0fBF000000; - selp.f32 %f2606, 0fBF000000, %f2605, %p243; - mul.f32 %f2607, %f2606, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2608, %f2607; - fma.rn.f32 %f2610, %f2608, %f2257, %f2606; - fma.rn.f32 %f2612, %f2608, %f2259, %f2610; - mul.f32 %f2613, %f2612, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2614, %f2613; - add.f32 %f2615, %f2608, 0f00000000; - ex2.approx.f32 %f2616, %f2615; - mul.f32 %f2617, %f2614, %f2616; - setp.lt.f32 %p304, %f2606, 0fC2D20000; - selp.f32 %f2618, 0f00000000, %f2617, %p304; - setp.gt.f32 %p305, %f2606, 0f42D20000; - selp.f32 %f545, 0f7F800000, %f2618, %p305; - // inline asm - rcp.approx.ftz.f32 %f2600,%f416; - // inline asm - mul.f32 %f2619, %f2600, %f417; - mul.f32 %f2620, %f2619, %f2619; - fma.rn.f32 %f2623, %f2215, %f2620, %f2214; - fma.rn.f32 %f2625, %f2623, %f2620, %f2217; - mul.rn.f32 %f2626, %f2625, %f2620; - mul.rn.f32 %f2627, %f2626, %f2619; - sub.f32 %f2628, %f415, %f2619; - neg.f32 %f2629, %f2619; - add.f32 %f2630, %f2628, %f2628; - fma.rn.f32 %f2631, %f2629, %f415, %f2630; - mul.rn.f32 %f2632, %f2600, %f2631; - add.f32 %f2633, %f2627, %f2619; - sub.f32 %f2634, %f2619, %f2633; - add.f32 %f2635, %f2627, %f2634; - add.f32 %f2636, %f2632, %f2635; - add.f32 %f2637, %f2633, %f2636; - sub.f32 %f2638, %f2633, %f2637; - add.f32 %f2639, %f2636, %f2638; - add.f32 %f2640, %f418, %f2637; - sub.f32 %f2641, %f418, %f2640; - add.f32 %f2642, %f2637, %f2641; - add.f32 %f2643, %f2639, %f2642; - add.f32 %f2644, %f419, %f2643; - add.f32 %f2645, %f2640, %f2644; - sub.f32 %f2646, %f2640, %f2645; - add.f32 %f2647, %f2644, %f2646; - mul.rn.f32 %f2649, %f2060, %f2645; - neg.f32 %f2650, %f2649; - fma.rn.f32 %f2651, %f2060, %f2645, %f2650; - fma.rn.f32 %f2652, %f2060, %f2647, %f2651; - fma.rn.f32 %f2654, %f2246, %f2645, %f2652; - add.rn.f32 %f2655, %f2649, %f2654; - neg.f32 %f2656, %f2655; - add.rn.f32 %f2657, %f2649, %f2656; - add.rn.f32 %f2658, %f2657, %f2654; - mov.b32 %r260, %f2655; - setp.eq.s32 %p306, %r260, 1118925336; - add.s32 %r261, %r260, -1; - mov.b32 %f2659, %r261; - add.f32 %f2660, %f2658, 0f37000000; - selp.f32 %f2661, %f2659, %f2655, %p306; - selp.f32 %f546, %f2660, %f2658, %p306; - mul.f32 %f2662, %f2661, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2663, %f2662; - fma.rn.f32 %f2664, %f2663, %f2257, %f2661; - fma.rn.f32 %f2665, %f2663, %f2259, %f2664; - mul.f32 %f2666, %f2665, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2667, %f2666; - add.f32 %f2668, %f2663, 0f00000000; - ex2.approx.f32 %f2669, %f2668; - mul.f32 %f2670, %f2667, %f2669; - setp.lt.f32 %p307, %f2661, 0fC2D20000; - selp.f32 %f2671, 0f00000000, %f2670, %p307; - setp.gt.f32 %p308, %f2661, 0f42D20000; - selp.f32 %f3383, 0f7F800000, %f2671, %p308; - setp.eq.f32 %p309, %f3383, 0f7F800000; - @%p309 bra BB2_179; - - fma.rn.f32 %f3383, %f3383, %f546, %f3383; - -BB2_179: - mov.b32 %r262, %f3383; - xor.b32 %r263, %r262, -2147483648; - mov.b32 %f2672, %r263; - selp.f32 %f550, %f2672, %f3383, %p12; - selp.f32 %f3384, %f420, %f550, %p250; - @%p14 bra BB2_181; - - cvt.rzi.f32.f32 %f2674, %f2060; - setp.neu.f32 %p311, %f2674, 0f40000000; - selp.f32 %f3384, 0f7FFFFFFF, %f550, %p311; - -BB2_181: - selp.f32 %f2677, %f421, %f3384, %p252; - selp.f32 %f2678, %f2677, %f422, %p253; - selp.f32 %f2679, %f2678, %f3384, %p254; - mul.f32 %f2680, %f2679, 0fBF000000; - selp.f32 %f2681, 0fBF000000, %f2680, %p255; - mul.f32 %f2682, %f2681, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2683, %f2682; - fma.rn.f32 %f2685, %f2683, %f2257, %f2681; - fma.rn.f32 %f2687, %f2683, %f2259, %f2685; - mul.f32 %f2688, %f2687, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2689, %f2688; - add.f32 %f2690, %f2683, 0f00000000; - ex2.approx.f32 %f2691, %f2690; - mul.f32 %f2692, %f2689, %f2691; - setp.lt.f32 %p316, %f2681, 0fC2D20000; - selp.f32 %f2693, 0f00000000, %f2692, %p316; - setp.gt.f32 %p317, %f2681, 0f42D20000; - selp.f32 %f2694, 0f7F800000, %f2693, %p317; - mul.f32 %f2695, %f393, %f2694; - mul.f32 %f2696, %f394, %f545; - sub.f32 %f2697, %f2696, %f2695; - mul.f32 %f2698, %f425, %f2697; - mul.f32 %f554, %f480, %f2698; - // inline asm - rcp.approx.ftz.f32 %f2675,%f2346; - // inline asm - mul.f32 %f2699, %f2675, %f503; - mul.f32 %f2700, %f2699, %f2699; - fma.rn.f32 %f2703, %f2215, %f2700, %f2214; - fma.rn.f32 %f2705, %f2703, %f2700, %f2217; - mul.rn.f32 %f2706, %f2705, %f2700; - mul.rn.f32 %f2707, %f2706, %f2699; - sub.f32 %f2708, %f501, %f2699; - neg.f32 %f2709, %f2699; - add.f32 %f2710, %f2708, %f2708; - fma.rn.f32 %f2711, %f2709, %f501, %f2710; - mul.rn.f32 %f2712, %f2675, %f2711; - add.f32 %f2713, %f2707, %f2699; - sub.f32 %f2714, %f2699, %f2713; - add.f32 %f2715, %f2707, %f2714; - add.f32 %f2716, %f2712, %f2715; - add.f32 %f2717, %f2713, %f2716; - sub.f32 %f2718, %f2713, %f2717; - add.f32 %f2719, %f2716, %f2718; - add.f32 %f2720, %f504, %f2717; - sub.f32 %f2721, %f504, %f2720; - add.f32 %f2722, %f2717, %f2721; - add.f32 %f2723, %f2719, %f2722; - add.f32 %f2724, %f505, %f2723; - add.f32 %f2725, %f2720, %f2724; - sub.f32 %f2726, %f2720, %f2725; - add.f32 %f2727, %f2724, %f2726; - mul.rn.f32 %f2729, %f2060, %f2725; - neg.f32 %f2730, %f2729; - fma.rn.f32 %f2731, %f2060, %f2725, %f2730; - fma.rn.f32 %f2732, %f2060, %f2727, %f2731; - fma.rn.f32 %f2734, %f2246, %f2725, %f2732; - add.rn.f32 %f2735, %f2729, %f2734; - neg.f32 %f2736, %f2735; - add.rn.f32 %f2737, %f2729, %f2736; - add.rn.f32 %f2738, %f2737, %f2734; - mov.b32 %r264, %f2735; - setp.eq.s32 %p318, %r264, 1118925336; - add.s32 %r265, %r264, -1; - mov.b32 %f2739, %r265; - add.f32 %f2740, %f2738, 0f37000000; - selp.f32 %f2741, %f2739, %f2735, %p318; - selp.f32 %f555, %f2740, %f2738, %p318; - mul.f32 %f2742, %f2741, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2743, %f2742; - fma.rn.f32 %f2744, %f2743, %f2257, %f2741; - fma.rn.f32 %f2745, %f2743, %f2259, %f2744; - mul.f32 %f2746, %f2745, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2747, %f2746; - add.f32 %f2748, %f2743, 0f00000000; - ex2.approx.f32 %f2749, %f2748; - mul.f32 %f2750, %f2747, %f2749; - setp.lt.f32 %p319, %f2741, 0fC2D20000; - selp.f32 %f2751, 0f00000000, %f2750, %p319; - setp.gt.f32 %p320, %f2741, 0f42D20000; - selp.f32 %f3385, 0f7F800000, %f2751, %p320; - setp.eq.f32 %p321, %f3385, 0f7F800000; - @%p321 bra BB2_183; - - fma.rn.f32 %f3385, %f3385, %f555, %f3385; - -BB2_183: - mov.b32 %r266, %f3385; - xor.b32 %r267, %r266, -2147483648; - mov.b32 %f2752, %r267; - selp.f32 %f3387, %f2752, %f3385, %p15; - @%p266 bra BB2_186; - bra.uni BB2_184; - -BB2_186: - add.f32 %f2755, %f499, %f499; - selp.f32 %f3387, %f2755, 0f00000000, %p219; - bra.uni BB2_187; - -BB2_184: - setp.geu.f32 %p323, %f499, 0f00000000; - @%p323 bra BB2_187; - - cvt.rzi.f32.f32 %f2754, %f2060; - setp.neu.f32 %p324, %f2754, 0f40000000; - selp.f32 %f3387, 0f7FFFFFFF, %f3387, %p324; - -BB2_187: - @%p270 bra BB2_192; - - setp.gtu.f32 %p327, %f500, 0f7F800000; - @%p327 bra BB2_191; - bra.uni BB2_189; - -BB2_191: - add.f32 %f3387, %f499, 0f40000000; - bra.uni BB2_192; - -BB2_189: - setp.neu.f32 %p328, %f500, 0f7F800000; - @%p328 bra BB2_192; - - selp.f32 %f3387, 0fFF800000, 0f7F800000, %p15; - -BB2_192: - mul.f32 %f2758, %f3387, 0fBF000000; - selp.f32 %f2759, 0fBF000000, %f2758, %p273; - mul.f32 %f2760, %f2759, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2761, %f2760; - fma.rn.f32 %f2763, %f2761, %f2257, %f2759; - fma.rn.f32 %f2765, %f2761, %f2259, %f2763; - mul.f32 %f2766, %f2765, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2767, %f2766; - add.f32 %f2768, %f2761, 0f00000000; - ex2.approx.f32 %f2769, %f2768; - mul.f32 %f2770, %f2767, %f2769; - setp.lt.f32 %p330, %f2759, 0fC2D20000; - selp.f32 %f2771, 0f00000000, %f2770, %p330; - setp.gt.f32 %p331, %f2759, 0f42D20000; - selp.f32 %f566, 0f7F800000, %f2771, %p331; - // inline asm - rcp.approx.ftz.f32 %f2756,%f2440; - // inline asm - mul.f32 %f2772, %f2756, %f522; - mul.f32 %f2773, %f2772, %f2772; - fma.rn.f32 %f2776, %f2215, %f2773, %f2214; - fma.rn.f32 %f2778, %f2776, %f2773, %f2217; - mul.rn.f32 %f2779, %f2778, %f2773; - mul.rn.f32 %f2780, %f2779, %f2772; - sub.f32 %f2781, %f520, %f2772; - neg.f32 %f2782, %f2772; - add.f32 %f2783, %f2781, %f2781; - fma.rn.f32 %f2784, %f2782, %f520, %f2783; - mul.rn.f32 %f2785, %f2756, %f2784; - add.f32 %f2786, %f2780, %f2772; - sub.f32 %f2787, %f2772, %f2786; - add.f32 %f2788, %f2780, %f2787; - add.f32 %f2789, %f2785, %f2788; - add.f32 %f2790, %f2786, %f2789; - sub.f32 %f2791, %f2786, %f2790; - add.f32 %f2792, %f2789, %f2791; - add.f32 %f2793, %f523, %f2790; - sub.f32 %f2794, %f523, %f2793; - add.f32 %f2795, %f2790, %f2794; - add.f32 %f2796, %f2792, %f2795; - add.f32 %f2797, %f524, %f2796; - add.f32 %f2798, %f2793, %f2797; - sub.f32 %f2799, %f2793, %f2798; - add.f32 %f2800, %f2797, %f2799; - mul.rn.f32 %f2802, %f2060, %f2798; - neg.f32 %f2803, %f2802; - fma.rn.f32 %f2804, %f2060, %f2798, %f2803; - fma.rn.f32 %f2805, %f2060, %f2800, %f2804; - fma.rn.f32 %f2807, %f2246, %f2798, %f2805; - add.rn.f32 %f2808, %f2802, %f2807; - neg.f32 %f2809, %f2808; - add.rn.f32 %f2810, %f2802, %f2809; - add.rn.f32 %f2811, %f2810, %f2807; - mov.b32 %r268, %f2808; - setp.eq.s32 %p332, %r268, 1118925336; - add.s32 %r269, %r268, -1; - mov.b32 %f2812, %r269; - add.f32 %f2813, %f2811, 0f37000000; - selp.f32 %f2814, %f2812, %f2808, %p332; - selp.f32 %f567, %f2813, %f2811, %p332; - mul.f32 %f2815, %f2814, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2816, %f2815; - fma.rn.f32 %f2817, %f2816, %f2257, %f2814; - fma.rn.f32 %f2818, %f2816, %f2259, %f2817; - mul.f32 %f2819, %f2818, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2820, %f2819; - add.f32 %f2821, %f2816, 0f00000000; - ex2.approx.f32 %f2822, %f2821; - mul.f32 %f2823, %f2820, %f2822; - setp.lt.f32 %p333, %f2814, 0fC2D20000; - selp.f32 %f2824, 0f00000000, %f2823, %p333; - setp.gt.f32 %p334, %f2814, 0f42D20000; - selp.f32 %f3388, 0f7F800000, %f2824, %p334; - setp.eq.f32 %p335, %f3388, 0f7F800000; - @%p335 bra BB2_194; - - fma.rn.f32 %f3388, %f3388, %f567, %f3388; - -BB2_194: - mov.b32 %r270, %f3388; - xor.b32 %r271, %r270, -2147483648; - mov.b32 %f2825, %r271; - selp.f32 %f3390, %f2825, %f3388, %p16; - @%p284 bra BB2_197; - bra.uni BB2_195; - -BB2_197: - add.f32 %f2828, %f518, %f518; - selp.f32 %f3390, %f2828, 0f00000000, %p219; - bra.uni BB2_198; - -BB2_195: - setp.geu.f32 %p337, %f518, 0f00000000; - @%p337 bra BB2_198; - - cvt.rzi.f32.f32 %f2827, %f2060; - setp.neu.f32 %p338, %f2827, 0f40000000; - selp.f32 %f3390, 0f7FFFFFFF, %f3390, %p338; - -BB2_198: - @%p288 bra BB2_203; - - setp.gtu.f32 %p341, %f519, 0f7F800000; - @%p341 bra BB2_202; - bra.uni BB2_200; - -BB2_202: - add.f32 %f3390, %f518, 0f40000000; - bra.uni BB2_203; - -BB2_200: - setp.neu.f32 %p342, %f519, 0f7F800000; - @%p342 bra BB2_203; - - selp.f32 %f3390, 0fFF800000, 0f7F800000, %p16; - -BB2_203: - mul.f32 %f2829, %f3390, 0fBF000000; - selp.f32 %f2830, 0fBF000000, %f2829, %p291; - mul.f32 %f2831, %f2830, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2832, %f2831; - fma.rn.f32 %f2834, %f2832, %f2257, %f2830; - fma.rn.f32 %f2836, %f2832, %f2259, %f2834; - mul.f32 %f2837, %f2836, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2838, %f2837; - add.f32 %f2839, %f2832, 0f00000000; - ex2.approx.f32 %f2840, %f2839; - mul.f32 %f2841, %f2838, %f2840; - setp.lt.f32 %p344, %f2830, 0fC2D20000; - selp.f32 %f2842, 0f00000000, %f2841, %p344; - setp.gt.f32 %p345, %f2830, 0f42D20000; - selp.f32 %f2843, 0f7F800000, %f2842, %p345; - mul.f32 %f2844, %f468, %f2843; - mul.f32 %f2845, %f469, %f566; - sub.f32 %f2846, %f2845, %f2844; - mul.f32 %f2847, %f426, %f2846; - mul.f32 %f2848, %f466, %f2847; - mul.f32 %f2849, %f2848, %f428; - fma.rn.f32 %f2850, %f554, %f427, %f2849; - mul.f32 %f2851, %f466, %f3347; - fma.rn.f32 %f578, %f480, %f2851, %f3258; - mad.lo.s32 %r272, %r322, %r63, %r321; - add.s32 %r273, %r272, %r4; - mul.wide.s32 %rd73, %r273, 4; - add.s64 %rd74, %rd1, %rd73; - ld.global.f32 %f579, [%rd74]; - mul.f32 %f2852, %f498, %f498; - div.rn.f32 %f2853, %f2852, %f578; - add.f32 %f3365, %f2853, %f3365; - mul.f32 %f2854, %f536, %f498; - div.rn.f32 %f2855, %f2854, %f578; - add.f32 %f3364, %f2855, %f3364; - mul.f32 %f2856, %f466, %f480; - mul.f32 %f2857, %f2856, %f498; - div.rn.f32 %f2858, %f2857, %f578; - add.f32 %f3363, %f2858, %f3363; - div.rn.f32 %f2859, %f498, %f578; - add.f32 %f3362, %f2859, %f3362; - mul.f32 %f2860, %f2850, %f498; - div.rn.f32 %f2861, %f2860, %f578; - add.f32 %f3361, %f2861, %f3361; - mul.f32 %f2862, %f536, %f536; - div.rn.f32 %f2863, %f2862, %f578; - add.f32 %f3360, %f2863, %f3360; - mul.f32 %f2864, %f2856, %f536; - div.rn.f32 %f2865, %f2864, %f578; - add.f32 %f3359, %f2865, %f3359; - div.rn.f32 %f2866, %f536, %f578; - add.f32 %f3358, %f2866, %f3358; - mul.f32 %f2867, %f2850, %f536; - div.rn.f32 %f2868, %f2867, %f578; - add.f32 %f3357, %f2868, %f3357; - mul.f32 %f2869, %f2856, %f2856; - div.rn.f32 %f2870, %f2869, %f578; - add.f32 %f3356, %f2870, %f3356; - div.rn.f32 %f2871, %f2856, %f578; - add.f32 %f3355, %f2871, %f3355; - mul.f32 %f2872, %f2850, %f2856; - div.rn.f32 %f2873, %f2872, %f578; - add.f32 %f3354, %f2873, %f3354; - rcp.rn.f32 %f2874, %f578; - add.f32 %f3353, %f2874, %f3353; - div.rn.f32 %f2875, %f2850, %f578; - add.f32 %f3352, %f2875, %f3352; - mul.f32 %f2876, %f2850, %f2850; - div.rn.f32 %f2877, %f2876, %f578; - add.f32 %f3351, %f2877, %f3351; - setp.leu.f32 %p346, %f578, 0f00000000; - @%p346 bra BB2_211; - - setp.gt.f32 %p347, %f579, 0f00000000; - @%p347 bra BB2_206; - bra.uni BB2_205; - -BB2_206: - setp.lt.f32 %p348, %f578, 0f00800000; - mul.f32 %f2878, %f578, 0f4B000000; - selp.f32 %f596, %f2878, %f578, %p348; - selp.f32 %f2879, 0fC1B80000, 0f00000000, %p348; - mov.b32 %r274, %f596; - add.s32 %r275, %r274, -1059760811; - and.b32 %r276, %r275, -8388608; - sub.s32 %r277, %r274, %r276; - mov.b32 %f2880, %r277; - cvt.rn.f32.s32 %f2881, %r276; - mov.f32 %f2882, 0f34000000; - fma.rn.f32 %f2883, %f2881, %f2882, %f2879; - add.f32 %f2884, %f2880, 0fBF800000; - mov.f32 %f2885, 0f3E1039F6; - mov.f32 %f2886, 0fBE055027; - fma.rn.f32 %f2887, %f2886, %f2884, %f2885; - mov.f32 %f2888, 0fBDF8CDCC; - fma.rn.f32 %f2889, %f2887, %f2884, %f2888; - mov.f32 %f2890, 0f3E0F2955; - fma.rn.f32 %f2891, %f2889, %f2884, %f2890; - mov.f32 %f2892, 0fBE2AD8B9; - fma.rn.f32 %f2893, %f2891, %f2884, %f2892; - mov.f32 %f2894, 0f3E4CED0B; - fma.rn.f32 %f2895, %f2893, %f2884, %f2894; - mov.f32 %f2896, 0fBE7FFF22; - fma.rn.f32 %f2897, %f2895, %f2884, %f2896; - mov.f32 %f2898, 0f3EAAAA78; - fma.rn.f32 %f2899, %f2897, %f2884, %f2898; - mov.f32 %f2900, 0fBF000000; - fma.rn.f32 %f2901, %f2899, %f2884, %f2900; - mul.f32 %f2902, %f2884, %f2901; - fma.rn.f32 %f2903, %f2902, %f2884, %f2884; - mov.f32 %f2904, 0f3F317218; - fma.rn.f32 %f3391, %f2883, %f2904, %f2903; - setp.lt.u32 %p349, %r274, 2139095040; - @%p349 bra BB2_208; + add.f32 %f1639, %f1633, 0f4B40007F; + mov.b32 %r785, %f1639; + shl.b32 %r786, %r785, 23; + mov.b32 %f1640, %r786; + ex2.approx.ftz.f32 %f1641, %f1638; + mul.f32 %f249, %f1641, %f1640; + setp.eq.f32 %p664, %f249, 0f7F800000; + mov.f32 %f3256, 0f7F800000; + @%p664 bra $L__BB2_408; + + fma.rn.f32 %f3256, %f249, %f248, %f249; + +$L__BB2_408: + mov.f32 %f3044, 0f3F400000; + cvt.rzi.f32.f32 %f3043, %f3044; + add.f32 %f3042, %f3043, %f3043; + mov.f32 %f3041, 0f3FC00000; + sub.f32 %f3040, %f3041, %f3042; + abs.f32 %f3039, %f3040; + setp.lt.f32 %p665, %f96, 0f00000000; + setp.eq.f32 %p666, %f3039, 0f3F800000; + and.pred %p44, %p665, %p666; + setp.eq.f32 %p667, %f96, 0f00000000; + @%p667 bra $L__BB2_412; + bra.uni $L__BB2_409; + +$L__BB2_412: + add.f32 %f1646, %f96, %f96; + selp.f32 %f3258, %f1646, 0f00000000, %p666; + bra.uni $L__BB2_413; + +$L__BB2_409: + mov.b32 %r787, %f3256; + xor.b32 %r788, %r787, -2147483648; + mov.b32 %f1642, %r788; + selp.f32 %f3258, %f1642, %f3256, %p44; + setp.geu.f32 %p668, %f96, 0f00000000; + @%p668 bra $L__BB2_413; + + mov.f32 %f3134, 0f3FC00000; + cvt.rzi.f32.f32 %f1644, %f3134; + setp.eq.f32 %p669, %f1644, 0f3FC00000; + @%p669 bra $L__BB2_413; + + mov.f32 %f3258, 0f7FFFFFFF; + +$L__BB2_413: + abs.f32 %f3156, %f96; + add.f32 %f1647, %f3156, 0f3FC00000; + mov.b32 %r789, %f1647; + setp.lt.s32 %p671, %r789, 2139095040; + @%p671 bra $L__BB2_418; + + abs.f32 %f3170, %f96; + setp.gtu.f32 %p672, %f3170, 0f7F800000; + @%p672 bra $L__BB2_417; + bra.uni $L__BB2_415; + +$L__BB2_417: + add.f32 %f3258, %f96, 0f3FC00000; + bra.uni $L__BB2_418; + +$L__BB2_415: + abs.f32 %f3171, %f96; + setp.neu.f32 %p673, %f3171, 0f7F800000; + @%p673 bra $L__BB2_418; + + selp.f32 %f3258, 0fFF800000, 0f7F800000, %p44; + +$L__BB2_418: + mov.f32 %f3052, 0f3FC00000; + mov.f32 %f3051, 0f3102E308; + mov.f32 %f3050, 0fBF317218; + mov.f32 %f3049, 0f35BFBE8E; + mov.f32 %f3048, 0f3F317200; + mov.f32 %f3047, 0f3DAAAABD; + mov.f32 %f3046, 0f3C4CAF63; + mov.f32 %f3045, 0f3B18F0FE; + setp.eq.f32 %p674, %f96, 0f3F800000; + selp.f32 %f1649, 0f3F800000, %f3258, %p674; + div.rn.f32 %f258, %f55, %f1649; + abs.f32 %f259, %f97; + setp.lt.f32 %p675, %f259, 0f00800000; + mul.f32 %f1650, %f259, 0f4B800000; + selp.f32 %f1651, %f1650, %f259, %p675; + selp.f32 %f1652, 0fC3170000, 0fC2FE0000, %p675; + mov.b32 %r790, %f1651; + and.b32 %r791, %r790, 8388607; + or.b32 %r792, %r791, 1065353216; + mov.b32 %f1653, %r792; + shr.u32 %r793, %r790, 23; + cvt.rn.f32.u32 %f1654, %r793; + add.f32 %f1655, %f1652, %f1654; + setp.gt.f32 %p676, %f1653, 0f3FB504F3; + mul.f32 %f1656, %f1653, 0f3F000000; + add.f32 %f1657, %f1655, 0f3F800000; + selp.f32 %f1658, %f1657, %f1655, %p676; + selp.f32 %f1659, %f1656, %f1653, %p676; + add.f32 %f1660, %f1659, 0fBF800000; + add.f32 %f1661, %f1659, 0f3F800000; + rcp.approx.ftz.f32 %f1662, %f1661; + add.f32 %f1663, %f1660, %f1660; + mul.f32 %f1664, %f1663, %f1662; + mul.f32 %f1665, %f1664, %f1664; + fma.rn.f32 %f1668, %f3045, %f1665, %f3046; + fma.rn.f32 %f1670, %f1668, %f1665, %f3047; + mul.rn.f32 %f1671, %f1670, %f1665; + mul.rn.f32 %f1672, %f1671, %f1664; + sub.f32 %f1673, %f1660, %f1664; + add.f32 %f1674, %f1673, %f1673; + neg.f32 %f1675, %f1664; + fma.rn.f32 %f1676, %f1675, %f1660, %f1674; + mul.rn.f32 %f1677, %f1662, %f1676; + add.f32 %f1678, %f1672, %f1664; + sub.f32 %f1679, %f1664, %f1678; + add.f32 %f1680, %f1672, %f1679; + add.f32 %f1681, %f1677, %f1680; + add.f32 %f1682, %f1678, %f1681; + sub.f32 %f1683, %f1678, %f1682; + add.f32 %f1684, %f1681, %f1683; + mul.rn.f32 %f1686, %f1658, %f3048; + mul.rn.f32 %f1688, %f1658, %f3049; + add.f32 %f1689, %f1686, %f1682; + sub.f32 %f1690, %f1686, %f1689; + add.f32 %f1691, %f1682, %f1690; + add.f32 %f1692, %f1684, %f1691; + add.f32 %f1693, %f1688, %f1692; + add.f32 %f1694, %f1689, %f1693; + sub.f32 %f1695, %f1689, %f1694; + add.f32 %f1696, %f1693, %f1695; + mul.rn.f32 %f1698, %f3052, %f1694; + neg.f32 %f1699, %f1698; + fma.rn.f32 %f1700, %f3052, %f1694, %f1699; + fma.rn.f32 %f1701, %f3052, %f1696, %f1700; + fma.rn.f32 %f1703, %f3263, %f1694, %f1701; + add.rn.f32 %f1704, %f1698, %f1703; + neg.f32 %f1705, %f1704; + add.rn.f32 %f1706, %f1698, %f1705; + add.rn.f32 %f1707, %f1706, %f1703; + mov.b32 %r794, %f1704; + setp.eq.s32 %p677, %r794, 1118925336; + add.s32 %r795, %r794, -1; + mov.b32 %f1708, %r795; + add.f32 %f1709, %f1707, 0f37000000; + selp.f32 %f260, %f1709, %f1707, %p677; + selp.f32 %f1710, %f1708, %f1704, %p677; + mul.rn.f32 %f1712, %f1710, %f800; + cvt.rzi.f32.f32 %f1713, %f1712; + abs.f32 %f1714, %f1713; + setp.gt.f32 %p678, %f1714, 0f42FC0000; + mov.b32 %r796, %f1713; + and.b32 %r797, %r796, -2147483648; + or.b32 %r798, %r797, 1123811328; + mov.b32 %f1715, %r798; + selp.f32 %f1716, %f1715, %f1713, %p678; + fma.rn.f32 %f1718, %f1716, %f3050, %f1710; + fma.rn.f32 %f1720, %f1716, %f3051, %f1718; + mul.f32 %f1721, %f1720, 0f3FB8AA3B; + add.f32 %f1722, %f1716, 0f4B40007F; + mov.b32 %r799, %f1722; + shl.b32 %r800, %r799, 23; + mov.b32 %f1723, %r800; + ex2.approx.ftz.f32 %f1724, %f1721; + mul.f32 %f261, %f1724, %f1723; + setp.eq.f32 %p679, %f261, 0f7F800000; + mov.f32 %f3259, 0f7F800000; + @%p679 bra $L__BB2_420; + + fma.rn.f32 %f3259, %f261, %f260, %f261; + +$L__BB2_420: + setp.lt.f32 %p680, %f97, 0f00000000; + and.pred %p45, %p680, %p666; + setp.eq.f32 %p682, %f97, 0f00000000; + @%p682 bra $L__BB2_424; + bra.uni $L__BB2_421; + +$L__BB2_424: + add.f32 %f1729, %f97, %f97; + selp.f32 %f3261, %f1729, 0f00000000, %p666; + bra.uni $L__BB2_425; + +$L__BB2_421: + mov.b32 %r801, %f3259; + xor.b32 %r802, %r801, -2147483648; + mov.b32 %f1725, %r802; + selp.f32 %f3261, %f1725, %f3259, %p45; + setp.geu.f32 %p683, %f97, 0f00000000; + @%p683 bra $L__BB2_425; + + mov.f32 %f3133, 0f3FC00000; + cvt.rzi.f32.f32 %f1727, %f3133; + setp.eq.f32 %p684, %f1727, 0f3FC00000; + @%p684 bra $L__BB2_425; + + mov.f32 %f3261, 0f7FFFFFFF; + +$L__BB2_425: + abs.f32 %f3172, %f97; + add.f32 %f1730, %f3172, 0f3FC00000; + mov.b32 %r803, %f1730; + setp.lt.s32 %p686, %r803, 2139095040; + @%p686 bra $L__BB2_430; + + abs.f32 %f3173, %f97; + setp.gtu.f32 %p687, %f3173, 0f7F800000; + @%p687 bra $L__BB2_429; + bra.uni $L__BB2_427; + +$L__BB2_429: + add.f32 %f3261, %f97, 0f3FC00000; + bra.uni $L__BB2_430; + +$L__BB2_427: + abs.f32 %f3174, %f97; + setp.neu.f32 %p688, %f3174, 0f7F800000; + @%p688 bra $L__BB2_430; + + selp.f32 %f3261, 0fFF800000, 0f7F800000, %p45; + +$L__BB2_430: + cvt.rn.f32.f64 %f3157, %fd806; + setp.eq.f32 %p689, %f97, 0f3F800000; + selp.f32 %f270, 0f3F800000, %f3261, %p689; + cvt.f64.f32 %fd252, %f3157; + { + .reg .b32 %temp; + mov.b64 {%temp, %r108}, %fd252; + } + abs.f64 %fd253, %fd252; + { // callseq 46, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd253; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1141, [retval0+0]; + } // callseq 46 + setp.lt.s32 %p690, %r108, 0; + and.pred %p46, %p690, %p144; + not.pred %p692, %p46; + @%p692 bra $L__BB2_432; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r804}, %fd1141; + } + xor.b32 %r805, %r804, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r806, %temp}, %fd1141; + } + mov.b64 %fd1141, {%r806, %r805}; + +$L__BB2_432: + cvt.rn.f32.f64 %f3158, %fd806; + setp.eq.f32 %p693, %f3158, 0f00000000; + @%p693 bra $L__BB2_436; + bra.uni $L__BB2_433; + +$L__BB2_436: + mov.u32 %r807, 0; + selp.b32 %r808, %r108, 0, %p144; + or.b32 %r809, %r808, 2146435072; + selp.b32 %r810, %r809, %r808, %p146; + mov.b64 %fd1141, {%r807, %r810}; + bra.uni $L__BB2_437; + +$L__BB2_433: + setp.gt.s32 %p694, %r108, -1; + @%p694 bra $L__BB2_437; + + cvt.rzi.f64.f64 %fd830, %fd644; + setp.eq.f64 %p695, %fd830, 0d4000000000000000; + @%p695 bra $L__BB2_437; + + mov.f64 %fd1141, 0dFFF8000000000000; + +$L__BB2_437: + add.f64 %fd259, %fd252, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r811}, %fd259; + } + and.b32 %r812, %r811, 2146435072; + setp.ne.s32 %p698, %r812, 2146435072; + mov.f64 %fd1142, %fd1141; + @%p698 bra $L__BB2_443; + + setp.gtu.f64 %p699, %fd253, 0d7FF0000000000000; + mov.f64 %fd1142, %fd259; + @%p699 bra $L__BB2_443; + + setp.eq.s32 %p700, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r813, %temp}, %fd644; + } + setp.eq.s32 %p701, %r813, 0; + and.pred %p702, %p700, %p701; + @%p702 bra $L__BB2_442; + bra.uni $L__BB2_440; + +$L__BB2_442: + cvt.rn.f32.f64 %f3169, %fd806; + mov.u32 %r818, 0; + setp.gt.f64 %p709, %fd253, 0d3FF0000000000000; + selp.b32 %r819, 2146435072, 0, %p709; + xor.b32 %r820, %r819, 2146435072; + selp.b32 %r821, %r820, %r819, %p146; + setp.eq.f32 %p710, %f3169, 0fBF800000; + selp.b32 %r822, 1072693248, %r821, %p710; + mov.b64 %fd1142, {%r818, %r822}; + bra.uni $L__BB2_443; + +$L__BB2_440: + { + .reg .b32 %temp; + mov.b64 {%r814, %temp}, %fd252; + } + and.b32 %r815, %r108, 2147483647; + setp.ne.s32 %p703, %r815, 2146435072; + setp.ne.s32 %p704, %r814, 0; + or.pred %p705, %p703, %p704; + mov.f64 %fd1142, %fd1141; + @%p705 bra $L__BB2_443; + + and.pred %p707, %p155, %p46; + selp.b32 %r816, %r57, %r56, %p707; + mov.u32 %r817, 0; + mov.b64 %fd1142, {%r817, %r816}; + +$L__BB2_443: + cvt.rn.f32.f64 %f3159, %fd806; + not.pred %p1289, %p12; + setp.eq.f32 %p711, %f3159, 0f3F800000; + selp.f64 %fd833, 0d3FF0000000000000, %fd1142, %p711; + cvt.f64.f32 %fd834, %f258; + mul.f64 %fd263, %fd833, %fd834; + mov.f64 %fd1144, %fd64; + @%p1289 bra $L__BB2_445; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r823}, %fd64; + } + xor.b32 %r824, %r823, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r825, %temp}, %fd64; + } + mov.b64 %fd1144, {%r825, %r824}; + +$L__BB2_445: + @%p582 bra $L__BB2_449; + bra.uni $L__BB2_446; + +$L__BB2_449: + mov.u32 %r826, 0; + mov.b64 %fd1144, {%r826, %r84}; + bra.uni $L__BB2_450; + +$L__BB2_446: + setp.gt.s32 %p714, %r83, -1; + @%p714 bra $L__BB2_450; + + cvt.rzi.f64.f64 %fd836, %fd644; + setp.eq.f64 %p715, %fd836, 0d4000000000000000; + @%p715 bra $L__BB2_450; + + mov.f64 %fd1144, 0dFFF8000000000000; + +$L__BB2_450: + selp.f64 %fd1145, %fd1144, %fd46, %p182; + @%p25 bra $L__BB2_455; + + setp.eq.s32 %p717, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r827, %temp}, %fd644; + } + setp.eq.s32 %p718, %r827, 0; + and.pred %p719, %p717, %p718; + @%p719 bra $L__BB2_454; + bra.uni $L__BB2_452; + +$L__BB2_454: + mov.u32 %r831, 0; + mov.b64 %fd1145, {%r831, %r86}; + bra.uni $L__BB2_455; + +$L__BB2_452: + and.b32 %r828, %r83, 2147483647; + setp.ne.s32 %p720, %r828, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r829, %temp}, %fd45; + } + setp.ne.s32 %p721, %r829, 0; + or.pred %p722, %p720, %p721; + mov.f64 %fd1145, %fd1144; + @%p722 bra $L__BB2_455; + + mov.u32 %r830, 0; + mov.b64 %fd1145, {%r830, %r88}; + +$L__BB2_455: + mov.f64 %fd1147, %fd66; + @%p607 bra $L__BB2_457; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r832}, %fd66; + } + xor.b32 %r833, %r832, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r834, %temp}, %fd66; + } + mov.b64 %fd1147, {%r834, %r833}; + +$L__BB2_457: + @%p608 bra $L__BB2_461; + bra.uni $L__BB2_458; + +$L__BB2_461: + mov.u32 %r835, 0; + mov.b64 %fd1147, {%r835, %r91}; + bra.uni $L__BB2_462; + +$L__BB2_458: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1351}, %fd643; + } + setp.gt.s32 %p725, %r1351, -1; + @%p725 bra $L__BB2_462; + + cvt.rzi.f64.f64 %fd840, %fd649; + setp.eq.f64 %p726, %fd840, 0d4010000000000000; + @%p726 bra $L__BB2_462; + + mov.f64 %fd1147, 0dFFF8000000000000; + +$L__BB2_462: + selp.f64 %fd1148, %fd1147, %fd36, %p189; + @%p27 bra $L__BB2_467; + + setp.eq.s32 %p728, %r65, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r836, %temp}, %fd649; + } + setp.eq.s32 %p729, %r836, 0; + and.pred %p730, %p728, %p729; + @%p730 bra $L__BB2_466; + bra.uni $L__BB2_464; + +$L__BB2_466: + mov.u32 %r840, 0; + mov.b64 %fd1148, {%r840, %r94}; + bra.uni $L__BB2_467; + +$L__BB2_464: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1352}, %fd643; + } + and.b32 %r837, %r1352, 2147483647; + setp.ne.s32 %p731, %r837, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r838, %temp}, %fd643; + } + setp.ne.s32 %p732, %r838, 0; + or.pred %p733, %p731, %p732; + mov.f64 %fd1148, %fd1147; + @%p733 bra $L__BB2_467; - mov.f32 %f2905, 0f7F800000; - fma.rn.f32 %f3391, %f596, %f2905, %f2905; - -BB2_208: - setp.eq.f32 %p350, %f596, 0f00000000; - selp.f32 %f2906, 0fFF800000, %f3391, %p350; - mul.f32 %f2907, %f579, %f2906; - sub.f32 %f600, %f2907, %f578; - mul.f32 %f2908, %f579, 0f4B000000; - setp.lt.f32 %p351, %f579, 0f00800000; - selp.f32 %f601, %f2908, %f579, %p351; - selp.f32 %f2909, 0fC1B80000, 0f00000000, %p351; - mov.b32 %r278, %f601; - add.s32 %r279, %r278, -1059760811; - and.b32 %r280, %r279, -8388608; - sub.s32 %r281, %r278, %r280; - mov.b32 %f2910, %r281; - cvt.rn.f32.s32 %f2911, %r280; - fma.rn.f32 %f2913, %f2911, %f2882, %f2909; - add.f32 %f2914, %f2910, 0fBF800000; - fma.rn.f32 %f2917, %f2886, %f2914, %f2885; - fma.rn.f32 %f2919, %f2917, %f2914, %f2888; - fma.rn.f32 %f2921, %f2919, %f2914, %f2890; - fma.rn.f32 %f2923, %f2921, %f2914, %f2892; - fma.rn.f32 %f2925, %f2923, %f2914, %f2894; - fma.rn.f32 %f2927, %f2925, %f2914, %f2896; - fma.rn.f32 %f2929, %f2927, %f2914, %f2898; - fma.rn.f32 %f2931, %f2929, %f2914, %f2900; - mul.f32 %f2932, %f2914, %f2931; - fma.rn.f32 %f2933, %f2932, %f2914, %f2914; - fma.rn.f32 %f3392, %f2913, %f2904, %f2933; - setp.lt.u32 %p352, %r278, 2139095040; - @%p352 bra BB2_210; + mov.u32 %r839, 0; + mov.b64 %fd1148, {%r839, %r97}; + +$L__BB2_467: + cvt.rn.f32.f64 %f3161, %fd827; + sub.f32 %f3160, %f3274, %f540; + setp.eq.f32 %p1290, %f3160, 0f3F800000; + selp.f64 %fd844, 0d3FF0000000000000, %fd1148, %p618; + selp.f64 %fd845, 0d3FF0000000000000, %fd1145, %p1290; + mul.f64 %fd846, %fd845, %fd39; + div.rn.f64 %fd847, %fd846, %fd844; + add.f64 %fd848, %fd847, %fd52; + cvt.rn.f32.f64 %f1731, %fd848; + mul.f32 %f1732, %f240, %f1731; + cvt.f64.f32 %fd849, %f1732; + add.f64 %fd280, %fd263, %fd849; + cvt.f64.f32 %fd281, %f3161; + { + .reg .b32 %temp; + mov.b64 {%temp, %r109}, %fd281; + } + abs.f64 %fd282, %fd281; + { // callseq 47, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd282; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1150, [retval0+0]; + } // callseq 47 + setp.lt.s32 %p736, %r109, 0; + and.pred %p47, %p736, %p144; + not.pred %p738, %p47; + @%p738 bra $L__BB2_469; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r841}, %fd1150; + } + xor.b32 %r842, %r841, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r843, %temp}, %fd1150; + } + mov.b64 %fd1150, {%r843, %r842}; + +$L__BB2_469: + cvt.rn.f32.f64 %f3162, %fd827; + setp.eq.f32 %p739, %f3162, 0f00000000; + @%p739 bra $L__BB2_473; + bra.uni $L__BB2_470; + +$L__BB2_473: + mov.u32 %r844, 0; + selp.b32 %r845, %r109, 0, %p144; + or.b32 %r846, %r845, 2146435072; + selp.b32 %r847, %r846, %r845, %p146; + mov.b64 %fd1150, {%r844, %r847}; + bra.uni $L__BB2_474; + +$L__BB2_470: + setp.gt.s32 %p740, %r109, -1; + @%p740 bra $L__BB2_474; + + cvt.rzi.f64.f64 %fd852, %fd644; + setp.eq.f64 %p741, %fd852, 0d4000000000000000; + @%p741 bra $L__BB2_474; + + mov.f64 %fd1150, 0dFFF8000000000000; + +$L__BB2_474: + add.f64 %fd288, %fd281, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r848}, %fd288; + } + and.b32 %r849, %r848, 2146435072; + setp.ne.s32 %p744, %r849, 2146435072; + mov.f64 %fd1151, %fd1150; + @%p744 bra $L__BB2_480; + + setp.gtu.f64 %p745, %fd282, 0d7FF0000000000000; + mov.f64 %fd1151, %fd288; + @%p745 bra $L__BB2_480; + + setp.eq.s32 %p746, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r850, %temp}, %fd644; + } + setp.eq.s32 %p747, %r850, 0; + and.pred %p748, %p746, %p747; + @%p748 bra $L__BB2_479; + bra.uni $L__BB2_477; + +$L__BB2_479: + cvt.rn.f32.f64 %f3168, %fd827; + mov.u32 %r855, 0; + setp.gt.f64 %p755, %fd282, 0d3FF0000000000000; + selp.b32 %r856, 2146435072, 0, %p755; + xor.b32 %r857, %r856, 2146435072; + selp.b32 %r858, %r857, %r856, %p146; + setp.eq.f32 %p756, %f3168, 0fBF800000; + selp.b32 %r859, 1072693248, %r858, %p756; + mov.b64 %fd1151, {%r855, %r859}; + bra.uni $L__BB2_480; + +$L__BB2_477: + { + .reg .b32 %temp; + mov.b64 {%r851, %temp}, %fd281; + } + and.b32 %r852, %r109, 2147483647; + setp.ne.s32 %p749, %r852, 2146435072; + setp.ne.s32 %p750, %r851, 0; + or.pred %p751, %p749, %p750; + mov.f64 %fd1151, %fd1150; + @%p751 bra $L__BB2_480; + + and.pred %p753, %p155, %p47; + selp.b32 %r853, %r57, %r56, %p753; + mov.u32 %r854, 0; + mov.b64 %fd1151, {%r854, %r853}; + +$L__BB2_480: + cvt.rn.f32.f64 %f3163, %fd827; + not.pred %p1291, %p15; + setp.eq.f32 %p757, %f3163, 0f3F800000; + selp.f64 %fd855, 0d3FF0000000000000, %fd1151, %p757; + div.rn.f32 %f1733, %f56, %f270; + cvt.f64.f32 %fd856, %f1733; + mul.f64 %fd292, %fd855, %fd856; + mov.f64 %fd1153, %fd67; + @%p1291 bra $L__BB2_482; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r860}, %fd67; + } + xor.b32 %r861, %r860, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r862, %temp}, %fd67; + } + mov.b64 %fd1153, {%r862, %r861}; + +$L__BB2_482: + @%p621 bra $L__BB2_486; + bra.uni $L__BB2_483; + +$L__BB2_486: + mov.u32 %r863, 0; + mov.b64 %fd1153, {%r863, %r95}; + bra.uni $L__BB2_487; + +$L__BB2_483: + setp.gt.s32 %p760, %r93, -1; + @%p760 bra $L__BB2_487; + + cvt.rzi.f64.f64 %fd858, %fd644; + setp.eq.f64 %p761, %fd858, 0d4000000000000000; + @%p761 bra $L__BB2_487; + + mov.f64 %fd1153, 0dFFF8000000000000; + +$L__BB2_487: + selp.f64 %fd1154, %fd1153, %fd50, %p194; + @%p28 bra $L__BB2_492; + + setp.eq.s32 %p763, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r864, %temp}, %fd644; + } + setp.eq.s32 %p764, %r864, 0; + and.pred %p765, %p763, %p764; + @%p765 bra $L__BB2_491; + bra.uni $L__BB2_489; + +$L__BB2_491: + mov.u32 %r868, 0; + mov.b64 %fd1154, {%r868, %r98}; + bra.uni $L__BB2_492; + +$L__BB2_489: + and.b32 %r865, %r93, 2147483647; + setp.ne.s32 %p766, %r865, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r866, %temp}, %fd49; + } + setp.ne.s32 %p767, %r866, 0; + or.pred %p768, %p766, %p767; + mov.f64 %fd1154, %fd1153; + @%p768 bra $L__BB2_492; + + mov.u32 %r867, 0; + mov.b64 %fd1154, {%r867, %r100}; + +$L__BB2_492: + mov.f64 %fd1156, %fd66; + @%p607 bra $L__BB2_494; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r869}, %fd66; + } + xor.b32 %r870, %r869, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r871, %temp}, %fd66; + } + mov.b64 %fd1156, {%r871, %r870}; + +$L__BB2_494: + @%p608 bra $L__BB2_498; + bra.uni $L__BB2_495; + +$L__BB2_498: + mov.u32 %r872, 0; + mov.b64 %fd1156, {%r872, %r91}; + bra.uni $L__BB2_499; + +$L__BB2_495: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1353}, %fd643; + } + setp.gt.s32 %p771, %r1353, -1; + @%p771 bra $L__BB2_499; + + cvt.rzi.f64.f64 %fd862, %fd649; + setp.eq.f64 %p772, %fd862, 0d4010000000000000; + @%p772 bra $L__BB2_499; + + mov.f64 %fd1156, 0dFFF8000000000000; + +$L__BB2_499: + selp.f64 %fd1157, %fd1156, %fd36, %p189; + @%p27 bra $L__BB2_504; + + setp.eq.s32 %p774, %r65, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r873, %temp}, %fd649; + } + setp.eq.s32 %p775, %r873, 0; + and.pred %p776, %p774, %p775; + @%p776 bra $L__BB2_503; + bra.uni $L__BB2_501; + +$L__BB2_503: + mov.u32 %r877, 0; + mov.b64 %fd1157, {%r877, %r94}; + bra.uni $L__BB2_504; + +$L__BB2_501: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1354}, %fd643; + } + and.b32 %r874, %r1354, 2147483647; + setp.ne.s32 %p777, %r874, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r875, %temp}, %fd643; + } + setp.ne.s32 %p778, %r875, 0; + or.pred %p779, %p777, %p778; + mov.f64 %fd1157, %fd1156; + @%p779 bra $L__BB2_504; - mov.f32 %f2935, 0f7F800000; - fma.rn.f32 %f3392, %f601, %f2935, %f2935; - -BB2_210: - setp.eq.f32 %p353, %f601, 0f00000000; - selp.f32 %f2936, 0fFF800000, %f3392, %p353; - mul.f32 %f2937, %f579, %f2936; - sub.f32 %f2938, %f600, %f2937; - add.f32 %f2939, %f579, %f2938; - add.f32 %f3393, %f3393, %f2939; - bra.uni BB2_211; - -BB2_205: - sub.f32 %f3393, %f3393, %f578; - -BB2_211: - add.s32 %r322, %r322, 1; - setp.lt.s32 %p354, %r322, %r63; - @%p354 bra BB2_131; - - st.local.f32 [%rd2], %f3365; - st.local.f32 [%rd2+4], %f3364; - st.local.f32 [%rd2+20], %f3364; - st.local.f32 [%rd2+8], %f3363; - st.local.f32 [%rd2+40], %f3363; - st.local.f32 [%rd2+12], %f3362; - st.local.f32 [%rd2+60], %f3362; - st.local.f32 [%rd2+16], %f3361; - st.local.f32 [%rd2+80], %f3361; - st.local.f32 [%rd2+24], %f3360; - st.local.f32 [%rd2+28], %f3359; - st.local.f32 [%rd2+44], %f3359; - st.local.f32 [%rd2+32], %f3358; - st.local.f32 [%rd2+64], %f3358; - st.local.f32 [%rd2+36], %f3357; - st.local.f32 [%rd2+84], %f3357; - st.local.f32 [%rd2+48], %f3356; - st.local.f32 [%rd2+52], %f3355; - st.local.f32 [%rd2+68], %f3355; - st.local.f32 [%rd2+56], %f3354; - st.local.f32 [%rd2+88], %f3354; - st.local.f32 [%rd2+72], %f3353; - st.local.f32 [%rd2+76], %f3352; - st.local.f32 [%rd2+92], %f3352; - st.local.f32 [%rd2+96], %f3351; - add.s32 %r321, %r321, 1; - setp.lt.s32 %p355, %r321, %r63; - @%p355 bra BB2_130; - -BB2_213: - mov.f32 %f3395, 0f00000000; - ld.local.f32 %f2941, [%rd2]; - rcp.rn.f32 %f608, %f2941; - ld.local.f32 %f2942, [%rd2+4]; - mul.f32 %f609, %f608, %f2942; - ld.local.f32 %f2943, [%rd2+8]; - ld.local.f32 %f2944, [%rd2+12]; - ld.local.f32 %f2945, [%rd2+16]; - ld.local.f32 %f2946, [%rd2+20]; - ld.local.f32 %f2947, [%rd2+24]; - ld.local.f32 %f2948, [%rd2+28]; - ld.local.f32 %f2949, [%rd2+32]; - ld.local.f32 %f2950, [%rd2+36]; - ld.local.f32 %f2951, [%rd2+40]; - ld.local.f32 %f2952, [%rd2+44]; - st.local.f32 [%rd2+4], %f609; - mul.f32 %f610, %f608, %f2943; - st.local.f32 [%rd2+8], %f610; - mul.f32 %f611, %f608, %f2944; - st.local.f32 [%rd2+12], %f611; - mul.f32 %f612, %f608, %f2945; - st.local.f32 [%rd2+16], %f612; - ld.local.f32 %f2953, [%rd2+4]; - fma.rn.f32 %f2954, %f2953, %f2946, 0f00000000; - sub.f32 %f2955, %f2947, %f2954; - ld.local.f32 %f613, [%rd2+20]; - st.local.f32 [%rd2+24], %f2955; - fma.rn.f32 %f2956, %f610, %f613, 0f00000000; - rcp.rn.f32 %f614, %f2955; - sub.f32 %f2957, %f2948, %f2956; - mul.f32 %f615, %f614, %f2957; - st.local.f32 [%rd2+28], %f615; - fma.rn.f32 %f2958, %f611, %f613, 0f00000000; - sub.f32 %f2959, %f2949, %f2958; - mul.f32 %f616, %f614, %f2959; - st.local.f32 [%rd2+32], %f616; - fma.rn.f32 %f2960, %f612, %f613, 0f00000000; - sub.f32 %f2961, %f2950, %f2960; - mul.f32 %f617, %f614, %f2961; - st.local.f32 [%rd2+36], %f617; - ld.local.f32 %f2962, [%rd2+4]; - fma.rn.f32 %f2963, %f2962, %f2951, 0f00000000; - sub.f32 %f618, %f2952, %f2963; - st.local.f32 [%rd2+44], %f618; - add.s64 %rd96, %rd2, 40; - add.s64 %rd95, %rd2, 8; - mov.u32 %r323, -1; - -BB2_214: - ld.local.f32 %f2964, [%rd96]; - ld.local.f32 %f2965, [%rd95]; - fma.rn.f32 %f3395, %f2965, %f2964, %f3395; - add.s64 %rd96, %rd96, 4; - add.s64 %rd95, %rd95, 20; - add.s32 %r323, %r323, 1; - setp.lt.s32 %p356, %r323, 1; - @%p356 bra BB2_214; - - ld.local.f32 %f2967, [%rd2+48]; - sub.f32 %f2968, %f2967, %f3395; - ld.local.f32 %f621, [%rd2+40]; - ld.local.f32 %f2969, [%rd2+52]; - ld.local.f32 %f2970, [%rd2+56]; - ld.local.f32 %f2971, [%rd2+60]; - ld.local.f32 %f2972, [%rd2+4]; - ld.local.f32 %f2973, [%rd2+64]; - st.local.f32 [%rd2+48], %f2968; - fma.rn.f32 %f2974, %f611, %f621, 0f00000000; - fma.rn.f32 %f2975, %f616, %f618, %f2974; - rcp.rn.f32 %f622, %f2968; - sub.f32 %f2976, %f2969, %f2975; - mul.f32 %f623, %f622, %f2976; - st.local.f32 [%rd2+52], %f623; - fma.rn.f32 %f2977, %f612, %f621, 0f00000000; - fma.rn.f32 %f2978, %f617, %f618, %f2977; - sub.f32 %f2979, %f2970, %f2978; - mul.f32 %f624, %f622, %f2979; - st.local.f32 [%rd2+56], %f624; - fma.rn.f32 %f2980, %f2972, %f2971, 0f00000000; - sub.f32 %f625, %f2973, %f2980; - st.local.f32 [%rd2+64], %f625; - add.s64 %rd98, %rd2, 60; - add.s64 %rd97, %rd2, 8; - mov.f32 %f3396, 0f00000000; - mov.u32 %r324, -1; - -BB2_216: - ld.local.f32 %f2981, [%rd98]; - ld.local.f32 %f2982, [%rd97]; - fma.rn.f32 %f3396, %f2982, %f2981, %f3396; - add.s64 %rd98, %rd98, 4; - add.s64 %rd97, %rd97, 20; - add.s32 %r324, %r324, 1; - setp.lt.s32 %p357, %r324, 1; - @%p357 bra BB2_216; - - ld.local.f32 %f2984, [%rd2+68]; - sub.f32 %f628, %f2984, %f3396; - st.local.f32 [%rd2+68], %f628; - add.s64 %rd100, %rd2, 60; - add.s64 %rd99, %rd2, 12; - mov.f32 %f3397, 0f00000000; - mov.u32 %r325, -1; - -BB2_218: - ld.local.f32 %f2985, [%rd100]; - ld.local.f32 %f2986, [%rd99]; - fma.rn.f32 %f3397, %f2986, %f2985, %f3397; - add.s64 %rd100, %rd100, 4; - add.s64 %rd99, %rd99, 20; - add.s32 %r325, %r325, 1; - setp.lt.s32 %p358, %r325, 2; - @%p358 bra BB2_218; - - ld.local.f32 %f2988, [%rd2+72]; - sub.f32 %f2989, %f2988, %f3397; - ld.local.f32 %f631, [%rd2+60]; - ld.local.f32 %f2990, [%rd2+76]; - ld.local.f32 %f2991, [%rd2+80]; - ld.local.f32 %f2992, [%rd2+4]; - ld.local.f32 %f2993, [%rd2+84]; - st.local.f32 [%rd2+72], %f2989; - fma.rn.f32 %f2994, %f612, %f631, 0f00000000; - fma.rn.f32 %f2995, %f617, %f625, %f2994; - fma.rn.f32 %f2996, %f624, %f628, %f2995; - rcp.rn.f32 %f632, %f2989; - sub.f32 %f2997, %f2990, %f2996; - mul.f32 %f633, %f632, %f2997; - st.local.f32 [%rd2+76], %f633; - fma.rn.f32 %f2998, %f2992, %f2991, 0f00000000; - sub.f32 %f634, %f2993, %f2998; - st.local.f32 [%rd2+84], %f634; - add.s64 %rd102, %rd2, 80; - add.s64 %rd101, %rd2, 8; - mov.f32 %f3398, 0f00000000; - mov.u32 %r326, -1; - -BB2_220: - ld.local.f32 %f2999, [%rd102]; - ld.local.f32 %f3000, [%rd101]; - fma.rn.f32 %f3398, %f3000, %f2999, %f3398; - add.s64 %rd102, %rd102, 4; - add.s64 %rd101, %rd101, 20; - add.s32 %r326, %r326, 1; - setp.lt.s32 %p359, %r326, 1; - @%p359 bra BB2_220; - - ld.local.f32 %f3002, [%rd2+88]; - sub.f32 %f637, %f3002, %f3398; - st.local.f32 [%rd2+88], %f637; - add.s64 %rd104, %rd2, 80; - add.s64 %rd103, %rd2, 12; - mov.f32 %f3399, 0f00000000; - mov.u32 %r327, -1; - -BB2_222: - ld.local.f32 %f3003, [%rd104]; - ld.local.f32 %f3004, [%rd103]; - fma.rn.f32 %f3399, %f3004, %f3003, %f3399; - add.s64 %rd104, %rd104, 4; - add.s64 %rd103, %rd103, 20; - add.s32 %r327, %r327, 1; - setp.lt.s32 %p360, %r327, 2; - @%p360 bra BB2_222; - - ld.local.f32 %f3006, [%rd2+92]; - sub.f32 %f640, %f3006, %f3399; - st.local.f32 [%rd2+92], %f640; - add.s64 %rd106, %rd2, 80; - add.s64 %rd105, %rd2, 16; - mov.f32 %f3400, 0f00000000; - mov.u32 %r328, -1; - -BB2_224: - ld.local.f32 %f3007, [%rd106]; - ld.local.f32 %f3008, [%rd105]; - fma.rn.f32 %f3400, %f3008, %f3007, %f3400; - add.s64 %rd106, %rd106, 4; - add.s64 %rd105, %rd105, 20; - add.s32 %r328, %r328, 1; - setp.lt.s32 %p361, %r328, 3; - @%p361 bra BB2_224; - - ld.param.u64 %rd93, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_13]; - ld.param.u64 %rd92, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_12]; - ld.param.u32 %r300, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_14]; - ld.param.u64 %rd91, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_11]; - mov.u32 %r299, %tid.x; - mov.u32 %r298, %ctaid.x; - mov.u32 %r297, %ntid.x; - mad.lo.s32 %r296, %r297, %r298, %r299; - ld.local.f32 %f3009, [%rd2+96]; - sub.f32 %f3010, %f3009, %f3400; - ld.local.f32 %f3011, [%rd2+80]; - st.local.f32 [%rd2+96], %f3010; - add.f32 %f3012, %f609, 0f00000000; - mov.f32 %f3013, 0f00000000; - sub.f32 %f3014, %f3013, %f3012; - add.f32 %f3015, %f610, 0f00000000; - fma.rn.f32 %f3016, %f615, %f3014, %f3015; - sub.f32 %f3017, %f3013, %f3016; - add.f32 %f3018, %f611, 0f00000000; - fma.rn.f32 %f3019, %f616, %f3014, %f3018; - fma.rn.f32 %f3020, %f623, %f3017, %f3019; - sub.f32 %f3021, %f3013, %f3020; - add.f32 %f3022, %f612, 0f00000000; - fma.rn.f32 %f3023, %f617, %f3014, %f3022; - fma.rn.f32 %f3024, %f624, %f3017, %f3023; - fma.rn.f32 %f3025, %f633, %f3021, %f3024; - sub.f32 %f3026, %f3013, %f3025; - div.rn.f32 %f3027, %f3026, %f3010; - fma.rn.f32 %f3028, %f640, %f3027, 0f00000000; - sub.f32 %f3029, %f3021, %f3028; - mul.f32 %f3030, %f632, %f3029; - fma.rn.f32 %f3031, %f628, %f3030, 0f00000000; - fma.rn.f32 %f3032, %f637, %f3027, %f3031; - sub.f32 %f3033, %f3017, %f3032; - mul.f32 %f3034, %f622, %f3033; - fma.rn.f32 %f3035, %f618, %f3034, 0f00000000; - fma.rn.f32 %f3036, %f625, %f3030, %f3035; - fma.rn.f32 %f3037, %f634, %f3027, %f3036; - sub.f32 %f3038, %f3014, %f3037; - mul.f32 %f3039, %f614, %f3038; - fma.rn.f32 %f3040, %f613, %f3039, 0f00000000; - fma.rn.f32 %f3041, %f621, %f3034, %f3040; - fma.rn.f32 %f3042, %f631, %f3030, %f3041; - fma.rn.f32 %f3043, %f3011, %f3027, %f3042; - mov.f32 %f3044, 0f3F800000; - sub.f32 %f3045, %f3044, %f3043; - mul.f32 %f3046, %f608, %f3045; - fma.rn.f32 %f3047, %f609, 0f00000000, 0f00000000; - sub.f32 %f3048, %f3044, %f3047; - fma.rn.f32 %f3049, %f610, 0f00000000, 0f00000000; - fma.rn.f32 %f3050, %f615, %f3048, %f3049; - sub.f32 %f3051, %f3013, %f3050; - fma.rn.f32 %f3052, %f611, 0f00000000, 0f00000000; - fma.rn.f32 %f3053, %f616, %f3048, %f3052; - fma.rn.f32 %f3054, %f623, %f3051, %f3053; - sub.f32 %f3055, %f3013, %f3054; - fma.rn.f32 %f3056, %f612, 0f00000000, 0f00000000; - fma.rn.f32 %f3057, %f617, %f3048, %f3056; - fma.rn.f32 %f3058, %f624, %f3051, %f3057; - fma.rn.f32 %f3059, %f633, %f3055, %f3058; - sub.f32 %f3060, %f3013, %f3059; - div.rn.f32 %f3061, %f3060, %f3010; - fma.rn.f32 %f3062, %f640, %f3061, 0f00000000; - sub.f32 %f3063, %f3055, %f3062; - mul.f32 %f3064, %f632, %f3063; - fma.rn.f32 %f3065, %f628, %f3064, 0f00000000; - fma.rn.f32 %f3066, %f637, %f3061, %f3065; - sub.f32 %f3067, %f3051, %f3066; - mul.f32 %f3068, %f622, %f3067; - fma.rn.f32 %f3069, %f618, %f3068, 0f00000000; - fma.rn.f32 %f3070, %f625, %f3064, %f3069; - fma.rn.f32 %f3071, %f634, %f3061, %f3070; - sub.f32 %f3072, %f3048, %f3071; - mul.f32 %f3073, %f614, %f3072; - sub.f32 %f3074, %f3013, %f3047; - fma.rn.f32 %f3075, %f615, %f3074, %f3049; - sub.f32 %f3076, %f3044, %f3075; - fma.rn.f32 %f3077, %f616, %f3074, %f3052; - fma.rn.f32 %f3078, %f623, %f3076, %f3077; - sub.f32 %f3079, %f3013, %f3078; - fma.rn.f32 %f3080, %f617, %f3074, %f3056; - fma.rn.f32 %f3081, %f624, %f3076, %f3080; - fma.rn.f32 %f3082, %f633, %f3079, %f3081; - sub.f32 %f3083, %f3013, %f3082; - div.rn.f32 %f3084, %f3083, %f3010; - fma.rn.f32 %f3085, %f640, %f3084, 0f00000000; - sub.f32 %f3086, %f3079, %f3085; - mul.f32 %f3087, %f632, %f3086; - fma.rn.f32 %f3088, %f628, %f3087, 0f00000000; - fma.rn.f32 %f3089, %f637, %f3084, %f3088; - sub.f32 %f3090, %f3076, %f3089; - mul.f32 %f3091, %f622, %f3090; - sub.f32 %f3092, %f3013, %f3075; - fma.rn.f32 %f3093, %f623, %f3092, %f3077; - sub.f32 %f3094, %f3044, %f3093; - fma.rn.f32 %f3095, %f624, %f3092, %f3080; - fma.rn.f32 %f3096, %f633, %f3094, %f3095; - sub.f32 %f3097, %f3013, %f3096; - div.rn.f32 %f3098, %f3097, %f3010; - fma.rn.f32 %f3099, %f640, %f3098, 0f00000000; - sub.f32 %f3100, %f3094, %f3099; - mul.f32 %f3101, %f632, %f3100; - sub.f32 %f3102, %f3013, %f3093; - fma.rn.f32 %f3103, %f633, %f3102, %f3095; - sub.f32 %f3104, %f3044, %f3103; - div.rn.f32 %f3105, %f3104, %f3010; - cvta.to.global.u64 %rd75, %rd91; - mul.wide.s32 %rd76, %r296, 4; - add.s64 %rd77, %rd75, %rd76; - st.global.f32 [%rd77], %f3349; - shl.b32 %r292, %r300, 2; - cvt.s64.s32 %rd78, %r292; - add.s64 %rd79, %rd77, %rd78; - st.global.f32 [%rd79], %f3348; - add.s64 %rd80, %rd79, %rd78; - st.global.f32 [%rd80], %f3347; - add.s64 %rd81, %rd80, %rd78; - st.global.f32 [%rd81], %f3258; - add.s64 %rd82, %rd81, %rd78; - st.global.f32 [%rd82], %f3345; - cvta.to.global.u64 %rd83, %rd92; - add.s64 %rd84, %rd83, %rd76; - st.global.f32 [%rd84], %f3046; - add.s64 %rd85, %rd84, %rd78; - st.global.f32 [%rd85], %f3073; - add.s64 %rd86, %rd85, %rd78; - st.global.f32 [%rd86], %f3091; - add.s64 %rd87, %rd86, %rd78; - st.global.f32 [%rd87], %f3101; - add.s64 %rd88, %rd87, %rd78; - st.global.f32 [%rd88], %f3105; - cvta.to.global.u64 %rd89, %rd93; - add.s64 %rd90, %rd89, %rd76; - st.global.f32 [%rd90], %f3393; - -BB2_226: + mov.u32 %r876, 0; + mov.b64 %fd1157, {%r876, %r97}; + +$L__BB2_504: + cvt.rn.f32.f64 %f3167, %fd827; + mul.f32 %f3166, %f241, %f3167; + cvt.rn.f32.f64 %f3165, %fd806; + mul.f32 %f3164, %f240, %f3165; + selp.f64 %fd866, 0d3FF0000000000000, %fd1157, %p618; + selp.f64 %fd867, 0d3FF0000000000000, %fd1154, %p631; + mul.f64 %fd868, %fd867, %fd40; + div.rn.f64 %fd869, %fd868, %fd866; + add.f64 %fd870, %fd869, %fd53; + cvt.rn.f32.f64 %f1735, %fd870; + mul.f32 %f1736, %f241, %f1735; + cvt.f64.f32 %fd871, %f1736; + add.f64 %fd872, %fd292, %fd871; + cvt.rn.f32.f64 %f1737, %fd872; + mul.f32 %f1738, %f3164, %f3164; + cvt.rn.f32.f64 %f1739, %fd280; + mul.f32 %f1740, %f211, %f1739; + fma.rn.f32 %f1741, %f1738, %f212, %f1740; + mul.f32 %f1742, %f3166, %f3166; + cvt.rn.f32.f64 %f1743, %fd201; + fma.rn.f32 %f1744, %f1742, %f1743, %f1741; + fma.rn.f32 %f271, %f239, %f1737, %f1744; + mul.f32 %f1745, %f111, %f3276; + fma.rn.f32 %f272, %f125, %f1745, %f3275; + mad.lo.s32 %r878, %r1371, %r182, %r1370; + add.s32 %r879, %r878, %r2; + mul.wide.s32 %rd22, %r879, 4; + add.s64 %rd23, %rd1, %rd22; + ld.global.f32 %f273, [%rd23]; + mul.f32 %f274, %f111, %f125; + setp.leu.f32 %p782, %f272, 0f3C23D70A; + mov.f32 %f3262, %f3263; + @%p782 bra $L__BB2_506; + + div.rn.f32 %f1746, %f273, %f272; + add.f32 %f3262, %f1746, 0fBF800000; + +$L__BB2_506: + @%p782 bra $L__BB2_521; + + cvt.f64.f32 %fd309, %f272; + { + .reg .b32 %temp; + mov.b64 {%temp, %r110}, %fd309; + } + abs.f64 %fd310, %fd309; + { // callseq 48, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd310; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1159, [retval0+0]; + } // callseq 48 + setp.lt.s32 %p785, %r110, 0; + and.pred %p48, %p785, %p144; + not.pred %p786, %p48; + @%p786 bra $L__BB2_509; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r880}, %fd1159; + } + xor.b32 %r881, %r880, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r882, %temp}, %fd1159; + } + mov.b64 %fd1159, {%r882, %r881}; + +$L__BB2_509: + setp.eq.f32 %p787, %f272, 0f00000000; + @%p787 bra $L__BB2_513; + bra.uni $L__BB2_510; + +$L__BB2_513: + mov.u32 %r883, 0; + selp.b32 %r884, %r110, 0, %p144; + or.b32 %r885, %r884, 2146435072; + selp.b32 %r886, %r885, %r884, %p146; + mov.b64 %fd1159, {%r883, %r886}; + bra.uni $L__BB2_514; + +$L__BB2_510: + setp.gt.s32 %p788, %r110, -1; + @%p788 bra $L__BB2_514; + + cvt.rzi.f64.f64 %fd875, %fd644; + setp.eq.f64 %p789, %fd875, 0d4000000000000000; + @%p789 bra $L__BB2_514; + + mov.f64 %fd1159, 0dFFF8000000000000; + +$L__BB2_514: + add.f64 %fd316, %fd309, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r887}, %fd316; + } + and.b32 %r888, %r887, 2146435072; + setp.ne.s32 %p792, %r888, 2146435072; + mov.f64 %fd1160, %fd1159; + @%p792 bra $L__BB2_520; + + setp.gtu.f64 %p793, %fd310, 0d7FF0000000000000; + mov.f64 %fd1160, %fd316; + @%p793 bra $L__BB2_520; + + setp.eq.s32 %p794, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r889, %temp}, %fd644; + } + setp.eq.s32 %p795, %r889, 0; + and.pred %p796, %p794, %p795; + @%p796 bra $L__BB2_519; + bra.uni $L__BB2_517; + +$L__BB2_519: + mov.u32 %r894, 0; + setp.gt.f64 %p803, %fd310, 0d3FF0000000000000; + selp.b32 %r895, 2146435072, 0, %p803; + xor.b32 %r896, %r895, 2146435072; + selp.b32 %r897, %r896, %r895, %p146; + setp.eq.f32 %p804, %f272, 0fBF800000; + selp.b32 %r898, 1072693248, %r897, %p804; + mov.b64 %fd1160, {%r894, %r898}; + bra.uni $L__BB2_520; + +$L__BB2_517: + { + .reg .b32 %temp; + mov.b64 {%r890, %temp}, %fd309; + } + and.b32 %r891, %r110, 2147483647; + setp.ne.s32 %p797, %r891, 2146435072; + setp.ne.s32 %p798, %r890, 0; + or.pred %p799, %p797, %p798; + mov.f64 %fd1160, %fd1159; + @%p799 bra $L__BB2_520; + + and.pred %p801, %p155, %p48; + selp.b32 %r892, %r57, %r56, %p801; + mov.u32 %r893, 0; + mov.b64 %fd1160, {%r893, %r892}; + +$L__BB2_520: + setp.eq.f32 %p805, %f272, 0f3F800000; + selp.f64 %fd878, 0d3FF0000000000000, %fd1160, %p805; + cvt.f64.f32 %fd879, %f273; + div.rn.f64 %fd880, %fd879, %fd878; + cvt.rn.f32.f64 %f3263, %fd880; + +$L__BB2_521: + mov.f32 %f1748, 0f47C35000; + min.f32 %f1749, %f3263, %f1748; + cvt.f64.f32 %fd320, %f1749; + min.f32 %f279, %f3262, %f1748; + fma.rn.f32 %f3222, %f279, %f153, %f3222; + mul.f32 %f1750, %f279, %f154; + cvt.f64.f32 %fd321, %f1750; + cvt.f64.f32 %fd322, %f153; + { + .reg .b32 %temp; + mov.b64 {%temp, %r111}, %fd322; + } + abs.f64 %fd323, %fd322; + { // callseq 49, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd323; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1161, [retval0+0]; + } // callseq 49 + @%p144 bra $L__BB2_567; + bra.uni $L__BB2_522; + +$L__BB2_567: + setp.gt.s32 %p867, %r111, -1; + @%p867 bra $L__BB2_569; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r947}, %fd1161; + } + xor.b32 %r948, %r947, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r949, %temp}, %fd1161; + } + mov.b64 %fd1161, {%r949, %r948}; + +$L__BB2_569: + setp.eq.f32 %p868, %f153, 0f00000000; + @%p868 bra $L__BB2_573; + bra.uni $L__BB2_570; + +$L__BB2_573: + mov.u32 %r950, 0; + or.b32 %r951, %r111, 2146435072; + selp.b32 %r952, %r951, %r111, %p146; + mov.b64 %fd1161, {%r950, %r952}; + bra.uni $L__BB2_574; + +$L__BB2_522: + setp.eq.f32 %p807, %f153, 0f00000000; + @%p807 bra $L__BB2_526; + bra.uni $L__BB2_523; + +$L__BB2_526: + mov.u32 %r899, 0; + mov.b64 %fd1161, {%r899, %r102}; + bra.uni $L__BB2_527; + +$L__BB2_570: + @%p867 bra $L__BB2_574; + + cvt.rzi.f64.f64 %fd923, %fd644; + setp.eq.f64 %p870, %fd923, 0d4000000000000000; + @%p870 bra $L__BB2_574; + + mov.f64 %fd1161, 0dFFF8000000000000; + +$L__BB2_574: + add.f64 %fd367, %fd322, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r953}, %fd367; + } + and.b32 %r954, %r953, 2146435072; + setp.ne.s32 %p872, %r954, 2146435072; + mov.f64 %fd1171, %fd1161; + @%p872 bra $L__BB2_580; + + setp.gtu.f64 %p873, %fd323, 0d7FF0000000000000; + mov.f64 %fd1171, %fd367; + @%p873 bra $L__BB2_580; + + setp.eq.s32 %p874, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r955, %temp}, %fd644; + } + setp.eq.s32 %p875, %r955, 0; + and.pred %p876, %p874, %p875; + @%p876 bra $L__BB2_579; + bra.uni $L__BB2_577; + +$L__BB2_579: + mov.u32 %r960, 0; + setp.gt.f64 %p884, %fd323, 0d3FF0000000000000; + selp.b32 %r961, 2146435072, 0, %p884; + xor.b32 %r962, %r961, 2146435072; + selp.b32 %r963, %r962, %r961, %p146; + setp.eq.f32 %p885, %f153, 0fBF800000; + selp.b32 %r964, 1072693248, %r963, %p885; + mov.b64 %fd1171, {%r960, %r964}; + bra.uni $L__BB2_580; + +$L__BB2_523: + setp.gt.s32 %p808, %r111, -1; + @%p808 bra $L__BB2_527; + + cvt.rzi.f64.f64 %fd883, %fd644; + setp.eq.f64 %p809, %fd883, 0d4000000000000000; + @%p809 bra $L__BB2_527; + + mov.f64 %fd1161, 0dFFF8000000000000; + +$L__BB2_527: + add.f64 %fd327, %fd322, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r900}, %fd327; + } + and.b32 %r901, %r900, 2146435072; + setp.ne.s32 %p810, %r901, 2146435072; + mov.f64 %fd1162, %fd1161; + @%p810 bra $L__BB2_533; + + setp.gtu.f64 %p811, %fd323, 0d7FF0000000000000; + mov.f64 %fd1162, %fd327; + @%p811 bra $L__BB2_533; + + setp.eq.s32 %p812, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r902, %temp}, %fd644; + } + setp.eq.s32 %p813, %r902, 0; + and.pred %p814, %p812, %p813; + @%p814 bra $L__BB2_532; + bra.uni $L__BB2_530; + +$L__BB2_532: + mov.u32 %r906, 0; + setp.gt.f64 %p819, %fd323, 0d3FF0000000000000; + selp.b32 %r907, 2146435072, 0, %p819; + xor.b32 %r908, %r907, 2146435072; + selp.b32 %r909, %r908, %r907, %p146; + setp.eq.f32 %p820, %f153, 0fBF800000; + selp.b32 %r910, 1072693248, %r909, %p820; + mov.b64 %fd1162, {%r906, %r910}; + bra.uni $L__BB2_533; + +$L__BB2_577: + { + .reg .b32 %temp; + mov.b64 {%r956, %temp}, %fd322; + } + and.b32 %r957, %r111, 2147483647; + setp.ne.s32 %p877, %r957, 2146435072; + setp.ne.s32 %p878, %r956, 0; + or.pred %p879, %p877, %p878; + mov.f64 %fd1171, %fd1161; + @%p879 bra $L__BB2_580; + + setp.lt.s32 %p880, %r111, 0; + mov.u32 %r958, 0; + and.pred %p882, %p155, %p880; + selp.b32 %r959, %r57, %r56, %p882; + mov.b64 %fd1171, {%r958, %r959}; + +$L__BB2_580: + setp.eq.f32 %p886, %f153, 0f3F800000; + selp.f64 %fd926, 0d3FF0000000000000, %fd1171, %p886; + mul.f64 %fd927, %fd926, %fd320; + sub.f64 %fd928, %fd321, %fd927; + cvt.f64.f32 %fd929, %f3227; + add.f64 %fd1185, %fd928, %fd929; + cvt.f64.f32 %fd372, %f183; + { + .reg .b32 %temp; + mov.b64 {%temp, %r115}, %fd372; + } + abs.f64 %fd373, %fd372; + { // callseq 53, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd373; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1173, [retval0+0]; + } // callseq 53 + setp.gt.s32 %p887, %r115, -1; + @%p887 bra $L__BB2_582; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r965}, %fd1173; + } + xor.b32 %r966, %r965, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r967, %temp}, %fd1173; + } + mov.b64 %fd1173, {%r967, %r966}; + +$L__BB2_582: + setp.eq.f32 %p888, %f183, 0f00000000; + @%p888 bra $L__BB2_586; + bra.uni $L__BB2_583; + +$L__BB2_586: + mov.u32 %r968, 0; + or.b32 %r969, %r115, 2146435072; + selp.b32 %r970, %r969, %r115, %p146; + mov.b64 %fd1173, {%r968, %r970}; + bra.uni $L__BB2_587; + +$L__BB2_583: + @%p887 bra $L__BB2_587; + + cvt.rzi.f64.f64 %fd932, %fd644; + setp.eq.f64 %p890, %fd932, 0d4000000000000000; + @%p890 bra $L__BB2_587; + + mov.f64 %fd1173, 0dFFF8000000000000; + +$L__BB2_587: + add.f64 %fd379, %fd372, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r971}, %fd379; + } + and.b32 %r972, %r971, 2146435072; + setp.ne.s32 %p892, %r972, 2146435072; + mov.f64 %fd1174, %fd1173; + @%p892 bra $L__BB2_593; + + setp.gtu.f64 %p893, %fd373, 0d7FF0000000000000; + mov.f64 %fd1174, %fd379; + @%p893 bra $L__BB2_593; + + setp.eq.s32 %p894, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r973, %temp}, %fd644; + } + setp.eq.s32 %p895, %r973, 0; + and.pred %p896, %p894, %p895; + @%p896 bra $L__BB2_592; + bra.uni $L__BB2_590; + +$L__BB2_592: + mov.u32 %r978, 0; + setp.gt.f64 %p904, %fd373, 0d3FF0000000000000; + selp.b32 %r979, 2146435072, 0, %p904; + xor.b32 %r980, %r979, 2146435072; + selp.b32 %r981, %r980, %r979, %p146; + setp.eq.f32 %p905, %f183, 0fBF800000; + selp.b32 %r982, 1072693248, %r981, %p905; + mov.b64 %fd1174, {%r978, %r982}; + bra.uni $L__BB2_593; + +$L__BB2_530: + { + .reg .b32 %temp; + mov.b64 {%r903, %temp}, %fd322; + } + and.b32 %r904, %r111, 2147483647; + setp.ne.s32 %p815, %r904, 2146435072; + setp.ne.s32 %p816, %r903, 0; + or.pred %p817, %p815, %p816; + mov.f64 %fd1162, %fd1161; + @%p817 bra $L__BB2_533; + + mov.u32 %r905, 0; + mov.b64 %fd1162, {%r905, %r56}; + +$L__BB2_533: + setp.eq.f32 %p821, %f153, 0f3F800000; + selp.f64 %fd886, 0d3FF0000000000000, %fd1162, %p821; + mul.f64 %fd887, %fd886, %fd320; + sub.f64 %fd888, %fd321, %fd887; + cvt.f64.f32 %fd889, %f3227; + add.f64 %fd1185, %fd888, %fd889; + cvt.f64.f32 %fd332, %f183; + { + .reg .b32 %temp; + mov.b64 {%temp, %r112}, %fd332; + } + abs.f64 %fd333, %fd332; + setp.eq.f32 %p822, %f183, 0f00000000; + @%p822 bra $L__BB2_537; + bra.uni $L__BB2_534; + +$L__BB2_537: + mov.u32 %r911, 0; + mov.b64 %fd1163, {%r911, %r102}; + bra.uni $L__BB2_538; + +$L__BB2_534: + { // callseq 50, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd333; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1163, [retval0+0]; + } // callseq 50 + setp.gt.s32 %p823, %r112, -1; + @%p823 bra $L__BB2_538; + + cvt.rzi.f64.f64 %fd892, %fd644; + setp.eq.f64 %p824, %fd892, 0d4000000000000000; + @%p824 bra $L__BB2_538; + + mov.f64 %fd1163, 0dFFF8000000000000; + +$L__BB2_538: + add.f64 %fd337, %fd332, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r912}, %fd337; + } + and.b32 %r913, %r912, 2146435072; + setp.ne.s32 %p825, %r913, 2146435072; + mov.f64 %fd1164, %fd1163; + @%p825 bra $L__BB2_544; + + setp.gtu.f64 %p826, %fd333, 0d7FF0000000000000; + mov.f64 %fd1164, %fd337; + @%p826 bra $L__BB2_544; + + setp.eq.s32 %p827, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r914, %temp}, %fd644; + } + setp.eq.s32 %p828, %r914, 0; + and.pred %p829, %p827, %p828; + @%p829 bra $L__BB2_543; + bra.uni $L__BB2_541; + +$L__BB2_543: + mov.u32 %r918, 0; + setp.gt.f64 %p834, %fd333, 0d3FF0000000000000; + selp.b32 %r919, 2146435072, 0, %p834; + xor.b32 %r920, %r919, 2146435072; + selp.b32 %r921, %r920, %r919, %p146; + setp.eq.f32 %p835, %f183, 0fBF800000; + selp.b32 %r922, 1072693248, %r921, %p835; + mov.b64 %fd1164, {%r918, %r922}; + bra.uni $L__BB2_544; + +$L__BB2_590: + { + .reg .b32 %temp; + mov.b64 {%r974, %temp}, %fd372; + } + and.b32 %r975, %r115, 2147483647; + setp.ne.s32 %p897, %r975, 2146435072; + setp.ne.s32 %p898, %r974, 0; + or.pred %p899, %p897, %p898; + mov.f64 %fd1174, %fd1173; + @%p899 bra $L__BB2_593; + + setp.lt.s32 %p900, %r115, 0; + mov.u32 %r976, 0; + and.pred %p902, %p155, %p900; + selp.b32 %r977, %r57, %r56, %p902; + mov.b64 %fd1174, {%r976, %r977}; + +$L__BB2_593: + setp.eq.f32 %p906, %f183, 0f3F800000; + selp.f64 %fd935, 0d3FF0000000000000, %fd1174, %p906; + mul.f64 %fd936, %fd935, %fd320; + mul.f32 %f1754, %f279, %f184; + cvt.f64.f32 %fd937, %f1754; + sub.f64 %fd938, %fd937, %fd936; + cvt.f64.f32 %fd939, %f3226; + add.f64 %fd1184, %fd938, %fd939; + cvt.f64.f32 %fd384, %f274; + { + .reg .b32 %temp; + mov.b64 {%temp, %r116}, %fd384; + } + abs.f64 %fd385, %fd384; + { // callseq 54, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd385; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1176, [retval0+0]; + } // callseq 54 + setp.gt.s32 %p907, %r116, -1; + @%p907 bra $L__BB2_595; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r983}, %fd1176; + } + xor.b32 %r984, %r983, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r985, %temp}, %fd1176; + } + mov.b64 %fd1176, {%r985, %r984}; + +$L__BB2_595: + setp.eq.f32 %p908, %f274, 0f00000000; + @%p908 bra $L__BB2_599; + bra.uni $L__BB2_596; + +$L__BB2_599: + mov.u32 %r986, 0; + or.b32 %r987, %r116, 2146435072; + selp.b32 %r988, %r987, %r116, %p146; + mov.b64 %fd1176, {%r986, %r988}; + bra.uni $L__BB2_600; + +$L__BB2_596: + @%p907 bra $L__BB2_600; + + cvt.rzi.f64.f64 %fd942, %fd644; + setp.eq.f64 %p910, %fd942, 0d4000000000000000; + @%p910 bra $L__BB2_600; + + mov.f64 %fd1176, 0dFFF8000000000000; + +$L__BB2_600: + add.f64 %fd391, %fd384, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r989}, %fd391; + } + and.b32 %r990, %r989, 2146435072; + setp.ne.s32 %p912, %r990, 2146435072; + mov.f64 %fd1177, %fd1176; + @%p912 bra $L__BB2_606; + + setp.gtu.f64 %p913, %fd385, 0d7FF0000000000000; + mov.f64 %fd1177, %fd391; + @%p913 bra $L__BB2_606; + + setp.eq.s32 %p914, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r991, %temp}, %fd644; + } + setp.eq.s32 %p915, %r991, 0; + and.pred %p916, %p914, %p915; + @%p916 bra $L__BB2_605; + bra.uni $L__BB2_603; + +$L__BB2_605: + mov.u32 %r996, 0; + setp.gt.f64 %p924, %fd385, 0d3FF0000000000000; + selp.b32 %r997, 2146435072, 0, %p924; + xor.b32 %r998, %r997, 2146435072; + selp.b32 %r999, %r998, %r997, %p146; + setp.eq.f32 %p925, %f274, 0fBF800000; + selp.b32 %r1000, 1072693248, %r999, %p925; + mov.b64 %fd1177, {%r996, %r1000}; + bra.uni $L__BB2_606; + +$L__BB2_541: + { + .reg .b32 %temp; + mov.b64 {%r915, %temp}, %fd332; + } + and.b32 %r916, %r112, 2147483647; + setp.ne.s32 %p830, %r916, 2146435072; + setp.ne.s32 %p831, %r915, 0; + or.pred %p832, %p830, %p831; + mov.f64 %fd1164, %fd1163; + @%p832 bra $L__BB2_544; + + mov.u32 %r917, 0; + mov.b64 %fd1164, {%r917, %r56}; + +$L__BB2_544: + setp.eq.f32 %p836, %f183, 0f3F800000; + selp.f64 %fd895, 0d3FF0000000000000, %fd1164, %p836; + mul.f64 %fd896, %fd895, %fd320; + mul.f32 %f1751, %f279, %f184; + cvt.f64.f32 %fd897, %f1751; + sub.f64 %fd898, %fd897, %fd896; + cvt.f64.f32 %fd899, %f3226; + add.f64 %fd1184, %fd898, %fd899; + cvt.f64.f32 %fd342, %f274; + { + .reg .b32 %temp; + mov.b64 {%temp, %r113}, %fd342; + } + abs.f64 %fd343, %fd342; + setp.eq.f32 %p837, %f274, 0f00000000; + @%p837 bra $L__BB2_548; + bra.uni $L__BB2_545; + +$L__BB2_548: + mov.u32 %r923, 0; + mov.b64 %fd1165, {%r923, %r102}; + bra.uni $L__BB2_549; + +$L__BB2_545: + { // callseq 51, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd343; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1165, [retval0+0]; + } // callseq 51 + setp.gt.s32 %p838, %r113, -1; + @%p838 bra $L__BB2_549; + + cvt.rzi.f64.f64 %fd902, %fd644; + setp.eq.f64 %p839, %fd902, 0d4000000000000000; + @%p839 bra $L__BB2_549; + + mov.f64 %fd1165, 0dFFF8000000000000; + +$L__BB2_549: + add.f64 %fd347, %fd342, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r924}, %fd347; + } + and.b32 %r925, %r924, 2146435072; + setp.ne.s32 %p840, %r925, 2146435072; + mov.f64 %fd1166, %fd1165; + @%p840 bra $L__BB2_555; + + setp.gtu.f64 %p841, %fd343, 0d7FF0000000000000; + mov.f64 %fd1166, %fd347; + @%p841 bra $L__BB2_555; + + setp.eq.s32 %p842, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r926, %temp}, %fd644; + } + setp.eq.s32 %p843, %r926, 0; + and.pred %p844, %p842, %p843; + @%p844 bra $L__BB2_554; + bra.uni $L__BB2_552; + +$L__BB2_554: + mov.u32 %r930, 0; + setp.gt.f64 %p849, %fd343, 0d3FF0000000000000; + selp.b32 %r931, 2146435072, 0, %p849; + xor.b32 %r932, %r931, 2146435072; + selp.b32 %r933, %r932, %r931, %p146; + setp.eq.f32 %p850, %f274, 0fBF800000; + selp.b32 %r934, 1072693248, %r933, %p850; + mov.b64 %fd1166, {%r930, %r934}; + bra.uni $L__BB2_555; + +$L__BB2_603: + { + .reg .b32 %temp; + mov.b64 {%r992, %temp}, %fd384; + } + and.b32 %r993, %r116, 2147483647; + setp.ne.s32 %p917, %r993, 2146435072; + setp.ne.s32 %p918, %r992, 0; + or.pred %p919, %p917, %p918; + mov.f64 %fd1177, %fd1176; + @%p919 bra $L__BB2_606; + + setp.lt.s32 %p920, %r116, 0; + mov.u32 %r994, 0; + and.pred %p922, %p155, %p920; + selp.b32 %r995, %r57, %r56, %p922; + mov.b64 %fd1177, {%r994, %r995}; + +$L__BB2_606: + mul.f32 %f1755, %f279, 0f00000000; + cvt.f64.f32 %fd945, %f1755; + setp.eq.f32 %p926, %f274, 0f3F800000; + selp.f64 %fd946, 0d3FF0000000000000, %fd1177, %p926; + mul.f64 %fd947, %fd946, %fd320; + sub.f64 %fd948, %fd945, %fd947; + cvt.f64.f32 %fd949, %f3225; + add.f64 %fd1183, %fd948, %fd949; + cvt.f64.f32 %fd950, %f3224; + sub.f64 %fd951, %fd945, %fd320; + add.f64 %fd1182, %fd951, %fd950; + cvt.f64.f32 %fd397, %f246; + { + .reg .b32 %temp; + mov.b64 {%temp, %r117}, %fd397; + } + abs.f64 %fd398, %fd397; + { // callseq 55, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd398; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1179, [retval0+0]; + } // callseq 55 + setp.gt.s32 %p927, %r117, -1; + @%p927 bra $L__BB2_608; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1001}, %fd1179; + } + xor.b32 %r1002, %r1001, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1003, %temp}, %fd1179; + } + mov.b64 %fd1179, {%r1003, %r1002}; + +$L__BB2_608: + setp.eq.f32 %p928, %f246, 0f00000000; + @%p928 bra $L__BB2_612; + bra.uni $L__BB2_609; + +$L__BB2_612: + mov.u32 %r1004, 0; + or.b32 %r1005, %r117, 2146435072; + selp.b32 %r1006, %r1005, %r117, %p146; + mov.b64 %fd1179, {%r1004, %r1006}; + bra.uni $L__BB2_613; + +$L__BB2_609: + @%p927 bra $L__BB2_613; + + cvt.rzi.f64.f64 %fd954, %fd644; + setp.eq.f64 %p930, %fd954, 0d4000000000000000; + @%p930 bra $L__BB2_613; + + mov.f64 %fd1179, 0dFFF8000000000000; + +$L__BB2_613: + add.f64 %fd404, %fd397, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1007}, %fd404; + } + and.b32 %r1008, %r1007, 2146435072; + setp.ne.s32 %p932, %r1008, 2146435072; + mov.f64 %fd1180, %fd1179; + @%p932 bra $L__BB2_619; + + setp.gtu.f64 %p933, %fd398, 0d7FF0000000000000; + mov.f64 %fd1180, %fd404; + @%p933 bra $L__BB2_619; + + setp.eq.s32 %p934, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1009, %temp}, %fd644; + } + setp.eq.s32 %p935, %r1009, 0; + and.pred %p936, %p934, %p935; + @%p936 bra $L__BB2_618; + bra.uni $L__BB2_616; + +$L__BB2_618: + mov.u32 %r1014, 0; + setp.gt.f64 %p944, %fd398, 0d3FF0000000000000; + selp.b32 %r1015, 2146435072, 0, %p944; + xor.b32 %r1016, %r1015, 2146435072; + selp.b32 %r1017, %r1016, %r1015, %p146; + setp.eq.f32 %p945, %f246, 0fBF800000; + selp.b32 %r1018, 1072693248, %r1017, %p945; + mov.b64 %fd1180, {%r1014, %r1018}; + bra.uni $L__BB2_619; + +$L__BB2_552: + { + .reg .b32 %temp; + mov.b64 {%r927, %temp}, %fd342; + } + and.b32 %r928, %r113, 2147483647; + setp.ne.s32 %p845, %r928, 2146435072; + setp.ne.s32 %p846, %r927, 0; + or.pred %p847, %p845, %p846; + mov.f64 %fd1166, %fd1165; + @%p847 bra $L__BB2_555; + + mov.u32 %r929, 0; + mov.b64 %fd1166, {%r929, %r56}; + +$L__BB2_555: + mul.f32 %f1752, %f279, 0f00000000; + cvt.f64.f32 %fd905, %f1752; + setp.eq.f32 %p851, %f274, 0f3F800000; + selp.f64 %fd906, 0d3FF0000000000000, %fd1166, %p851; + mul.f64 %fd907, %fd906, %fd320; + sub.f64 %fd908, %fd905, %fd907; + cvt.f64.f32 %fd909, %f3225; + add.f64 %fd1183, %fd908, %fd909; + cvt.f64.f32 %fd910, %f3224; + sub.f64 %fd911, %fd905, %fd320; + add.f64 %fd1182, %fd911, %fd910; + cvt.f64.f32 %fd353, %f246; + { + .reg .b32 %temp; + mov.b64 {%temp, %r114}, %fd353; + } + abs.f64 %fd354, %fd353; + setp.eq.f32 %p852, %f246, 0f00000000; + @%p852 bra $L__BB2_559; + bra.uni $L__BB2_556; + +$L__BB2_559: + mov.u32 %r935, 0; + mov.b64 %fd1167, {%r935, %r102}; + bra.uni $L__BB2_560; + +$L__BB2_556: + { // callseq 52, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd354; + .param .b64 param1; + st.param.f64 [param1+0], %fd644; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1167, [retval0+0]; + } // callseq 52 + setp.gt.s32 %p853, %r114, -1; + @%p853 bra $L__BB2_560; + + cvt.rzi.f64.f64 %fd914, %fd644; + setp.eq.f64 %p854, %fd914, 0d4000000000000000; + @%p854 bra $L__BB2_560; + + mov.f64 %fd1167, 0dFFF8000000000000; + +$L__BB2_560: + add.f64 %fd358, %fd353, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r936}, %fd358; + } + and.b32 %r937, %r936, 2146435072; + setp.ne.s32 %p855, %r937, 2146435072; + mov.f64 %fd1168, %fd1167; + @%p855 bra $L__BB2_566; + + setp.gtu.f64 %p856, %fd354, 0d7FF0000000000000; + mov.f64 %fd1168, %fd358; + @%p856 bra $L__BB2_566; + + setp.eq.s32 %p857, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r938, %temp}, %fd644; + } + setp.eq.s32 %p858, %r938, 0; + and.pred %p859, %p857, %p858; + @%p859 bra $L__BB2_565; + bra.uni $L__BB2_563; + +$L__BB2_565: + mov.u32 %r942, 0; + setp.gt.f64 %p864, %fd354, 0d3FF0000000000000; + selp.b32 %r943, 2146435072, 0, %p864; + xor.b32 %r944, %r943, 2146435072; + selp.b32 %r945, %r944, %r943, %p146; + setp.eq.f32 %p865, %f246, 0fBF800000; + selp.b32 %r946, 1072693248, %r945, %p865; + mov.b64 %fd1168, {%r942, %r946}; + bra.uni $L__BB2_566; + +$L__BB2_616: + { + .reg .b32 %temp; + mov.b64 {%r1010, %temp}, %fd397; + } + and.b32 %r1011, %r117, 2147483647; + setp.ne.s32 %p937, %r1011, 2146435072; + setp.ne.s32 %p938, %r1010, 0; + or.pred %p939, %p937, %p938; + mov.f64 %fd1180, %fd1179; + @%p939 bra $L__BB2_619; + + setp.lt.s32 %p940, %r117, 0; + mov.u32 %r1012, 0; + and.pred %p942, %p155, %p940; + selp.b32 %r1013, %r57, %r56, %p942; + mov.b64 %fd1180, {%r1012, %r1013}; + +$L__BB2_619: + setp.eq.f32 %p946, %f246, 0f3F800000; + selp.f64 %fd957, 0d3FF0000000000000, %fd1180, %p946; + mul.f64 %fd958, %fd957, %fd320; + mul.f32 %f1756, %f279, %f271; + cvt.f64.f32 %fd959, %f1756; + sub.f64 %fd960, %fd959, %fd958; + cvt.f64.f32 %fd961, %f3223; + add.f64 %fd1181, %fd960, %fd961; + bra.uni $L__BB2_620; + +$L__BB2_563: + { + .reg .b32 %temp; + mov.b64 {%r939, %temp}, %fd353; + } + and.b32 %r940, %r114, 2147483647; + setp.ne.s32 %p860, %r940, 2146435072; + setp.ne.s32 %p861, %r939, 0; + or.pred %p862, %p860, %p861; + mov.f64 %fd1168, %fd1167; + @%p862 bra $L__BB2_566; + + mov.u32 %r941, 0; + mov.b64 %fd1168, {%r941, %r56}; + +$L__BB2_566: + setp.eq.f32 %p866, %f246, 0f3F800000; + selp.f64 %fd917, 0d3FF0000000000000, %fd1168, %p866; + mul.f64 %fd918, %fd917, %fd320; + mul.f32 %f1753, %f279, %f271; + cvt.f64.f32 %fd919, %f1753; + sub.f64 %fd920, %fd919, %fd918; + cvt.f64.f32 %fd921, %f3223; + add.f64 %fd1181, %fd920, %fd921; + +$L__BB2_620: + cvt.rn.f32.f64 %f3227, %fd1185; + cvt.rn.f32.f64 %f3226, %fd1184; + cvt.rn.f32.f64 %f3225, %fd1183; + cvt.rn.f32.f64 %f3224, %fd1182; + cvt.rn.f32.f64 %f3223, %fd1181; + fma.rn.f32 %f3221, %f279, %f183, %f3221; + fma.rn.f32 %f3220, %f279, %f274, %f3220; + add.f32 %f3219, %f3219, %f279; + fma.rn.f32 %f3218, %f279, %f246, %f3218; + add.s32 %r1371, %r1371, 1; + setp.lt.s32 %p947, %r1371, %r182; + @%p947 bra $L__BB2_56; + + add.s32 %r1370, %r1370, 1; + setp.lt.s32 %p948, %r1370, %r182; + @%p948 bra $L__BB2_55; + +$L__BB2_622: + ld.param.u32 %r1355, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_10]; + div.rn.f32 %f1757, %f3222, %f3227; + mov.f32 %f1758, 0fBF800000; + max.f32 %f1759, %f1757, %f1758; + mov.f32 %f1760, 0f3F800000; + min.f32 %f1761, %f1759, %f1760; + sub.f32 %f3278, %f3278, %f1761; + div.rn.f32 %f1762, %f3221, %f3226; + max.f32 %f1763, %f1762, %f1758; + min.f32 %f1764, %f1763, %f1760; + sub.f32 %f3277, %f3277, %f1764; + neg.f32 %f1765, %f3276; + div.rn.f32 %f1766, %f3220, %f3225; + max.f32 %f1767, %f1766, %f1765; + min.f32 %f1768, %f1767, %f3276; + sub.f32 %f1769, %f3276, %f1768; + neg.f32 %f1770, %f3275; + div.rn.f32 %f1771, %f3219, %f3224; + max.f32 %f1772, %f1771, %f1770; + min.f32 %f1773, %f1772, %f3275; + sub.f32 %f1774, %f3275, %f1773; + div.rn.f32 %f1775, %f3218, %f3223; + mov.f32 %f1776, 0fBDCCCCCD; + max.f32 %f1777, %f1775, %f1776; + mov.f32 %f1778, 0f3DCCCCCD; + min.f32 %f1779, %f1777, %f1778; + sub.f32 %f3274, %f3274, %f1779; + max.f32 %f3276, %f1769, %f1760; + mov.f32 %f1780, 0f3C23D70A; + max.f32 %f3275, %f1774, %f1780; + add.s32 %r1369, %r1369, 1; + setp.lt.s32 %p949, %r1369, %r1355; + @%p949 bra $L__BB2_53; + +$L__BB2_623: + mov.f32 %f3295, %f545; + mov.f32 %f3296, %f545; + mov.f32 %f3297, %f545; + mov.f32 %f3300, %f545; + mov.f32 %f3304, %f545; + mov.f32 %f3298, %f545; + mov.f32 %f3299, %f545; + mov.f32 %f3301, %f545; + mov.f32 %f3305, %f545; + mov.f32 %f3302, %f545; + mov.f32 %f3303, %f545; + mov.f32 %f3306, %f545; + mov.f32 %f3307, %f545; + mov.f32 %f3308, %f545; + mov.f32 %f3309, %f545; + mov.f32 %f3341, %f545; + @%p80 bra $L__BB2_884; + + sub.f32 %f310, %f3274, %f540; + div.rn.f32 %f311, %f310, %f541; + cvt.f64.f32 %fd414, %f311; + add.f64 %fd415, %fd414, 0d4000000000000000; + mov.f64 %fd962, 0d4000000000000000; + cvt.f64.f32 %fd416, %f536; + setp.eq.f32 %p951, %f311, 0fBF800000; + add.f64 %fd417, %fd414, 0d4008000000000000; + mov.f64 %fd963, 0d4008000000000000; + cvt.f64.f32 %fd418, %f538; + add.f64 %fd419, %fd414, 0d4010000000000000; + mov.f64 %fd964, 0d4010000000000000; + add.f32 %f312, %f3274, %f540; + div.rn.f32 %f313, %f312, %f541; + cvt.f64.f32 %fd420, %f313; + add.f64 %fd421, %fd420, 0d4000000000000000; + cvt.f64.f32 %fd422, %f537; + setp.eq.f32 %p952, %f313, 0fBF800000; + add.f64 %fd423, %fd420, 0d4008000000000000; + cvt.f64.f32 %fd424, %f539; + add.f64 %fd425, %fd420, 0d4010000000000000; + div.rn.f32 %f314, %f3276, 0fC0206C98; + mul.f32 %f315, %f535, 0f3F000000; + mul.f32 %f316, %f542, 0f3F000000; + add.f32 %f1813, %f310, %f310; + mov.f32 %f1814, 0f40000000; + mul.f32 %f1815, %f541, %f541; + div.rn.f32 %f1816, %f1813, %f1815; + cvt.f64.f32 %fd426, %f1816; + mul.f32 %f1817, %f536, 0f40400000; + cvt.f64.f32 %fd427, %f1817; + cvt.f64.f32 %fd428, %f310; + add.f64 %fd429, %fd428, 0d4000000000000000; + mul.f32 %f1818, %f1815, %f541; + cvt.f64.f32 %fd430, %f1818; + mul.f32 %f1819, %f538, 0f40800000; + cvt.f64.f32 %fd431, %f1819; + setp.eq.f32 %p953, %f310, 0fBF800000; + add.f64 %fd432, %fd428, 0d4008000000000000; + cvt.f64.f32 %fd433, %f541; + add.f64 %fd434, %fd433, 0d4010000000000000; + add.f32 %f1820, %f312, %f312; + div.rn.f32 %f1821, %f1820, %f1815; + cvt.f64.f32 %fd435, %f1821; + mul.f32 %f1822, %f537, 0f40400000; + cvt.f64.f32 %fd436, %f1822; + cvt.f64.f32 %fd437, %f312; + add.f64 %fd438, %fd437, 0d4000000000000000; + mul.f32 %f1823, %f539, 0f40800000; + cvt.f64.f32 %fd439, %f1823; + setp.eq.f32 %p954, %f312, 0fBF800000; + add.f64 %fd440, %fd437, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r121}, %fd414; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r1020}, %fd962; + } + and.b32 %r1021, %r1020, 2146435072; + setp.eq.s32 %p955, %r1021, 1062207488; + abs.f64 %fd965, %fd414; + { // callseq 56, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd965; + .param .b64 param1; + st.param.f64 [param1+0], %fd962; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd441, [retval0+0]; + } // callseq 56 + mov.u32 %r1019, 0; + setp.lt.s32 %p956, %r121, 0; + and.pred %p49, %p956, %p955; + selp.b32 %r1022, %r121, 0, %p955; + setp.lt.s32 %p957, %r1020, 0; + or.b32 %r1023, %r1022, 2146435072; + selp.b32 %r122, %r1023, %r1022, %p957; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1024}, %fd415; + } + and.b32 %r123, %r1024, 2146435072; + setp.ne.s32 %p958, %r123, 2146435072; + setp.gtu.f64 %p959, %fd965, 0d7FF0000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1025}, %fd963; + } + and.b32 %r1026, %r1025, 2146435072; + setp.eq.s32 %p960, %r1026, 1073741824; + { // callseq 57, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd965; + .param .b64 param1; + st.param.f64 [param1+0], %fd963; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd442, [retval0+0]; + } // callseq 57 + and.pred %p50, %p956, %p960; + and.b32 %r124, %r1020, 2147483647; + setp.gt.f64 %p961, %fd965, 0d3FF0000000000000; + selp.b32 %r1027, 2146435072, 0, %p961; + xor.b32 %r1028, %r1027, 2146435072; + selp.b32 %r1029, %r1028, %r1027, %p957; + selp.b32 %r125, 1072693248, %r1029, %p951; + and.b32 %r126, %r121, 2147483647; + selp.b32 %r1030, %r121, 0, %p960; + setp.lt.s32 %p962, %r1025, 0; + or.b32 %r1031, %r1030, 2146435072; + selp.b32 %r127, %r1031, %r1030, %p962; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1032}, %fd417; + } + and.b32 %r128, %r1032, 2146435072; + setp.ne.s32 %p963, %r128, 2146435072; + setp.gt.s32 %p964, %r1020, -1; + selp.b32 %r1033, 2146435072, 0, %p964; + setp.ne.s32 %p965, %r124, 1071644672; + and.pred %p966, %p965, %p49; + or.b32 %r1034, %r1033, -2147483648; + selp.b32 %r129, %r1034, %r1033, %p966; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1035}, %fd964; + } + and.b32 %r1036, %r1035, 2146435072; + setp.eq.s32 %p967, %r1036, 1072693248; + { // callseq 58, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd965; + .param .b64 param1; + st.param.f64 [param1+0], %fd964; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd443, [retval0+0]; + } // callseq 58 + and.pred %p51, %p956, %p967; + and.b32 %r130, %r1025, 2147483647; + selp.b32 %r1037, %r1028, %r1027, %p962; + selp.b32 %r131, 1072693248, %r1037, %p951; + selp.b32 %r1038, %r121, 0, %p967; + setp.lt.s32 %p968, %r1035, 0; + or.b32 %r1039, %r1038, 2146435072; + selp.b32 %r132, %r1039, %r1038, %p968; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1040}, %fd419; + } + and.b32 %r133, %r1040, 2146435072; + setp.ne.s32 %p969, %r133, 2146435072; + setp.gt.s32 %p970, %r1025, -1; + selp.b32 %r1041, 2146435072, 0, %p970; + setp.ne.s32 %p971, %r130, 1071644672; + and.pred %p972, %p971, %p50; + or.b32 %r1042, %r1041, -2147483648; + selp.b32 %r134, %r1042, %r1041, %p972; + { + .reg .b32 %temp; + mov.b64 {%temp, %r135}, %fd420; + } + abs.f64 %fd966, %fd420; + { // callseq 59, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd966; + .param .b64 param1; + st.param.f64 [param1+0], %fd962; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd444, [retval0+0]; + } // callseq 59 + setp.lt.s32 %p973, %r135, 0; + and.pred %p52, %p973, %p955; + and.b32 %r136, %r1035, 2147483647; + selp.b32 %r1043, %r1028, %r1027, %p968; + selp.b32 %r137, 1072693248, %r1043, %p951; + selp.b32 %r1044, %r135, 0, %p955; + or.b32 %r1045, %r1044, 2146435072; + selp.b32 %r138, %r1045, %r1044, %p957; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1046}, %fd421; + } + and.b32 %r139, %r1046, 2146435072; + setp.ne.s32 %p974, %r139, 2146435072; + setp.gt.s32 %p975, %r1035, -1; + selp.b32 %r1047, 2146435072, 0, %p975; + setp.ne.s32 %p976, %r136, 1071644672; + and.pred %p977, %p976, %p51; + or.b32 %r1048, %r1047, -2147483648; + selp.b32 %r140, %r1048, %r1047, %p977; + setp.gtu.f64 %p978, %fd966, 0d7FF0000000000000; + { // callseq 60, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd966; + .param .b64 param1; + st.param.f64 [param1+0], %fd963; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd445, [retval0+0]; + } // callseq 60 + and.pred %p53, %p973, %p960; + setp.gt.f64 %p979, %fd966, 0d3FF0000000000000; + selp.b32 %r1049, 2146435072, 0, %p979; + xor.b32 %r1050, %r1049, 2146435072; + selp.b32 %r1051, %r1050, %r1049, %p957; + selp.b32 %r141, 1072693248, %r1051, %p952; + and.b32 %r142, %r135, 2147483647; + selp.b32 %r1052, %r135, 0, %p960; + or.b32 %r1053, %r1052, 2146435072; + selp.b32 %r143, %r1053, %r1052, %p962; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1054}, %fd423; + } + and.b32 %r144, %r1054, 2146435072; + setp.ne.s32 %p980, %r144, 2146435072; + and.pred %p981, %p965, %p52; + selp.b32 %r145, %r1034, %r1033, %p981; + { // callseq 61, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd966; + .param .b64 param1; + st.param.f64 [param1+0], %fd964; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd446, [retval0+0]; + } // callseq 61 + and.pred %p54, %p973, %p967; + selp.b32 %r1055, %r1050, %r1049, %p962; + selp.b32 %r146, 1072693248, %r1055, %p952; + selp.b32 %r1056, %r135, 0, %p967; + or.b32 %r1057, %r1056, 2146435072; + selp.b32 %r147, %r1057, %r1056, %p968; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1058}, %fd425; + } + and.b32 %r148, %r1058, 2146435072; + setp.ne.s32 %p982, %r148, 2146435072; + and.pred %p983, %p971, %p53; + selp.b32 %r149, %r1042, %r1041, %p983; + selp.b32 %r1059, %r1050, %r1049, %p968; + selp.b32 %r150, 1072693248, %r1059, %p952; + and.pred %p984, %p976, %p54; + selp.b32 %r151, %r1048, %r1047, %p984; + mov.f32 %f1824, 0f3F800000; + cvt.rzi.f32.f32 %f1825, %f1824; + add.f32 %f1826, %f1825, %f1825; + sub.f32 %f1827, %f1814, %f1826; + abs.f32 %f317, %f1827; + { + .reg .b32 %temp; + mov.b64 {%temp, %r152}, %fd428; + } + abs.f64 %fd967, %fd428; + { // callseq 62, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd967; + .param .b64 param1; + st.param.f64 [param1+0], %fd962; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd447, [retval0+0]; + } // callseq 62 + setp.lt.s32 %p985, %r152, 0; + and.pred %p55, %p985, %p955; + selp.b32 %r1060, %r152, 0, %p955; + or.b32 %r1061, %r1060, 2146435072; + selp.b32 %r153, %r1061, %r1060, %p957; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1062}, %fd429; + } + and.b32 %r154, %r1062, 2146435072; + setp.ne.s32 %p986, %r154, 2146435072; + setp.gtu.f64 %p987, %fd967, 0d7FF0000000000000; + { // callseq 63, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd967; + .param .b64 param1; + st.param.f64 [param1+0], %fd963; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd448, [retval0+0]; + } // callseq 63 + and.pred %p56, %p985, %p960; + setp.gt.f64 %p988, %fd967, 0d3FF0000000000000; + selp.b32 %r1063, 2146435072, 0, %p988; + xor.b32 %r1064, %r1063, 2146435072; + selp.b32 %r1065, %r1064, %r1063, %p957; + selp.b32 %r155, 1072693248, %r1065, %p953; + and.b32 %r156, %r152, 2147483647; + selp.b32 %r1066, %r152, 0, %p960; + or.b32 %r1067, %r1066, 2146435072; + selp.b32 %r157, %r1067, %r1066, %p962; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1068}, %fd432; + } + and.b32 %r158, %r1068, 2146435072; + setp.ne.s32 %p989, %r158, 2146435072; + and.pred %p990, %p965, %p55; + selp.b32 %r159, %r1034, %r1033, %p990; + { + .reg .b32 %temp; + mov.b64 {%temp, %r160}, %fd433; + } + abs.f64 %fd968, %fd433; + { // callseq 64, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd968; + .param .b64 param1; + st.param.f64 [param1+0], %fd964; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd449, [retval0+0]; + } // callseq 64 + setp.lt.s32 %p991, %r160, 0; + and.pred %p57, %p991, %p967; + selp.b32 %r1069, %r1064, %r1063, %p962; + selp.b32 %r161, 1072693248, %r1069, %p953; + selp.b32 %r1070, %r160, 0, %p967; + or.b32 %r1071, %r1070, 2146435072; + selp.b32 %r162, %r1071, %r1070, %p968; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1072}, %fd434; + } + and.b32 %r163, %r1072, 2146435072; + setp.ne.s32 %p992, %r163, 2146435072; + and.pred %p993, %p971, %p56; + selp.b32 %r164, %r1042, %r1041, %p993; + setp.gtu.f64 %p994, %fd968, 0d7FF0000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r165}, %fd437; + } + abs.f64 %fd969, %fd437; + { // callseq 65, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd969; + .param .b64 param1; + st.param.f64 [param1+0], %fd962; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd450, [retval0+0]; + } // callseq 65 + setp.lt.s32 %p995, %r165, 0; + and.pred %p58, %p995, %p955; + setp.gt.f64 %p996, %fd968, 0d3FF0000000000000; + selp.b32 %r1073, 2146435072, 0, %p996; + xor.b32 %r1074, %r1073, 2146435072; + selp.b32 %r1075, %r1074, %r1073, %p968; + setp.eq.f32 %p997, %f541, 0fBF800000; + selp.b32 %r166, 1072693248, %r1075, %p997; + and.b32 %r167, %r160, 2147483647; + selp.b32 %r1076, %r165, 0, %p955; + or.b32 %r1077, %r1076, 2146435072; + selp.b32 %r168, %r1077, %r1076, %p957; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1078}, %fd438; + } + and.b32 %r169, %r1078, 2146435072; + setp.ne.s32 %p998, %r169, 2146435072; + and.pred %p999, %p976, %p57; + selp.b32 %r170, %r1048, %r1047, %p999; + setp.gtu.f64 %p1000, %fd969, 0d7FF0000000000000; + { // callseq 66, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd969; + .param .b64 param1; + st.param.f64 [param1+0], %fd963; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd451, [retval0+0]; + } // callseq 66 + and.pred %p59, %p995, %p960; + setp.gt.f64 %p1001, %fd969, 0d3FF0000000000000; + selp.b32 %r1079, 2146435072, 0, %p1001; + xor.b32 %r1080, %r1079, 2146435072; + selp.b32 %r1081, %r1080, %r1079, %p957; + selp.b32 %r171, 1072693248, %r1081, %p954; + and.b32 %r172, %r165, 2147483647; + selp.b32 %r1082, %r165, 0, %p960; + or.b32 %r1083, %r1082, 2146435072; + selp.b32 %r173, %r1083, %r1082, %p962; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1084}, %fd440; + } + and.b32 %r174, %r1084, 2146435072; + setp.ne.s32 %p1002, %r174, 2146435072; + and.pred %p1003, %p965, %p58; + selp.b32 %r175, %r1034, %r1033, %p1003; + selp.b32 %r1085, %r1080, %r1079, %p962; + selp.b32 %r176, 1072693248, %r1085, %p954; + and.pred %p1004, %p971, %p59; + selp.b32 %r177, %r1042, %r1041, %p1004; + or.pred %p60, %p958, %p959; + or.pred %p61, %p963, %p959; + or.pred %p62, %p969, %p959; + or.pred %p63, %p974, %p978; + or.pred %p64, %p980, %p978; + or.pred %p65, %p982, %p978; + or.pred %p66, %p986, %p987; + or.pred %p67, %p989, %p987; + or.pred %p68, %p992, %p994; + or.pred %p69, %p998, %p1000; + or.pred %p70, %p1002, %p1000; + mov.f32 %f3295, %f545; + mov.f32 %f3296, %f545; + mov.f32 %f3297, %f545; + mov.f32 %f3298, %f545; + mov.f32 %f3299, %f545; + mov.f32 %f3300, %f545; + mov.f32 %f3301, %f545; + mov.f32 %f3302, %f545; + mov.f32 %f3303, %f545; + mov.f32 %f3304, %f545; + mov.f32 %f3305, %f545; + mov.f32 %f3306, %f545; + mov.f32 %f3307, %f545; + mov.f32 %f3308, %f545; + mov.f32 %f3309, %f545; + mov.f32 %f3341, %f545; + mov.u32 %r1372, %r1019; + +$L__BB2_625: + cvt.rn.f32.s32 %f1828, %r1372; + sub.f32 %f334, %f1828, %f3278; + add.f32 %f335, %f334, 0f3F000000; + add.f32 %f336, %f334, 0fBF000000; + add.f32 %f1829, %f1828, 0f3F000000; + sub.f32 %f337, %f1829, %f3278; + add.f32 %f1830, %f1828, 0f3F800000; + sub.f32 %f338, %f1830, %f3278; + add.f32 %f339, %f334, 0f3F800000; + mov.u32 %r1373, %r1019; + +$L__BB2_626: + not.pred %p1005, %p49; + mov.f64 %fd1187, %fd441; + @%p1005 bra $L__BB2_628; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1087}, %fd441; + } + xor.b32 %r1088, %r1087, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1089, %temp}, %fd441; + } + mov.b64 %fd1187, {%r1089, %r1088}; + +$L__BB2_628: + setp.eq.f32 %p1006, %f311, 0f00000000; + @%p1006 bra $L__BB2_632; + bra.uni $L__BB2_629; + +$L__BB2_632: + mov.u32 %r1090, 0; + mov.b64 %fd1187, {%r1090, %r122}; + bra.uni $L__BB2_633; + +$L__BB2_629: + setp.gt.s32 %p1007, %r121, -1; + @%p1007 bra $L__BB2_633; + + cvt.rzi.f64.f64 %fd971, %fd962; + setp.eq.f64 %p1008, %fd971, 0d4000000000000000; + @%p1008 bra $L__BB2_633; + + mov.f64 %fd1187, 0dFFF8000000000000; + +$L__BB2_633: + selp.f64 %fd1188, %fd1187, %fd415, %p958; + @%p60 bra $L__BB2_638; + + setp.eq.s32 %p1010, %r124, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1091, %temp}, %fd962; + } + setp.eq.s32 %p1011, %r1091, 0; + and.pred %p1012, %p1010, %p1011; + @%p1012 bra $L__BB2_637; + bra.uni $L__BB2_635; + +$L__BB2_637: + mov.u32 %r1094, 0; + mov.b64 %fd1188, {%r1094, %r125}; + bra.uni $L__BB2_638; + +$L__BB2_635: + setp.ne.s32 %p1013, %r126, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1092, %temp}, %fd414; + } + setp.ne.s32 %p1014, %r1092, 0; + or.pred %p1015, %p1013, %p1014; + mov.f64 %fd1188, %fd1187; + @%p1015 bra $L__BB2_638; + + mov.u32 %r1093, 0; + mov.b64 %fd1188, {%r1093, %r129}; + +$L__BB2_638: + not.pred %p1016, %p50; + mov.f64 %fd1190, %fd442; + @%p1016 bra $L__BB2_640; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1095}, %fd442; + } + xor.b32 %r1096, %r1095, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1097, %temp}, %fd442; + } + mov.b64 %fd1190, {%r1097, %r1096}; + +$L__BB2_640: + @%p1006 bra $L__BB2_644; + bra.uni $L__BB2_641; + +$L__BB2_644: + mov.u32 %r1098, 0; + mov.b64 %fd1190, {%r1098, %r127}; + bra.uni $L__BB2_645; + +$L__BB2_641: + setp.gt.s32 %p1018, %r121, -1; + @%p1018 bra $L__BB2_645; + + cvt.rzi.f64.f64 %fd975, %fd963; + setp.eq.f64 %p1019, %fd975, 0d4008000000000000; + @%p1019 bra $L__BB2_645; + + mov.f64 %fd1190, 0dFFF8000000000000; + +$L__BB2_645: + selp.f64 %fd1191, %fd1190, %fd417, %p963; + @%p61 bra $L__BB2_650; + + setp.eq.s32 %p1021, %r130, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1099, %temp}, %fd963; + } + setp.eq.s32 %p1022, %r1099, 0; + and.pred %p1023, %p1021, %p1022; + @%p1023 bra $L__BB2_649; + bra.uni $L__BB2_647; + +$L__BB2_649: + mov.u32 %r1102, 0; + mov.b64 %fd1191, {%r1102, %r131}; + bra.uni $L__BB2_650; + +$L__BB2_647: + setp.ne.s32 %p1024, %r126, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1100, %temp}, %fd414; + } + setp.ne.s32 %p1025, %r1100, 0; + or.pred %p1026, %p1024, %p1025; + mov.f64 %fd1191, %fd1190; + @%p1026 bra $L__BB2_650; + + mov.u32 %r1101, 0; + mov.b64 %fd1191, {%r1101, %r134}; + +$L__BB2_650: + setp.eq.f32 %p1027, %f311, 0f3F800000; + selp.f64 %fd978, 0d3FF0000000000000, %fd1191, %p1027; + add.f64 %fd979, %fd1188, 0d3FF0000000000000; + selp.f64 %fd980, 0d4000000000000000, %fd979, %p1027; + fma.rn.f64 %fd468, %fd978, %fd416, %fd980; + not.pred %p1028, %p51; + mov.f64 %fd1193, %fd443; + @%p1028 bra $L__BB2_652; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1103}, %fd443; + } + xor.b32 %r1104, %r1103, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1105, %temp}, %fd443; + } + mov.b64 %fd1193, {%r1105, %r1104}; + +$L__BB2_652: + @%p1006 bra $L__BB2_656; + bra.uni $L__BB2_653; + +$L__BB2_656: + mov.u32 %r1106, 0; + mov.b64 %fd1193, {%r1106, %r132}; + bra.uni $L__BB2_657; + +$L__BB2_653: + setp.gt.s32 %p1030, %r121, -1; + @%p1030 bra $L__BB2_657; + + cvt.rzi.f64.f64 %fd982, %fd964; + setp.eq.f64 %p1031, %fd982, 0d4010000000000000; + @%p1031 bra $L__BB2_657; + + mov.f64 %fd1193, 0dFFF8000000000000; + +$L__BB2_657: + selp.f64 %fd1194, %fd1193, %fd419, %p969; + @%p62 bra $L__BB2_662; + + setp.eq.s32 %p1033, %r136, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1107, %temp}, %fd964; + } + setp.eq.s32 %p1034, %r1107, 0; + and.pred %p1035, %p1033, %p1034; + @%p1035 bra $L__BB2_661; + bra.uni $L__BB2_659; + +$L__BB2_661: + mov.u32 %r1110, 0; + mov.b64 %fd1194, {%r1110, %r137}; + bra.uni $L__BB2_662; + +$L__BB2_659: + setp.ne.s32 %p1036, %r126, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1108, %temp}, %fd414; + } + setp.ne.s32 %p1037, %r1108, 0; + or.pred %p1038, %p1036, %p1037; + mov.f64 %fd1194, %fd1193; + @%p1038 bra $L__BB2_662; + + mov.u32 %r1109, 0; + mov.b64 %fd1194, {%r1109, %r140}; + +$L__BB2_662: + selp.f64 %fd985, 0d3FF0000000000000, %fd1194, %p1027; + fma.rn.f64 %fd477, %fd985, %fd418, %fd468; + not.pred %p1040, %p52; + mov.f64 %fd1196, %fd444; + @%p1040 bra $L__BB2_664; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1111}, %fd444; + } + xor.b32 %r1112, %r1111, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1113, %temp}, %fd444; + } + mov.b64 %fd1196, {%r1113, %r1112}; + +$L__BB2_664: + setp.eq.f32 %p1041, %f313, 0f00000000; + @%p1041 bra $L__BB2_668; + bra.uni $L__BB2_665; + +$L__BB2_668: + mov.u32 %r1114, 0; + mov.b64 %fd1196, {%r1114, %r138}; + bra.uni $L__BB2_669; + +$L__BB2_665: + setp.gt.s32 %p1042, %r135, -1; + @%p1042 bra $L__BB2_669; + + cvt.rzi.f64.f64 %fd987, %fd962; + setp.eq.f64 %p1043, %fd987, 0d4000000000000000; + @%p1043 bra $L__BB2_669; + + mov.f64 %fd1196, 0dFFF8000000000000; + +$L__BB2_669: + selp.f64 %fd1197, %fd1196, %fd421, %p974; + @%p63 bra $L__BB2_674; + + setp.eq.s32 %p1045, %r124, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1115, %temp}, %fd962; + } + setp.eq.s32 %p1046, %r1115, 0; + and.pred %p1047, %p1045, %p1046; + @%p1047 bra $L__BB2_673; + bra.uni $L__BB2_671; + +$L__BB2_673: + mov.u32 %r1118, 0; + mov.b64 %fd1197, {%r1118, %r141}; + bra.uni $L__BB2_674; + +$L__BB2_671: + setp.ne.s32 %p1048, %r142, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1116, %temp}, %fd420; + } + setp.ne.s32 %p1049, %r1116, 0; + or.pred %p1050, %p1048, %p1049; + mov.f64 %fd1197, %fd1196; + @%p1050 bra $L__BB2_674; + + mov.u32 %r1117, 0; + mov.b64 %fd1197, {%r1117, %r145}; + +$L__BB2_674: + not.pred %p1051, %p53; + mov.f64 %fd1199, %fd445; + @%p1051 bra $L__BB2_676; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1119}, %fd445; + } + xor.b32 %r1120, %r1119, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1121, %temp}, %fd445; + } + mov.b64 %fd1199, {%r1121, %r1120}; + +$L__BB2_676: + @%p1041 bra $L__BB2_680; + bra.uni $L__BB2_677; + +$L__BB2_680: + mov.u32 %r1122, 0; + mov.b64 %fd1199, {%r1122, %r143}; + bra.uni $L__BB2_681; + +$L__BB2_677: + setp.gt.s32 %p1053, %r135, -1; + @%p1053 bra $L__BB2_681; + + cvt.rzi.f64.f64 %fd991, %fd963; + setp.eq.f64 %p1054, %fd991, 0d4008000000000000; + @%p1054 bra $L__BB2_681; + + mov.f64 %fd1199, 0dFFF8000000000000; + +$L__BB2_681: + selp.f64 %fd1200, %fd1199, %fd423, %p980; + @%p64 bra $L__BB2_686; + + setp.eq.s32 %p1056, %r130, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1123, %temp}, %fd963; + } + setp.eq.s32 %p1057, %r1123, 0; + and.pred %p1058, %p1056, %p1057; + @%p1058 bra $L__BB2_685; + bra.uni $L__BB2_683; + +$L__BB2_685: + mov.u32 %r1126, 0; + mov.b64 %fd1200, {%r1126, %r146}; + bra.uni $L__BB2_686; + +$L__BB2_683: + setp.ne.s32 %p1059, %r142, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1124, %temp}, %fd420; + } + setp.ne.s32 %p1060, %r1124, 0; + or.pred %p1061, %p1059, %p1060; + mov.f64 %fd1200, %fd1199; + @%p1061 bra $L__BB2_686; + + mov.u32 %r1125, 0; + mov.b64 %fd1200, {%r1125, %r149}; + +$L__BB2_686: + setp.eq.f32 %p1062, %f313, 0f3F800000; + selp.f64 %fd994, 0d3FF0000000000000, %fd1200, %p1062; + add.f64 %fd995, %fd1197, 0d3FF0000000000000; + selp.f64 %fd996, 0d4000000000000000, %fd995, %p1062; + fma.rn.f64 %fd494, %fd994, %fd422, %fd996; + not.pred %p1063, %p54; + mov.f64 %fd1202, %fd446; + @%p1063 bra $L__BB2_688; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1127}, %fd446; + } + xor.b32 %r1128, %r1127, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1129, %temp}, %fd446; + } + mov.b64 %fd1202, {%r1129, %r1128}; + +$L__BB2_688: + @%p1041 bra $L__BB2_692; + bra.uni $L__BB2_689; + +$L__BB2_692: + mov.u32 %r1130, 0; + mov.b64 %fd1202, {%r1130, %r147}; + bra.uni $L__BB2_693; + +$L__BB2_689: + setp.gt.s32 %p1065, %r135, -1; + @%p1065 bra $L__BB2_693; + + cvt.rzi.f64.f64 %fd998, %fd964; + setp.eq.f64 %p1066, %fd998, 0d4010000000000000; + @%p1066 bra $L__BB2_693; + + mov.f64 %fd1202, 0dFFF8000000000000; + +$L__BB2_693: + selp.f64 %fd1203, %fd1202, %fd425, %p982; + @%p65 bra $L__BB2_698; + + setp.eq.s32 %p1068, %r136, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1131, %temp}, %fd964; + } + setp.eq.s32 %p1069, %r1131, 0; + and.pred %p1070, %p1068, %p1069; + @%p1070 bra $L__BB2_697; + bra.uni $L__BB2_695; + +$L__BB2_697: + mov.u32 %r1134, 0; + mov.b64 %fd1203, {%r1134, %r150}; + bra.uni $L__BB2_698; + +$L__BB2_695: + setp.ne.s32 %p1071, %r142, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1132, %temp}, %fd420; + } + setp.ne.s32 %p1072, %r1132, 0; + or.pred %p1073, %p1071, %p1072; + mov.f64 %fd1203, %fd1202; + @%p1073 bra $L__BB2_698; + + mov.u32 %r1133, 0; + mov.b64 %fd1203, {%r1133, %r151}; + +$L__BB2_698: + selp.f64 %fd1001, 0d3FF0000000000000, %fd1203, %p1062; + fma.rn.f64 %fd1002, %fd1001, %fd424, %fd494; + cvt.rn.f32.f64 %f1831, %fd1002; + cvt.rn.f32.f64 %f1832, %fd477; + sqrt.rn.f32 %f356, %f1832; + mul.f32 %f357, %f356, %f535; + sqrt.rn.f32 %f358, %f1831; + mul.f32 %f359, %f358, %f542; + mov.f32 %f1833, 0f3F000000; + div.rn.f32 %f1834, %f1833, %f357; + div.rn.f32 %f1835, %f1834, %f357; + sqrt.rn.f32 %f360, %f1835; + mul.f32 %f361, %f360, %f335; + abs.f32 %f1836, %f361; + setp.ltu.f32 %p1075, %f1836, 0f3F8060FE; + setp.ge.f32 %p1076, %f1836, 0f3F8060FE; + mul.f32 %f1837, %f361, %f361; + selp.f32 %f1838, %f1836, %f1837, %p1076; + selp.f32 %f1839, 0f3789CA3C, 0f38B1E96A, %p1076; + selp.f32 %f1840, 0fB9F560B9, 0fBA574D20, %p1076; + fma.rn.f32 %f1841, %f1839, %f1838, %f1840; + selp.f32 %f1842, 0f3BAC840B, 0f3BAAD5EA, %p1076; + fma.rn.f32 %f1843, %f1841, %f1838, %f1842; + selp.f32 %f1844, 0fBD0C8162, 0fBCDC1BE7, %p1076; + fma.rn.f32 %f1845, %f1843, %f1838, %f1844; + selp.f32 %f1846, 0f3E1CF906, 0f3DE718AF, %p1076; + fma.rn.f32 %f1847, %f1845, %f1838, %f1846; + selp.f32 %f1848, 0f3F6A937E, 0fBEC093AC, %p1076; + fma.rn.f32 %f1849, %f1847, %f1838, %f1848; + selp.f32 %f1850, 0f3F20D842, 0f3E0375D3, %p1076; + fma.rn.f32 %f1851, %f1849, %f1838, %f1850; + neg.f32 %f1852, %f1836; + selp.f32 %f1853, %f1852, %f361, %p1076; + fma.rn.f32 %f3311, %f1851, %f1853, %f1853; + @%p1075 bra $L__BB2_700; + + ex2.approx.ftz.f32 %f1854, %f3311; + sub.f32 %f1856, %f1824, %f1854; + mov.b32 %r1135, %f1856; + mov.b32 %r1136, %f361; + and.b32 %r1137, %r1136, -2147483648; + or.b32 %r1138, %r1137, %r1135; + mov.b32 %f3311, %r1138; + +$L__BB2_700: + mul.f32 %f365, %f360, %f336; + abs.f32 %f1857, %f365; + setp.ltu.f32 %p1077, %f1857, 0f3F8060FE; + setp.ge.f32 %p1078, %f1857, 0f3F8060FE; + mul.f32 %f1858, %f365, %f365; + selp.f32 %f1859, %f1857, %f1858, %p1078; + selp.f32 %f1860, 0f3789CA3C, 0f38B1E96A, %p1078; + selp.f32 %f1861, 0fB9F560B9, 0fBA574D20, %p1078; + fma.rn.f32 %f1862, %f1860, %f1859, %f1861; + selp.f32 %f1863, 0f3BAC840B, 0f3BAAD5EA, %p1078; + fma.rn.f32 %f1864, %f1862, %f1859, %f1863; + selp.f32 %f1865, 0fBD0C8162, 0fBCDC1BE7, %p1078; + fma.rn.f32 %f1866, %f1864, %f1859, %f1865; + selp.f32 %f1867, 0f3E1CF906, 0f3DE718AF, %p1078; + fma.rn.f32 %f1868, %f1866, %f1859, %f1867; + selp.f32 %f1869, 0f3F6A937E, 0fBEC093AC, %p1078; + fma.rn.f32 %f1870, %f1868, %f1859, %f1869; + selp.f32 %f1871, 0f3F20D842, 0f3E0375D3, %p1078; + fma.rn.f32 %f1872, %f1870, %f1859, %f1871; + neg.f32 %f1873, %f1857; + selp.f32 %f1874, %f1873, %f365, %p1078; + fma.rn.f32 %f3312, %f1872, %f1874, %f1874; + @%p1077 bra $L__BB2_702; + + ex2.approx.ftz.f32 %f1875, %f3312; + sub.f32 %f1877, %f1824, %f1875; + mov.b32 %r1139, %f1877; + mov.b32 %r1140, %f365; + and.b32 %r1141, %r1140, -2147483648; + or.b32 %r1142, %r1141, %r1139; + mov.b32 %f3312, %r1142; + +$L__BB2_702: + sub.f32 %f1878, %f3311, %f3312; + mul.f32 %f369, %f1878, 0f3F000000; + div.rn.f32 %f1880, %f1833, %f359; + div.rn.f32 %f1881, %f1880, %f359; + cvt.rn.f32.s32 %f370, %r1373; + sub.f32 %f371, %f370, %f3277; + add.f32 %f1882, %f371, 0f3F000000; + sqrt.rn.f32 %f372, %f1881; + mul.f32 %f373, %f372, %f1882; + abs.f32 %f1883, %f373; + setp.ltu.f32 %p1079, %f1883, 0f3F8060FE; + setp.ge.f32 %p1080, %f1883, 0f3F8060FE; + mul.f32 %f1884, %f373, %f373; + selp.f32 %f1885, %f1883, %f1884, %p1080; + selp.f32 %f1886, 0f3789CA3C, 0f38B1E96A, %p1080; + selp.f32 %f1887, 0fB9F560B9, 0fBA574D20, %p1080; + fma.rn.f32 %f1888, %f1886, %f1885, %f1887; + selp.f32 %f1889, 0f3BAC840B, 0f3BAAD5EA, %p1080; + fma.rn.f32 %f1890, %f1888, %f1885, %f1889; + selp.f32 %f1891, 0fBD0C8162, 0fBCDC1BE7, %p1080; + fma.rn.f32 %f1892, %f1890, %f1885, %f1891; + selp.f32 %f1893, 0f3E1CF906, 0f3DE718AF, %p1080; + fma.rn.f32 %f1894, %f1892, %f1885, %f1893; + selp.f32 %f1895, 0f3F6A937E, 0fBEC093AC, %p1080; + fma.rn.f32 %f1896, %f1894, %f1885, %f1895; + selp.f32 %f1897, 0f3F20D842, 0f3E0375D3, %p1080; + fma.rn.f32 %f1898, %f1896, %f1885, %f1897; + neg.f32 %f1899, %f1883; + selp.f32 %f1900, %f1899, %f373, %p1080; + fma.rn.f32 %f3313, %f1898, %f1900, %f1900; + @%p1079 bra $L__BB2_704; + + ex2.approx.ftz.f32 %f1901, %f3313; + sub.f32 %f1903, %f1824, %f1901; + mov.b32 %r1143, %f1903; + mov.b32 %r1144, %f373; + and.b32 %r1145, %r1144, -2147483648; + or.b32 %r1146, %r1145, %r1143; + mov.b32 %f3313, %r1146; + +$L__BB2_704: + add.f32 %f377, %f371, 0fBF000000; + mul.f32 %f378, %f372, %f377; + abs.f32 %f1904, %f378; + setp.ltu.f32 %p1081, %f1904, 0f3F8060FE; + setp.ge.f32 %p1082, %f1904, 0f3F8060FE; + mul.f32 %f1905, %f378, %f378; + selp.f32 %f1906, %f1904, %f1905, %p1082; + selp.f32 %f1907, 0f3789CA3C, 0f38B1E96A, %p1082; + selp.f32 %f1908, 0fB9F560B9, 0fBA574D20, %p1082; + fma.rn.f32 %f1909, %f1907, %f1906, %f1908; + selp.f32 %f1910, 0f3BAC840B, 0f3BAAD5EA, %p1082; + fma.rn.f32 %f1911, %f1909, %f1906, %f1910; + selp.f32 %f1912, 0fBD0C8162, 0fBCDC1BE7, %p1082; + fma.rn.f32 %f1913, %f1911, %f1906, %f1912; + selp.f32 %f1914, 0f3E1CF906, 0f3DE718AF, %p1082; + fma.rn.f32 %f1915, %f1913, %f1906, %f1914; + selp.f32 %f1916, 0f3F6A937E, 0fBEC093AC, %p1082; + fma.rn.f32 %f1917, %f1915, %f1906, %f1916; + selp.f32 %f1918, 0f3F20D842, 0f3E0375D3, %p1082; + fma.rn.f32 %f1919, %f1917, %f1906, %f1918; + neg.f32 %f1920, %f1904; + selp.f32 %f1921, %f1920, %f378, %p1082; + fma.rn.f32 %f3314, %f1919, %f1921, %f1921; + @%p1081 bra $L__BB2_706; + + ex2.approx.ftz.f32 %f1922, %f3314; + sub.f32 %f1924, %f1824, %f1922; + mov.b32 %r1147, %f1924; + mov.b32 %r1148, %f378; + and.b32 %r1149, %r1148, -2147483648; + or.b32 %r1150, %r1149, %r1147; + mov.b32 %f3314, %r1150; + +$L__BB2_706: + sub.f32 %f1926, %f3313, %f3314; + mul.f32 %f382, %f1926, 0f3F000000; + div.rn.f32 %f383, %f337, %f357; + abs.f32 %f384, %f383; + setp.lt.f32 %p1083, %f384, 0f00800000; + mul.f32 %f1927, %f384, 0f4B800000; + selp.f32 %f1928, %f1927, %f384, %p1083; + selp.f32 %f1929, 0fC3170000, 0fC2FE0000, %p1083; + mov.b32 %r1151, %f1928; + and.b32 %r1152, %r1151, 8388607; + or.b32 %r1153, %r1152, 1065353216; + mov.b32 %f1930, %r1153; + shr.u32 %r1154, %r1151, 23; + cvt.rn.f32.u32 %f1931, %r1154; + add.f32 %f1932, %f1929, %f1931; + setp.gt.f32 %p1084, %f1930, 0f3FB504F3; + mul.f32 %f1933, %f1930, 0f3F000000; + add.f32 %f1934, %f1932, 0f3F800000; + selp.f32 %f1935, %f1934, %f1932, %p1084; + selp.f32 %f1936, %f1933, %f1930, %p1084; + add.f32 %f1937, %f1936, 0fBF800000; + add.f32 %f1938, %f1936, 0f3F800000; + rcp.approx.ftz.f32 %f1939, %f1938; + add.f32 %f1940, %f1937, %f1937; + mul.f32 %f1942, %f1940, %f1939; + mul.f32 %f1943, %f1942, %f1942; + mov.f32 %f1944, 0f3C4CAF63; + mov.f32 %f1945, 0f3B18F0FE; + fma.rn.f32 %f1946, %f1945, %f1943, %f1944; + mov.f32 %f1947, 0f3DAAAABD; + fma.rn.f32 %f1948, %f1946, %f1943, %f1947; + mul.rn.f32 %f1949, %f1948, %f1943; + mul.rn.f32 %f1950, %f1949, %f1942; + sub.f32 %f1951, %f1937, %f1942; + add.f32 %f1952, %f1951, %f1951; + neg.f32 %f1953, %f1942; + fma.rn.f32 %f1954, %f1953, %f1937, %f1952; + mul.rn.f32 %f1955, %f1939, %f1954; + add.f32 %f1956, %f1950, %f1942; + sub.f32 %f1957, %f1942, %f1956; + add.f32 %f1958, %f1950, %f1957; + add.f32 %f1959, %f1955, %f1958; + add.f32 %f1960, %f1956, %f1959; + sub.f32 %f1961, %f1956, %f1960; + add.f32 %f1962, %f1959, %f1961; + mov.f32 %f1963, 0f3F317200; + mul.rn.f32 %f1964, %f1935, %f1963; + mov.f32 %f1965, 0f35BFBE8E; + mul.rn.f32 %f1966, %f1935, %f1965; + add.f32 %f1967, %f1964, %f1960; + sub.f32 %f1968, %f1964, %f1967; + add.f32 %f1969, %f1960, %f1968; + add.f32 %f1970, %f1962, %f1969; + add.f32 %f1971, %f1966, %f1970; + add.f32 %f1972, %f1967, %f1971; + sub.f32 %f1973, %f1967, %f1972; + add.f32 %f1974, %f1971, %f1973; + mul.rn.f32 %f1975, %f1814, %f1972; + neg.f32 %f1976, %f1975; + fma.rn.f32 %f1977, %f1814, %f1972, %f1976; + fma.rn.f32 %f1978, %f1814, %f1974, %f1977; + mov.f32 %f1979, 0f00000000; + fma.rn.f32 %f1980, %f1979, %f1972, %f1978; + add.rn.f32 %f1981, %f1975, %f1980; + neg.f32 %f1982, %f1981; + add.rn.f32 %f1983, %f1975, %f1982; + add.rn.f32 %f1984, %f1983, %f1980; + mov.b32 %r1155, %f1981; + setp.eq.s32 %p1085, %r1155, 1118925336; + add.s32 %r1156, %r1155, -1; + mov.b32 %f1985, %r1156; + add.f32 %f1986, %f1984, 0f37000000; + selp.f32 %f385, %f1986, %f1984, %p1085; + selp.f32 %f1987, %f1985, %f1981, %p1085; + mov.f32 %f1988, 0f3FB8AA3B; + mul.rn.f32 %f1989, %f1987, %f1988; + cvt.rzi.f32.f32 %f1990, %f1989; + abs.f32 %f1991, %f1990; + setp.gt.f32 %p1086, %f1991, 0f42FC0000; + mov.b32 %r1157, %f1990; + and.b32 %r1158, %r1157, -2147483648; + or.b32 %r1159, %r1158, 1123811328; + mov.b32 %f1992, %r1159; + selp.f32 %f1993, %f1992, %f1990, %p1086; + mov.f32 %f1994, 0fBF317218; + fma.rn.f32 %f1995, %f1993, %f1994, %f1987; + mov.f32 %f1996, 0f3102E308; + fma.rn.f32 %f1997, %f1993, %f1996, %f1995; + mul.f32 %f1998, %f1997, 0f3FB8AA3B; + add.f32 %f1999, %f1993, 0f4B40007F; + mov.b32 %r1160, %f1999; + shl.b32 %r1161, %r1160, 23; + mov.b32 %f2000, %r1161; + ex2.approx.ftz.f32 %f2001, %f1998; + mul.f32 %f386, %f2001, %f2000; + setp.eq.f32 %p1087, %f386, 0f7F800000; + mov.f32 %f3315, 0f7F800000; + @%p1087 bra $L__BB2_708; + + fma.rn.f32 %f3315, %f386, %f385, %f386; + +$L__BB2_708: + setp.lt.f32 %p1088, %f383, 0f00000000; + setp.eq.f32 %p1089, %f317, 0f3F800000; + and.pred %p71, %p1088, %p1089; + setp.eq.f32 %p1090, %f383, 0f00000000; + @%p1090 bra $L__BB2_712; + bra.uni $L__BB2_709; + +$L__BB2_712: + add.f32 %f2006, %f383, %f383; + selp.f32 %f3317, %f2006, 0f00000000, %p1089; + bra.uni $L__BB2_713; + +$L__BB2_709: + mov.b32 %r1162, %f3315; + xor.b32 %r1163, %r1162, -2147483648; + mov.b32 %f2002, %r1163; + selp.f32 %f3317, %f2002, %f3315, %p71; + setp.geu.f32 %p1091, %f383, 0f00000000; + @%p1091 bra $L__BB2_713; + + cvt.rzi.f32.f32 %f2004, %f1814; + setp.eq.f32 %p1092, %f2004, 0f40000000; + @%p1092 bra $L__BB2_713; + + mov.f32 %f3317, 0f7FFFFFFF; + +$L__BB2_713: + add.f32 %f2007, %f384, 0f40000000; + mov.b32 %r1164, %f2007; + setp.lt.s32 %p1094, %r1164, 2139095040; + @%p1094 bra $L__BB2_718; + + setp.gtu.f32 %p1095, %f384, 0f7F800000; + @%p1095 bra $L__BB2_717; + bra.uni $L__BB2_715; + +$L__BB2_717: + add.f32 %f3317, %f383, 0f40000000; + bra.uni $L__BB2_718; + +$L__BB2_715: + setp.neu.f32 %p1096, %f384, 0f7F800000; + @%p1096 bra $L__BB2_718; + + selp.f32 %f3317, 0fFF800000, 0f7F800000, %p71; + +$L__BB2_718: + mul.f32 %f2009, %f3317, 0fBF000000; + setp.eq.f32 %p1097, %f383, 0f3F800000; + selp.f32 %f2010, 0fBF000000, %f2009, %p1097; + mov.f32 %f2012, 0f3BBB989D; + fma.rn.f32 %f2013, %f2010, %f2012, %f1833; + mov.f32 %f2015, 0f437C0000; + cvt.sat.f32.f32 %f2016, %f2013; + mov.f32 %f2017, 0f4B400001; + fma.rm.f32 %f2018, %f2016, %f2015, %f2017; + add.f32 %f2019, %f2018, 0fCB40007F; + neg.f32 %f2020, %f2019; + fma.rn.f32 %f2021, %f2010, %f1988, %f2020; + mov.f32 %f2022, 0f32A57060; + fma.rn.f32 %f2023, %f2010, %f2022, %f2021; + mov.b32 %r1165, %f2018; + shl.b32 %r1166, %r1165, 23; + mov.b32 %f2024, %r1166; + ex2.approx.ftz.f32 %f2025, %f2023; + mul.f32 %f395, %f2025, %f2024; + div.rn.f32 %f396, %f336, %f357; + abs.f32 %f397, %f396; + setp.lt.f32 %p1098, %f397, 0f00800000; + mul.f32 %f2026, %f397, 0f4B800000; + selp.f32 %f2027, %f2026, %f397, %p1098; + selp.f32 %f2028, 0fC3170000, 0fC2FE0000, %p1098; + mov.b32 %r1167, %f2027; + and.b32 %r1168, %r1167, 8388607; + or.b32 %r1169, %r1168, 1065353216; + mov.b32 %f2029, %r1169; + shr.u32 %r1170, %r1167, 23; + cvt.rn.f32.u32 %f2030, %r1170; + add.f32 %f2031, %f2028, %f2030; + setp.gt.f32 %p1099, %f2029, 0f3FB504F3; + mul.f32 %f2032, %f2029, 0f3F000000; + add.f32 %f2033, %f2031, 0f3F800000; + selp.f32 %f2034, %f2033, %f2031, %p1099; + selp.f32 %f2035, %f2032, %f2029, %p1099; + add.f32 %f2036, %f2035, 0fBF800000; + add.f32 %f2037, %f2035, 0f3F800000; + rcp.approx.ftz.f32 %f2038, %f2037; + add.f32 %f2039, %f2036, %f2036; + mul.f32 %f2041, %f2039, %f2038; + mul.f32 %f2042, %f2041, %f2041; + fma.rn.f32 %f2045, %f1945, %f2042, %f1944; + fma.rn.f32 %f2047, %f2045, %f2042, %f1947; + mul.rn.f32 %f2048, %f2047, %f2042; + mul.rn.f32 %f2049, %f2048, %f2041; + sub.f32 %f2050, %f2036, %f2041; + add.f32 %f2051, %f2050, %f2050; + neg.f32 %f2052, %f2041; + fma.rn.f32 %f2053, %f2052, %f2036, %f2051; + mul.rn.f32 %f2054, %f2038, %f2053; + add.f32 %f2055, %f2049, %f2041; + sub.f32 %f2056, %f2041, %f2055; + add.f32 %f2057, %f2049, %f2056; + add.f32 %f2058, %f2054, %f2057; + add.f32 %f2059, %f2055, %f2058; + sub.f32 %f2060, %f2055, %f2059; + add.f32 %f2061, %f2058, %f2060; + mul.rn.f32 %f2063, %f2034, %f1963; + mul.rn.f32 %f2065, %f2034, %f1965; + add.f32 %f2066, %f2063, %f2059; + sub.f32 %f2067, %f2063, %f2066; + add.f32 %f2068, %f2059, %f2067; + add.f32 %f2069, %f2061, %f2068; + add.f32 %f2070, %f2065, %f2069; + add.f32 %f2071, %f2066, %f2070; + sub.f32 %f2072, %f2066, %f2071; + add.f32 %f2073, %f2070, %f2072; + mul.rn.f32 %f2074, %f1814, %f2071; + neg.f32 %f2075, %f2074; + fma.rn.f32 %f2076, %f1814, %f2071, %f2075; + fma.rn.f32 %f2077, %f1814, %f2073, %f2076; + fma.rn.f32 %f2079, %f1979, %f2071, %f2077; + add.rn.f32 %f2080, %f2074, %f2079; + neg.f32 %f2081, %f2080; + add.rn.f32 %f2082, %f2074, %f2081; + add.rn.f32 %f2083, %f2082, %f2079; + mov.b32 %r1171, %f2080; + setp.eq.s32 %p1100, %r1171, 1118925336; + add.s32 %r1172, %r1171, -1; + mov.b32 %f2084, %r1172; + add.f32 %f2085, %f2083, 0f37000000; + selp.f32 %f398, %f2085, %f2083, %p1100; + selp.f32 %f2086, %f2084, %f2080, %p1100; + mul.rn.f32 %f2087, %f2086, %f1988; + cvt.rzi.f32.f32 %f2088, %f2087; + abs.f32 %f2089, %f2088; + setp.gt.f32 %p1101, %f2089, 0f42FC0000; + mov.b32 %r1173, %f2088; + and.b32 %r1174, %r1173, -2147483648; + or.b32 %r1175, %r1174, 1123811328; + mov.b32 %f2090, %r1175; + selp.f32 %f2091, %f2090, %f2088, %p1101; + fma.rn.f32 %f2093, %f2091, %f1994, %f2086; + fma.rn.f32 %f2095, %f2091, %f1996, %f2093; + mul.f32 %f2096, %f2095, 0f3FB8AA3B; + add.f32 %f2097, %f2091, 0f4B40007F; + mov.b32 %r1176, %f2097; + shl.b32 %r1177, %r1176, 23; + mov.b32 %f2098, %r1177; + ex2.approx.ftz.f32 %f2099, %f2096; + mul.f32 %f399, %f2099, %f2098; + setp.eq.f32 %p1102, %f399, 0f7F800000; + mov.f32 %f3318, 0f7F800000; + @%p1102 bra $L__BB2_720; + + fma.rn.f32 %f3318, %f399, %f398, %f399; + +$L__BB2_720: + setp.lt.f32 %p1103, %f396, 0f00000000; + and.pred %p72, %p1103, %p1089; + setp.eq.f32 %p1105, %f396, 0f00000000; + @%p1105 bra $L__BB2_724; + bra.uni $L__BB2_721; + +$L__BB2_724: + add.f32 %f2104, %f396, %f396; + selp.f32 %f3320, %f2104, 0f00000000, %p1089; + bra.uni $L__BB2_725; + +$L__BB2_721: + mov.b32 %r1178, %f3318; + xor.b32 %r1179, %r1178, -2147483648; + mov.b32 %f2100, %r1179; + selp.f32 %f3320, %f2100, %f3318, %p72; + setp.geu.f32 %p1106, %f396, 0f00000000; + @%p1106 bra $L__BB2_725; + + cvt.rzi.f32.f32 %f2102, %f1814; + setp.eq.f32 %p1107, %f2102, 0f40000000; + @%p1107 bra $L__BB2_725; + + mov.f32 %f3320, 0f7FFFFFFF; + +$L__BB2_725: + add.f32 %f2105, %f397, 0f40000000; + mov.b32 %r1180, %f2105; + setp.lt.s32 %p1109, %r1180, 2139095040; + @%p1109 bra $L__BB2_730; + + setp.gtu.f32 %p1110, %f397, 0f7F800000; + @%p1110 bra $L__BB2_729; + bra.uni $L__BB2_727; + +$L__BB2_729: + add.f32 %f3320, %f396, 0f40000000; + bra.uni $L__BB2_730; + +$L__BB2_727: + setp.neu.f32 %p1111, %f397, 0f7F800000; + @%p1111 bra $L__BB2_730; + + selp.f32 %f3320, 0fFF800000, 0f7F800000, %p72; + +$L__BB2_730: + mul.f32 %f2107, %f3320, 0fBF000000; + setp.eq.f32 %p1112, %f396, 0f3F800000; + selp.f32 %f2108, 0fBF000000, %f2107, %p1112; + fma.rn.f32 %f2111, %f2108, %f2012, %f1833; + cvt.sat.f32.f32 %f2114, %f2111; + fma.rm.f32 %f2116, %f2114, %f2015, %f2017; + add.f32 %f2117, %f2116, 0fCB40007F; + neg.f32 %f2118, %f2117; + fma.rn.f32 %f2119, %f2108, %f1988, %f2118; + fma.rn.f32 %f2121, %f2108, %f2022, %f2119; + mov.b32 %r1181, %f2116; + shl.b32 %r1182, %r1181, 23; + mov.b32 %f2122, %r1182; + ex2.approx.ftz.f32 %f2123, %f2121; + mul.f32 %f2124, %f2123, %f2122; + sub.f32 %f2125, %f395, %f2124; + div.rn.f32 %f408, %f314, %f357; + mul.f32 %f2126, %f408, %f2125; + mul.f32 %f409, %f382, %f2126; + add.f32 %f2127, %f370, 0f3F000000; + sub.f32 %f2128, %f2127, %f3277; + div.rn.f32 %f410, %f2128, %f359; + abs.f32 %f411, %f410; + setp.lt.f32 %p1113, %f411, 0f00800000; + mul.f32 %f2129, %f411, 0f4B800000; + selp.f32 %f2130, %f2129, %f411, %p1113; + selp.f32 %f2131, 0fC3170000, 0fC2FE0000, %p1113; + mov.b32 %r1183, %f2130; + and.b32 %r1184, %r1183, 8388607; + or.b32 %r1185, %r1184, 1065353216; + mov.b32 %f2132, %r1185; + shr.u32 %r1186, %r1183, 23; + cvt.rn.f32.u32 %f2133, %r1186; + add.f32 %f2134, %f2131, %f2133; + setp.gt.f32 %p1114, %f2132, 0f3FB504F3; + mul.f32 %f2135, %f2132, 0f3F000000; + add.f32 %f2136, %f2134, 0f3F800000; + selp.f32 %f2137, %f2136, %f2134, %p1114; + selp.f32 %f2138, %f2135, %f2132, %p1114; + add.f32 %f2139, %f2138, 0fBF800000; + add.f32 %f2140, %f2138, 0f3F800000; + rcp.approx.ftz.f32 %f2141, %f2140; + add.f32 %f2142, %f2139, %f2139; + mul.f32 %f2144, %f2142, %f2141; + mul.f32 %f2145, %f2144, %f2144; + fma.rn.f32 %f2148, %f1945, %f2145, %f1944; + fma.rn.f32 %f2150, %f2148, %f2145, %f1947; + mul.rn.f32 %f2151, %f2150, %f2145; + mul.rn.f32 %f2152, %f2151, %f2144; + sub.f32 %f2153, %f2139, %f2144; + add.f32 %f2154, %f2153, %f2153; + neg.f32 %f2155, %f2144; + fma.rn.f32 %f2156, %f2155, %f2139, %f2154; + mul.rn.f32 %f2157, %f2141, %f2156; + add.f32 %f2158, %f2152, %f2144; + sub.f32 %f2159, %f2144, %f2158; + add.f32 %f2160, %f2152, %f2159; + add.f32 %f2161, %f2157, %f2160; + add.f32 %f2162, %f2158, %f2161; + sub.f32 %f2163, %f2158, %f2162; + add.f32 %f2164, %f2161, %f2163; + mul.rn.f32 %f2166, %f2137, %f1963; + mul.rn.f32 %f2168, %f2137, %f1965; + add.f32 %f2169, %f2166, %f2162; + sub.f32 %f2170, %f2166, %f2169; + add.f32 %f2171, %f2162, %f2170; + add.f32 %f2172, %f2164, %f2171; + add.f32 %f2173, %f2168, %f2172; + add.f32 %f2174, %f2169, %f2173; + sub.f32 %f2175, %f2169, %f2174; + add.f32 %f2176, %f2173, %f2175; + mul.rn.f32 %f2177, %f1814, %f2174; + neg.f32 %f2178, %f2177; + fma.rn.f32 %f2179, %f1814, %f2174, %f2178; + fma.rn.f32 %f2180, %f1814, %f2176, %f2179; + fma.rn.f32 %f2182, %f1979, %f2174, %f2180; + add.rn.f32 %f2183, %f2177, %f2182; + neg.f32 %f2184, %f2183; + add.rn.f32 %f2185, %f2177, %f2184; + add.rn.f32 %f2186, %f2185, %f2182; + mov.b32 %r1187, %f2183; + setp.eq.s32 %p1115, %r1187, 1118925336; + add.s32 %r1188, %r1187, -1; + mov.b32 %f2187, %r1188; + add.f32 %f2188, %f2186, 0f37000000; + selp.f32 %f412, %f2188, %f2186, %p1115; + selp.f32 %f2189, %f2187, %f2183, %p1115; + mul.rn.f32 %f2190, %f2189, %f1988; + cvt.rzi.f32.f32 %f2191, %f2190; + abs.f32 %f2192, %f2191; + setp.gt.f32 %p1116, %f2192, 0f42FC0000; + mov.b32 %r1189, %f2191; + and.b32 %r1190, %r1189, -2147483648; + or.b32 %r1191, %r1190, 1123811328; + mov.b32 %f2193, %r1191; + selp.f32 %f2194, %f2193, %f2191, %p1116; + fma.rn.f32 %f2196, %f2194, %f1994, %f2189; + fma.rn.f32 %f2198, %f2194, %f1996, %f2196; + mul.f32 %f2199, %f2198, 0f3FB8AA3B; + add.f32 %f2200, %f2194, 0f4B40007F; + mov.b32 %r1192, %f2200; + shl.b32 %r1193, %r1192, 23; + mov.b32 %f2201, %r1193; + ex2.approx.ftz.f32 %f2202, %f2199; + mul.f32 %f413, %f2202, %f2201; + setp.eq.f32 %p1117, %f413, 0f7F800000; + mov.f32 %f3321, 0f7F800000; + @%p1117 bra $L__BB2_732; + + fma.rn.f32 %f3321, %f413, %f412, %f413; + +$L__BB2_732: + setp.lt.f32 %p1118, %f410, 0f00000000; + and.pred %p73, %p1118, %p1089; + setp.eq.f32 %p1120, %f410, 0f00000000; + @%p1120 bra $L__BB2_736; + bra.uni $L__BB2_733; + +$L__BB2_736: + add.f32 %f2207, %f410, %f410; + selp.f32 %f3323, %f2207, 0f00000000, %p1089; + bra.uni $L__BB2_737; + +$L__BB2_733: + mov.b32 %r1194, %f3321; + xor.b32 %r1195, %r1194, -2147483648; + mov.b32 %f2203, %r1195; + selp.f32 %f3323, %f2203, %f3321, %p73; + setp.geu.f32 %p1121, %f410, 0f00000000; + @%p1121 bra $L__BB2_737; + + cvt.rzi.f32.f32 %f2205, %f1814; + setp.eq.f32 %p1122, %f2205, 0f40000000; + @%p1122 bra $L__BB2_737; + + mov.f32 %f3323, 0f7FFFFFFF; + +$L__BB2_737: + add.f32 %f2208, %f411, 0f40000000; + mov.b32 %r1196, %f2208; + setp.lt.s32 %p1124, %r1196, 2139095040; + @%p1124 bra $L__BB2_742; + + setp.gtu.f32 %p1125, %f411, 0f7F800000; + @%p1125 bra $L__BB2_741; + bra.uni $L__BB2_739; + +$L__BB2_741: + add.f32 %f3323, %f410, 0f40000000; + bra.uni $L__BB2_742; + +$L__BB2_739: + setp.neu.f32 %p1126, %f411, 0f7F800000; + @%p1126 bra $L__BB2_742; + + selp.f32 %f3323, 0fFF800000, 0f7F800000, %p73; + +$L__BB2_742: + mul.f32 %f2210, %f3323, 0fBF000000; + setp.eq.f32 %p1127, %f410, 0f3F800000; + selp.f32 %f2211, 0fBF000000, %f2210, %p1127; + fma.rn.f32 %f2214, %f2211, %f2012, %f1833; + cvt.sat.f32.f32 %f2217, %f2214; + fma.rm.f32 %f2219, %f2217, %f2015, %f2017; + add.f32 %f2220, %f2219, 0fCB40007F; + neg.f32 %f2221, %f2220; + fma.rn.f32 %f2222, %f2211, %f1988, %f2221; + fma.rn.f32 %f2224, %f2211, %f2022, %f2222; + mov.b32 %r1197, %f2219; + shl.b32 %r1198, %r1197, 23; + mov.b32 %f2225, %r1198; + ex2.approx.ftz.f32 %f2226, %f2224; + mul.f32 %f422, %f2226, %f2225; + div.rn.f32 %f423, %f377, %f359; + abs.f32 %f424, %f423; + setp.lt.f32 %p1128, %f424, 0f00800000; + mul.f32 %f2227, %f424, 0f4B800000; + selp.f32 %f2228, %f2227, %f424, %p1128; + selp.f32 %f2229, 0fC3170000, 0fC2FE0000, %p1128; + mov.b32 %r1199, %f2228; + and.b32 %r1200, %r1199, 8388607; + or.b32 %r1201, %r1200, 1065353216; + mov.b32 %f2230, %r1201; + shr.u32 %r1202, %r1199, 23; + cvt.rn.f32.u32 %f2231, %r1202; + add.f32 %f2232, %f2229, %f2231; + setp.gt.f32 %p1129, %f2230, 0f3FB504F3; + mul.f32 %f2233, %f2230, 0f3F000000; + add.f32 %f2234, %f2232, 0f3F800000; + selp.f32 %f2235, %f2234, %f2232, %p1129; + selp.f32 %f2236, %f2233, %f2230, %p1129; + add.f32 %f2237, %f2236, 0fBF800000; + add.f32 %f2238, %f2236, 0f3F800000; + rcp.approx.ftz.f32 %f2239, %f2238; + add.f32 %f2240, %f2237, %f2237; + mul.f32 %f2242, %f2240, %f2239; + mul.f32 %f2243, %f2242, %f2242; + fma.rn.f32 %f2246, %f1945, %f2243, %f1944; + fma.rn.f32 %f2248, %f2246, %f2243, %f1947; + mul.rn.f32 %f2249, %f2248, %f2243; + mul.rn.f32 %f2250, %f2249, %f2242; + sub.f32 %f2251, %f2237, %f2242; + add.f32 %f2252, %f2251, %f2251; + neg.f32 %f2253, %f2242; + fma.rn.f32 %f2254, %f2253, %f2237, %f2252; + mul.rn.f32 %f2255, %f2239, %f2254; + add.f32 %f2256, %f2250, %f2242; + sub.f32 %f2257, %f2242, %f2256; + add.f32 %f2258, %f2250, %f2257; + add.f32 %f2259, %f2255, %f2258; + add.f32 %f2260, %f2256, %f2259; + sub.f32 %f2261, %f2256, %f2260; + add.f32 %f2262, %f2259, %f2261; + mul.rn.f32 %f2264, %f2235, %f1963; + mul.rn.f32 %f2266, %f2235, %f1965; + add.f32 %f2267, %f2264, %f2260; + sub.f32 %f2268, %f2264, %f2267; + add.f32 %f2269, %f2260, %f2268; + add.f32 %f2270, %f2262, %f2269; + add.f32 %f2271, %f2266, %f2270; + add.f32 %f2272, %f2267, %f2271; + sub.f32 %f2273, %f2267, %f2272; + add.f32 %f2274, %f2271, %f2273; + mul.rn.f32 %f2275, %f1814, %f2272; + neg.f32 %f2276, %f2275; + fma.rn.f32 %f2277, %f1814, %f2272, %f2276; + fma.rn.f32 %f2278, %f1814, %f2274, %f2277; + fma.rn.f32 %f2280, %f1979, %f2272, %f2278; + add.rn.f32 %f2281, %f2275, %f2280; + neg.f32 %f2282, %f2281; + add.rn.f32 %f2283, %f2275, %f2282; + add.rn.f32 %f2284, %f2283, %f2280; + mov.b32 %r1203, %f2281; + setp.eq.s32 %p1130, %r1203, 1118925336; + add.s32 %r1204, %r1203, -1; + mov.b32 %f2285, %r1204; + add.f32 %f2286, %f2284, 0f37000000; + selp.f32 %f425, %f2286, %f2284, %p1130; + selp.f32 %f2287, %f2285, %f2281, %p1130; + mul.rn.f32 %f2288, %f2287, %f1988; + cvt.rzi.f32.f32 %f2289, %f2288; + abs.f32 %f2290, %f2289; + setp.gt.f32 %p1131, %f2290, 0f42FC0000; + mov.b32 %r1205, %f2289; + and.b32 %r1206, %r1205, -2147483648; + or.b32 %r1207, %r1206, 1123811328; + mov.b32 %f2291, %r1207; + selp.f32 %f2292, %f2291, %f2289, %p1131; + fma.rn.f32 %f2294, %f2292, %f1994, %f2287; + fma.rn.f32 %f2296, %f2292, %f1996, %f2294; + mul.f32 %f2297, %f2296, 0f3FB8AA3B; + add.f32 %f2298, %f2292, 0f4B40007F; + mov.b32 %r1208, %f2298; + shl.b32 %r1209, %r1208, 23; + mov.b32 %f2299, %r1209; + ex2.approx.ftz.f32 %f2300, %f2297; + mul.f32 %f426, %f2300, %f2299; + setp.eq.f32 %p1132, %f426, 0f7F800000; + mov.f32 %f3324, 0f7F800000; + @%p1132 bra $L__BB2_744; + + fma.rn.f32 %f3324, %f426, %f425, %f426; + +$L__BB2_744: + setp.lt.f32 %p1133, %f423, 0f00000000; + and.pred %p74, %p1133, %p1089; + setp.eq.f32 %p1135, %f423, 0f00000000; + @%p1135 bra $L__BB2_748; + bra.uni $L__BB2_745; + +$L__BB2_748: + add.f32 %f2305, %f423, %f423; + selp.f32 %f3326, %f2305, 0f00000000, %p1089; + bra.uni $L__BB2_749; + +$L__BB2_745: + mov.b32 %r1210, %f3324; + xor.b32 %r1211, %r1210, -2147483648; + mov.b32 %f2301, %r1211; + selp.f32 %f3326, %f2301, %f3324, %p74; + setp.geu.f32 %p1136, %f423, 0f00000000; + @%p1136 bra $L__BB2_749; + + cvt.rzi.f32.f32 %f2303, %f1814; + setp.eq.f32 %p1137, %f2303, 0f40000000; + @%p1137 bra $L__BB2_749; + + mov.f32 %f3326, 0f7FFFFFFF; + +$L__BB2_749: + add.f32 %f2306, %f424, 0f40000000; + mov.b32 %r1212, %f2306; + setp.lt.s32 %p1139, %r1212, 2139095040; + @%p1139 bra $L__BB2_754; + + setp.gtu.f32 %p1140, %f424, 0f7F800000; + @%p1140 bra $L__BB2_753; + bra.uni $L__BB2_751; + +$L__BB2_753: + add.f32 %f3326, %f423, 0f40000000; + bra.uni $L__BB2_754; + +$L__BB2_751: + setp.neu.f32 %p1141, %f424, 0f7F800000; + @%p1141 bra $L__BB2_754; + + selp.f32 %f3326, 0fFF800000, 0f7F800000, %p74; + +$L__BB2_754: + mul.f32 %f2308, %f3326, 0fBF000000; + setp.eq.f32 %p1142, %f423, 0f3F800000; + selp.f32 %f2309, 0fBF000000, %f2308, %p1142; + fma.rn.f32 %f2312, %f2309, %f2012, %f1833; + cvt.sat.f32.f32 %f2315, %f2312; + fma.rm.f32 %f2317, %f2315, %f2015, %f2017; + add.f32 %f2318, %f2317, 0fCB40007F; + neg.f32 %f2319, %f2318; + fma.rn.f32 %f2320, %f2309, %f1988, %f2319; + fma.rn.f32 %f2322, %f2309, %f2022, %f2320; + mov.b32 %r1213, %f2317; + shl.b32 %r1214, %r1213, 23; + mov.b32 %f2323, %r1214; + ex2.approx.ftz.f32 %f2324, %f2322; + mul.f32 %f2325, %f2324, %f2323; + sub.f32 %f2326, %f422, %f2325; + div.rn.f32 %f435, %f314, %f359; + mul.f32 %f2327, %f435, %f2326; + mul.f32 %f436, %f369, %f2327; + div.rn.f32 %f437, %f338, %f357; + abs.f32 %f438, %f437; + setp.lt.f32 %p1143, %f438, 0f00800000; + mul.f32 %f2328, %f438, 0f4B800000; + selp.f32 %f2329, %f2328, %f438, %p1143; + selp.f32 %f2330, 0fC3170000, 0fC2FE0000, %p1143; + mov.b32 %r1215, %f2329; + and.b32 %r1216, %r1215, 8388607; + or.b32 %r1217, %r1216, 1065353216; + mov.b32 %f2331, %r1217; + shr.u32 %r1218, %r1215, 23; + cvt.rn.f32.u32 %f2332, %r1218; + add.f32 %f2333, %f2330, %f2332; + setp.gt.f32 %p1144, %f2331, 0f3FB504F3; + mul.f32 %f2334, %f2331, 0f3F000000; + add.f32 %f2335, %f2333, 0f3F800000; + selp.f32 %f2336, %f2335, %f2333, %p1144; + selp.f32 %f2337, %f2334, %f2331, %p1144; + add.f32 %f2338, %f2337, 0fBF800000; + add.f32 %f2339, %f2337, 0f3F800000; + rcp.approx.ftz.f32 %f2340, %f2339; + add.f32 %f2341, %f2338, %f2338; + mul.f32 %f2343, %f2341, %f2340; + mul.f32 %f2344, %f2343, %f2343; + fma.rn.f32 %f2347, %f1945, %f2344, %f1944; + fma.rn.f32 %f2349, %f2347, %f2344, %f1947; + mul.rn.f32 %f2350, %f2349, %f2344; + mul.rn.f32 %f2351, %f2350, %f2343; + sub.f32 %f2352, %f2338, %f2343; + add.f32 %f2353, %f2352, %f2352; + neg.f32 %f2354, %f2343; + fma.rn.f32 %f2355, %f2354, %f2338, %f2353; + mul.rn.f32 %f2356, %f2340, %f2355; + add.f32 %f2357, %f2351, %f2343; + sub.f32 %f2358, %f2343, %f2357; + add.f32 %f2359, %f2351, %f2358; + add.f32 %f2360, %f2356, %f2359; + add.f32 %f2361, %f2357, %f2360; + sub.f32 %f2362, %f2357, %f2361; + add.f32 %f2363, %f2360, %f2362; + mul.rn.f32 %f2365, %f2336, %f1963; + mul.rn.f32 %f2367, %f2336, %f1965; + add.f32 %f2368, %f2365, %f2361; + sub.f32 %f2369, %f2365, %f2368; + add.f32 %f2370, %f2361, %f2369; + add.f32 %f2371, %f2363, %f2370; + add.f32 %f2372, %f2367, %f2371; + add.f32 %f2373, %f2368, %f2372; + sub.f32 %f2374, %f2368, %f2373; + add.f32 %f2375, %f2372, %f2374; + mul.rn.f32 %f2376, %f1814, %f2373; + neg.f32 %f2377, %f2376; + fma.rn.f32 %f2378, %f1814, %f2373, %f2377; + fma.rn.f32 %f2379, %f1814, %f2375, %f2378; + fma.rn.f32 %f2381, %f1979, %f2373, %f2379; + add.rn.f32 %f2382, %f2376, %f2381; + neg.f32 %f2383, %f2382; + add.rn.f32 %f2384, %f2376, %f2383; + add.rn.f32 %f2385, %f2384, %f2381; + mov.b32 %r1219, %f2382; + setp.eq.s32 %p1145, %r1219, 1118925336; + add.s32 %r1220, %r1219, -1; + mov.b32 %f2386, %r1220; + add.f32 %f2387, %f2385, 0f37000000; + selp.f32 %f439, %f2387, %f2385, %p1145; + selp.f32 %f2388, %f2386, %f2382, %p1145; + mul.rn.f32 %f2389, %f2388, %f1988; + cvt.rzi.f32.f32 %f2390, %f2389; + abs.f32 %f2391, %f2390; + setp.gt.f32 %p1146, %f2391, 0f42FC0000; + mov.b32 %r1221, %f2390; + and.b32 %r1222, %r1221, -2147483648; + or.b32 %r1223, %r1222, 1123811328; + mov.b32 %f2392, %r1223; + selp.f32 %f2393, %f2392, %f2390, %p1146; + fma.rn.f32 %f2395, %f2393, %f1994, %f2388; + fma.rn.f32 %f2397, %f2393, %f1996, %f2395; + mul.f32 %f2398, %f2397, 0f3FB8AA3B; + add.f32 %f2399, %f2393, 0f4B40007F; + mov.b32 %r1224, %f2399; + shl.b32 %r1225, %r1224, 23; + mov.b32 %f2400, %r1225; + ex2.approx.ftz.f32 %f2401, %f2398; + mul.f32 %f440, %f2401, %f2400; + setp.eq.f32 %p1147, %f440, 0f7F800000; + mov.f32 %f3327, 0f7F800000; + @%p1147 bra $L__BB2_756; + + fma.rn.f32 %f3327, %f440, %f439, %f440; + +$L__BB2_756: + setp.lt.f32 %p1148, %f437, 0f00000000; + and.pred %p75, %p1148, %p1089; + setp.eq.f32 %p1150, %f437, 0f00000000; + @%p1150 bra $L__BB2_760; + bra.uni $L__BB2_757; + +$L__BB2_760: + add.f32 %f2406, %f437, %f437; + selp.f32 %f3329, %f2406, 0f00000000, %p1089; + bra.uni $L__BB2_761; + +$L__BB2_757: + mov.b32 %r1226, %f3327; + xor.b32 %r1227, %r1226, -2147483648; + mov.b32 %f2402, %r1227; + selp.f32 %f3329, %f2402, %f3327, %p75; + setp.geu.f32 %p1151, %f437, 0f00000000; + @%p1151 bra $L__BB2_761; + + cvt.rzi.f32.f32 %f2404, %f1814; + setp.eq.f32 %p1152, %f2404, 0f40000000; + @%p1152 bra $L__BB2_761; + + mov.f32 %f3329, 0f7FFFFFFF; + +$L__BB2_761: + add.f32 %f2407, %f438, 0f40000000; + mov.b32 %r1228, %f2407; + setp.lt.s32 %p1154, %r1228, 2139095040; + @%p1154 bra $L__BB2_766; + + setp.gtu.f32 %p1155, %f438, 0f7F800000; + @%p1155 bra $L__BB2_765; + bra.uni $L__BB2_763; + +$L__BB2_765: + add.f32 %f3329, %f437, 0f40000000; + bra.uni $L__BB2_766; + +$L__BB2_763: + setp.neu.f32 %p1156, %f438, 0f7F800000; + @%p1156 bra $L__BB2_766; + + selp.f32 %f3329, 0fFF800000, 0f7F800000, %p75; + +$L__BB2_766: + mul.f32 %f2409, %f3329, 0fBF000000; + setp.eq.f32 %p1157, %f437, 0f3F800000; + selp.f32 %f2410, 0fBF000000, %f2409, %p1157; + fma.rn.f32 %f2413, %f2410, %f2012, %f1833; + cvt.sat.f32.f32 %f2416, %f2413; + fma.rm.f32 %f2418, %f2416, %f2015, %f2017; + add.f32 %f2419, %f2418, 0fCB40007F; + neg.f32 %f2420, %f2419; + fma.rn.f32 %f2421, %f2410, %f1988, %f2420; + fma.rn.f32 %f2423, %f2410, %f2022, %f2421; + mov.b32 %r1229, %f2418; + shl.b32 %r1230, %r1229, 23; + mov.b32 %f2424, %r1230; + ex2.approx.ftz.f32 %f2425, %f2423; + mul.f32 %f449, %f2425, %f2424; + div.rn.f32 %f450, %f334, %f357; + abs.f32 %f451, %f450; + setp.lt.f32 %p1158, %f451, 0f00800000; + mul.f32 %f2426, %f451, 0f4B800000; + selp.f32 %f2427, %f2426, %f451, %p1158; + selp.f32 %f2428, 0fC3170000, 0fC2FE0000, %p1158; + mov.b32 %r1231, %f2427; + and.b32 %r1232, %r1231, 8388607; + or.b32 %r1233, %r1232, 1065353216; + mov.b32 %f2429, %r1233; + shr.u32 %r1234, %r1231, 23; + cvt.rn.f32.u32 %f2430, %r1234; + add.f32 %f2431, %f2428, %f2430; + setp.gt.f32 %p1159, %f2429, 0f3FB504F3; + mul.f32 %f2432, %f2429, 0f3F000000; + add.f32 %f2433, %f2431, 0f3F800000; + selp.f32 %f2434, %f2433, %f2431, %p1159; + selp.f32 %f2435, %f2432, %f2429, %p1159; + add.f32 %f2436, %f2435, 0fBF800000; + add.f32 %f2437, %f2435, 0f3F800000; + rcp.approx.ftz.f32 %f2438, %f2437; + add.f32 %f2439, %f2436, %f2436; + mul.f32 %f2441, %f2439, %f2438; + mul.f32 %f2442, %f2441, %f2441; + fma.rn.f32 %f2445, %f1945, %f2442, %f1944; + fma.rn.f32 %f2447, %f2445, %f2442, %f1947; + mul.rn.f32 %f2448, %f2447, %f2442; + mul.rn.f32 %f2449, %f2448, %f2441; + sub.f32 %f2450, %f2436, %f2441; + add.f32 %f2451, %f2450, %f2450; + neg.f32 %f2452, %f2441; + fma.rn.f32 %f2453, %f2452, %f2436, %f2451; + mul.rn.f32 %f2454, %f2438, %f2453; + add.f32 %f2455, %f2449, %f2441; + sub.f32 %f2456, %f2441, %f2455; + add.f32 %f2457, %f2449, %f2456; + add.f32 %f2458, %f2454, %f2457; + add.f32 %f2459, %f2455, %f2458; + sub.f32 %f2460, %f2455, %f2459; + add.f32 %f2461, %f2458, %f2460; + mul.rn.f32 %f2463, %f2434, %f1963; + mul.rn.f32 %f2465, %f2434, %f1965; + add.f32 %f2466, %f2463, %f2459; + sub.f32 %f2467, %f2463, %f2466; + add.f32 %f2468, %f2459, %f2467; + add.f32 %f2469, %f2461, %f2468; + add.f32 %f2470, %f2465, %f2469; + add.f32 %f2471, %f2466, %f2470; + sub.f32 %f2472, %f2466, %f2471; + add.f32 %f2473, %f2470, %f2472; + mul.rn.f32 %f2474, %f1814, %f2471; + neg.f32 %f2475, %f2474; + fma.rn.f32 %f2476, %f1814, %f2471, %f2475; + fma.rn.f32 %f2477, %f1814, %f2473, %f2476; + fma.rn.f32 %f2479, %f1979, %f2471, %f2477; + add.rn.f32 %f2480, %f2474, %f2479; + neg.f32 %f2481, %f2480; + add.rn.f32 %f2482, %f2474, %f2481; + add.rn.f32 %f2483, %f2482, %f2479; + mov.b32 %r1235, %f2480; + setp.eq.s32 %p1160, %r1235, 1118925336; + add.s32 %r1236, %r1235, -1; + mov.b32 %f2484, %r1236; + add.f32 %f2485, %f2483, 0f37000000; + selp.f32 %f452, %f2485, %f2483, %p1160; + selp.f32 %f2486, %f2484, %f2480, %p1160; + mul.rn.f32 %f2487, %f2486, %f1988; + cvt.rzi.f32.f32 %f2488, %f2487; + abs.f32 %f2489, %f2488; + setp.gt.f32 %p1161, %f2489, 0f42FC0000; + mov.b32 %r1237, %f2488; + and.b32 %r1238, %r1237, -2147483648; + or.b32 %r1239, %r1238, 1123811328; + mov.b32 %f2490, %r1239; + selp.f32 %f2491, %f2490, %f2488, %p1161; + fma.rn.f32 %f2493, %f2491, %f1994, %f2486; + fma.rn.f32 %f2495, %f2491, %f1996, %f2493; + mul.f32 %f2496, %f2495, 0f3FB8AA3B; + add.f32 %f2497, %f2491, 0f4B40007F; + mov.b32 %r1240, %f2497; + shl.b32 %r1241, %r1240, 23; + mov.b32 %f2498, %r1241; + ex2.approx.ftz.f32 %f2499, %f2496; + mul.f32 %f453, %f2499, %f2498; + setp.eq.f32 %p1162, %f453, 0f7F800000; + mov.f32 %f3330, 0f7F800000; + @%p1162 bra $L__BB2_768; + + fma.rn.f32 %f3330, %f453, %f452, %f453; + +$L__BB2_768: + setp.lt.f32 %p1163, %f450, 0f00000000; + and.pred %p76, %p1163, %p1089; + setp.eq.f32 %p1165, %f450, 0f00000000; + @%p1165 bra $L__BB2_772; + bra.uni $L__BB2_769; + +$L__BB2_772: + add.f32 %f2504, %f450, %f450; + selp.f32 %f3332, %f2504, 0f00000000, %p1089; + bra.uni $L__BB2_773; + +$L__BB2_769: + mov.b32 %r1242, %f3330; + xor.b32 %r1243, %r1242, -2147483648; + mov.b32 %f2500, %r1243; + selp.f32 %f3332, %f2500, %f3330, %p76; + setp.geu.f32 %p1166, %f450, 0f00000000; + @%p1166 bra $L__BB2_773; + + cvt.rzi.f32.f32 %f2502, %f1814; + setp.eq.f32 %p1167, %f2502, 0f40000000; + @%p1167 bra $L__BB2_773; + + mov.f32 %f3332, 0f7FFFFFFF; + +$L__BB2_773: + add.f32 %f2505, %f451, 0f40000000; + mov.b32 %r1244, %f2505; + setp.lt.s32 %p1169, %r1244, 2139095040; + @%p1169 bra $L__BB2_778; + + setp.gtu.f32 %p1170, %f451, 0f7F800000; + @%p1170 bra $L__BB2_777; + bra.uni $L__BB2_775; + +$L__BB2_777: + add.f32 %f3332, %f450, 0f40000000; + bra.uni $L__BB2_778; + +$L__BB2_775: + setp.neu.f32 %p1171, %f451, 0f7F800000; + @%p1171 bra $L__BB2_778; + + selp.f32 %f3332, 0fFF800000, 0f7F800000, %p76; + +$L__BB2_778: + mul.f32 %f2507, %f3332, 0fBF000000; + setp.eq.f32 %p1172, %f450, 0f3F800000; + selp.f32 %f2508, 0fBF000000, %f2507, %p1172; + fma.rn.f32 %f2511, %f2508, %f2012, %f1833; + cvt.sat.f32.f32 %f2514, %f2511; + fma.rm.f32 %f2516, %f2514, %f2015, %f2017; + add.f32 %f2517, %f2516, 0fCB40007F; + neg.f32 %f2518, %f2517; + fma.rn.f32 %f2519, %f2508, %f1988, %f2518; + fma.rn.f32 %f2521, %f2508, %f2022, %f2519; + mov.b32 %r1245, %f2516; + shl.b32 %r1246, %r1245, 23; + mov.b32 %f2522, %r1246; + ex2.approx.ftz.f32 %f2523, %f2521; + mul.f32 %f2524, %f2523, %f2522; + mul.f32 %f2525, %f334, %f2524; + mul.f32 %f2526, %f339, %f449; + sub.f32 %f2527, %f2526, %f2525; + div.rn.f32 %f2528, %f408, %f357; + mul.f32 %f2529, %f2528, %f2527; + mul.f32 %f462, %f382, %f2529; + add.f32 %f2530, %f370, 0f3F800000; + sub.f32 %f2531, %f2530, %f3277; + div.rn.f32 %f463, %f2531, %f359; + abs.f32 %f464, %f463; + setp.lt.f32 %p1173, %f464, 0f00800000; + mul.f32 %f2532, %f464, 0f4B800000; + selp.f32 %f2533, %f2532, %f464, %p1173; + selp.f32 %f2534, 0fC3170000, 0fC2FE0000, %p1173; + mov.b32 %r1247, %f2533; + and.b32 %r1248, %r1247, 8388607; + or.b32 %r1249, %r1248, 1065353216; + mov.b32 %f2535, %r1249; + shr.u32 %r1250, %r1247, 23; + cvt.rn.f32.u32 %f2536, %r1250; + add.f32 %f2537, %f2534, %f2536; + setp.gt.f32 %p1174, %f2535, 0f3FB504F3; + mul.f32 %f2538, %f2535, 0f3F000000; + add.f32 %f2539, %f2537, 0f3F800000; + selp.f32 %f2540, %f2539, %f2537, %p1174; + selp.f32 %f2541, %f2538, %f2535, %p1174; + add.f32 %f2542, %f2541, 0fBF800000; + add.f32 %f2543, %f2541, 0f3F800000; + rcp.approx.ftz.f32 %f2544, %f2543; + add.f32 %f2545, %f2542, %f2542; + mul.f32 %f2547, %f2545, %f2544; + mul.f32 %f2548, %f2547, %f2547; + fma.rn.f32 %f2551, %f1945, %f2548, %f1944; + fma.rn.f32 %f2553, %f2551, %f2548, %f1947; + mul.rn.f32 %f2554, %f2553, %f2548; + mul.rn.f32 %f2555, %f2554, %f2547; + sub.f32 %f2556, %f2542, %f2547; + add.f32 %f2557, %f2556, %f2556; + neg.f32 %f2558, %f2547; + fma.rn.f32 %f2559, %f2558, %f2542, %f2557; + mul.rn.f32 %f2560, %f2544, %f2559; + add.f32 %f2561, %f2555, %f2547; + sub.f32 %f2562, %f2547, %f2561; + add.f32 %f2563, %f2555, %f2562; + add.f32 %f2564, %f2560, %f2563; + add.f32 %f2565, %f2561, %f2564; + sub.f32 %f2566, %f2561, %f2565; + add.f32 %f2567, %f2564, %f2566; + mul.rn.f32 %f2569, %f2540, %f1963; + mul.rn.f32 %f2571, %f2540, %f1965; + add.f32 %f2572, %f2569, %f2565; + sub.f32 %f2573, %f2569, %f2572; + add.f32 %f2574, %f2565, %f2573; + add.f32 %f2575, %f2567, %f2574; + add.f32 %f2576, %f2571, %f2575; + add.f32 %f2577, %f2572, %f2576; + sub.f32 %f2578, %f2572, %f2577; + add.f32 %f2579, %f2576, %f2578; + mul.rn.f32 %f2580, %f1814, %f2577; + neg.f32 %f2581, %f2580; + fma.rn.f32 %f2582, %f1814, %f2577, %f2581; + fma.rn.f32 %f2583, %f1814, %f2579, %f2582; + fma.rn.f32 %f2585, %f1979, %f2577, %f2583; + add.rn.f32 %f2586, %f2580, %f2585; + neg.f32 %f2587, %f2586; + add.rn.f32 %f2588, %f2580, %f2587; + add.rn.f32 %f2589, %f2588, %f2585; + mov.b32 %r1251, %f2586; + setp.eq.s32 %p1175, %r1251, 1118925336; + add.s32 %r1252, %r1251, -1; + mov.b32 %f2590, %r1252; + add.f32 %f2591, %f2589, 0f37000000; + selp.f32 %f465, %f2591, %f2589, %p1175; + selp.f32 %f2592, %f2590, %f2586, %p1175; + mul.rn.f32 %f2593, %f2592, %f1988; + cvt.rzi.f32.f32 %f2594, %f2593; + abs.f32 %f2595, %f2594; + setp.gt.f32 %p1176, %f2595, 0f42FC0000; + mov.b32 %r1253, %f2594; + and.b32 %r1254, %r1253, -2147483648; + or.b32 %r1255, %r1254, 1123811328; + mov.b32 %f2596, %r1255; + selp.f32 %f2597, %f2596, %f2594, %p1176; + fma.rn.f32 %f2599, %f2597, %f1994, %f2592; + fma.rn.f32 %f2601, %f2597, %f1996, %f2599; + mul.f32 %f2602, %f2601, 0f3FB8AA3B; + add.f32 %f2603, %f2597, 0f4B40007F; + mov.b32 %r1256, %f2603; + shl.b32 %r1257, %r1256, 23; + mov.b32 %f2604, %r1257; + ex2.approx.ftz.f32 %f2605, %f2602; + mul.f32 %f466, %f2605, %f2604; + setp.eq.f32 %p1177, %f466, 0f7F800000; + mov.f32 %f3333, 0f7F800000; + @%p1177 bra $L__BB2_780; + + fma.rn.f32 %f3333, %f466, %f465, %f466; + +$L__BB2_780: + setp.lt.f32 %p1178, %f463, 0f00000000; + and.pred %p77, %p1178, %p1089; + setp.eq.f32 %p1180, %f463, 0f00000000; + @%p1180 bra $L__BB2_784; + bra.uni $L__BB2_781; + +$L__BB2_784: + add.f32 %f2610, %f463, %f463; + selp.f32 %f3335, %f2610, 0f00000000, %p1089; + bra.uni $L__BB2_785; + +$L__BB2_781: + mov.b32 %r1258, %f3333; + xor.b32 %r1259, %r1258, -2147483648; + mov.b32 %f2606, %r1259; + selp.f32 %f3335, %f2606, %f3333, %p77; + setp.geu.f32 %p1181, %f463, 0f00000000; + @%p1181 bra $L__BB2_785; + + cvt.rzi.f32.f32 %f2608, %f1814; + setp.eq.f32 %p1182, %f2608, 0f40000000; + @%p1182 bra $L__BB2_785; + + mov.f32 %f3335, 0f7FFFFFFF; + +$L__BB2_785: + add.f32 %f2611, %f464, 0f40000000; + mov.b32 %r1260, %f2611; + setp.lt.s32 %p1184, %r1260, 2139095040; + @%p1184 bra $L__BB2_790; + + setp.gtu.f32 %p1185, %f464, 0f7F800000; + @%p1185 bra $L__BB2_789; + bra.uni $L__BB2_787; + +$L__BB2_789: + add.f32 %f3335, %f463, 0f40000000; + bra.uni $L__BB2_790; + +$L__BB2_787: + setp.neu.f32 %p1186, %f464, 0f7F800000; + @%p1186 bra $L__BB2_790; + + selp.f32 %f3335, 0fFF800000, 0f7F800000, %p77; + +$L__BB2_790: + mul.f32 %f2613, %f3335, 0fBF000000; + setp.eq.f32 %p1187, %f463, 0f3F800000; + selp.f32 %f2614, 0fBF000000, %f2613, %p1187; + fma.rn.f32 %f2617, %f2614, %f2012, %f1833; + cvt.sat.f32.f32 %f2620, %f2617; + fma.rm.f32 %f2622, %f2620, %f2015, %f2017; + add.f32 %f2623, %f2622, 0fCB40007F; + neg.f32 %f2624, %f2623; + fma.rn.f32 %f2625, %f2614, %f1988, %f2624; + fma.rn.f32 %f2627, %f2614, %f2022, %f2625; + mov.b32 %r1261, %f2622; + shl.b32 %r1262, %r1261, 23; + mov.b32 %f2628, %r1262; + ex2.approx.ftz.f32 %f2629, %f2627; + mul.f32 %f475, %f2629, %f2628; + div.rn.f32 %f476, %f371, %f359; + abs.f32 %f477, %f476; + setp.lt.f32 %p1188, %f477, 0f00800000; + mul.f32 %f2630, %f477, 0f4B800000; + selp.f32 %f2631, %f2630, %f477, %p1188; + selp.f32 %f2632, 0fC3170000, 0fC2FE0000, %p1188; + mov.b32 %r1263, %f2631; + and.b32 %r1264, %r1263, 8388607; + or.b32 %r1265, %r1264, 1065353216; + mov.b32 %f2633, %r1265; + shr.u32 %r1266, %r1263, 23; + cvt.rn.f32.u32 %f2634, %r1266; + add.f32 %f2635, %f2632, %f2634; + setp.gt.f32 %p1189, %f2633, 0f3FB504F3; + mul.f32 %f2636, %f2633, 0f3F000000; + add.f32 %f2637, %f2635, 0f3F800000; + selp.f32 %f2638, %f2637, %f2635, %p1189; + selp.f32 %f2639, %f2636, %f2633, %p1189; + add.f32 %f2640, %f2639, 0fBF800000; + add.f32 %f2641, %f2639, 0f3F800000; + rcp.approx.ftz.f32 %f2642, %f2641; + add.f32 %f2643, %f2640, %f2640; + mul.f32 %f2645, %f2643, %f2642; + mul.f32 %f2646, %f2645, %f2645; + fma.rn.f32 %f2649, %f1945, %f2646, %f1944; + fma.rn.f32 %f2651, %f2649, %f2646, %f1947; + mul.rn.f32 %f2652, %f2651, %f2646; + mul.rn.f32 %f2653, %f2652, %f2645; + sub.f32 %f2654, %f2640, %f2645; + add.f32 %f2655, %f2654, %f2654; + neg.f32 %f2656, %f2645; + fma.rn.f32 %f2657, %f2656, %f2640, %f2655; + mul.rn.f32 %f2658, %f2642, %f2657; + add.f32 %f2659, %f2653, %f2645; + sub.f32 %f2660, %f2645, %f2659; + add.f32 %f2661, %f2653, %f2660; + add.f32 %f2662, %f2658, %f2661; + add.f32 %f2663, %f2659, %f2662; + sub.f32 %f2664, %f2659, %f2663; + add.f32 %f2665, %f2662, %f2664; + mul.rn.f32 %f2667, %f2638, %f1963; + mul.rn.f32 %f2669, %f2638, %f1965; + add.f32 %f2670, %f2667, %f2663; + sub.f32 %f2671, %f2667, %f2670; + add.f32 %f2672, %f2663, %f2671; + add.f32 %f2673, %f2665, %f2672; + add.f32 %f2674, %f2669, %f2673; + add.f32 %f2675, %f2670, %f2674; + sub.f32 %f2676, %f2670, %f2675; + add.f32 %f2677, %f2674, %f2676; + mul.rn.f32 %f2678, %f1814, %f2675; + neg.f32 %f2679, %f2678; + fma.rn.f32 %f2680, %f1814, %f2675, %f2679; + fma.rn.f32 %f2681, %f1814, %f2677, %f2680; + fma.rn.f32 %f2683, %f1979, %f2675, %f2681; + add.rn.f32 %f2684, %f2678, %f2683; + neg.f32 %f2685, %f2684; + add.rn.f32 %f2686, %f2678, %f2685; + add.rn.f32 %f2687, %f2686, %f2683; + mov.b32 %r1267, %f2684; + setp.eq.s32 %p1190, %r1267, 1118925336; + add.s32 %r1268, %r1267, -1; + mov.b32 %f2688, %r1268; + add.f32 %f2689, %f2687, 0f37000000; + selp.f32 %f478, %f2689, %f2687, %p1190; + selp.f32 %f2690, %f2688, %f2684, %p1190; + mul.rn.f32 %f2691, %f2690, %f1988; + cvt.rzi.f32.f32 %f2692, %f2691; + abs.f32 %f2693, %f2692; + setp.gt.f32 %p1191, %f2693, 0f42FC0000; + mov.b32 %r1269, %f2692; + and.b32 %r1270, %r1269, -2147483648; + or.b32 %r1271, %r1270, 1123811328; + mov.b32 %f2694, %r1271; + selp.f32 %f2695, %f2694, %f2692, %p1191; + fma.rn.f32 %f2697, %f2695, %f1994, %f2690; + fma.rn.f32 %f2699, %f2695, %f1996, %f2697; + mul.f32 %f2700, %f2699, 0f3FB8AA3B; + add.f32 %f2701, %f2695, 0f4B40007F; + mov.b32 %r1272, %f2701; + shl.b32 %r1273, %r1272, 23; + mov.b32 %f2702, %r1273; + ex2.approx.ftz.f32 %f2703, %f2700; + mul.f32 %f479, %f2703, %f2702; + setp.eq.f32 %p1192, %f479, 0f7F800000; + mov.f32 %f3336, 0f7F800000; + @%p1192 bra $L__BB2_792; + + fma.rn.f32 %f3336, %f479, %f478, %f479; + +$L__BB2_792: + setp.lt.f32 %p1193, %f476, 0f00000000; + and.pred %p78, %p1193, %p1089; + setp.eq.f32 %p1195, %f476, 0f00000000; + @%p1195 bra $L__BB2_796; + bra.uni $L__BB2_793; + +$L__BB2_796: + add.f32 %f2708, %f476, %f476; + selp.f32 %f3338, %f2708, 0f00000000, %p1089; + bra.uni $L__BB2_797; + +$L__BB2_793: + mov.b32 %r1274, %f3336; + xor.b32 %r1275, %r1274, -2147483648; + mov.b32 %f2704, %r1275; + selp.f32 %f3338, %f2704, %f3336, %p78; + setp.geu.f32 %p1196, %f476, 0f00000000; + @%p1196 bra $L__BB2_797; + + cvt.rzi.f32.f32 %f2706, %f1814; + setp.eq.f32 %p1197, %f2706, 0f40000000; + @%p1197 bra $L__BB2_797; + + mov.f32 %f3338, 0f7FFFFFFF; + +$L__BB2_797: + add.f32 %f2709, %f477, 0f40000000; + mov.b32 %r1276, %f2709; + setp.lt.s32 %p1199, %r1276, 2139095040; + @%p1199 bra $L__BB2_802; + + setp.gtu.f32 %p1200, %f477, 0f7F800000; + @%p1200 bra $L__BB2_801; + bra.uni $L__BB2_799; + +$L__BB2_801: + add.f32 %f3338, %f476, 0f40000000; + bra.uni $L__BB2_802; + +$L__BB2_799: + setp.neu.f32 %p1201, %f477, 0f7F800000; + @%p1201 bra $L__BB2_802; + + selp.f32 %f3338, 0fFF800000, 0f7F800000, %p78; + +$L__BB2_802: + mul.f32 %f2710, %f3338, 0fBF000000; + setp.eq.f32 %p1202, %f476, 0f3F800000; + selp.f32 %f2711, 0fBF000000, %f2710, %p1202; + fma.rn.f32 %f2714, %f2711, %f2012, %f1833; + cvt.sat.f32.f32 %f2717, %f2714; + fma.rm.f32 %f2719, %f2717, %f2015, %f2017; + add.f32 %f2720, %f2719, 0fCB40007F; + neg.f32 %f2721, %f2720; + fma.rn.f32 %f2722, %f2711, %f1988, %f2721; + fma.rn.f32 %f2724, %f2711, %f2022, %f2722; + mov.b32 %r1277, %f2719; + shl.b32 %r1278, %r1277, 23; + mov.b32 %f2725, %r1278; + ex2.approx.ftz.f32 %f2726, %f2724; + mul.f32 %f2727, %f2726, %f2725; + add.f32 %f2728, %f371, 0f3F800000; + mul.f32 %f2729, %f2728, %f475; + mul.f32 %f2730, %f371, %f2727; + sub.f32 %f2731, %f2729, %f2730; + div.rn.f32 %f2732, %f435, %f359; + mul.f32 %f488, %f2732, %f2731; + not.pred %p1203, %p55; + mov.f64 %fd1205, %fd447; + @%p1203 bra $L__BB2_804; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1279}, %fd447; + } + xor.b32 %r1280, %r1279, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1281, %temp}, %fd447; + } + mov.b64 %fd1205, {%r1281, %r1280}; + +$L__BB2_804: + setp.eq.f32 %p1204, %f310, 0f00000000; + @%p1204 bra $L__BB2_808; + bra.uni $L__BB2_805; + +$L__BB2_808: + mov.u32 %r1282, 0; + mov.b64 %fd1205, {%r1282, %r153}; + bra.uni $L__BB2_809; + +$L__BB2_805: + setp.gt.s32 %p1205, %r152, -1; + @%p1205 bra $L__BB2_809; + + cvt.rzi.f64.f64 %fd1004, %fd962; + setp.eq.f64 %p1206, %fd1004, 0d4000000000000000; + @%p1206 bra $L__BB2_809; + + mov.f64 %fd1205, 0dFFF8000000000000; + +$L__BB2_809: + selp.f64 %fd1206, %fd1205, %fd429, %p986; + @%p66 bra $L__BB2_814; + + setp.eq.s32 %p1208, %r124, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1283, %temp}, %fd962; + } + setp.eq.s32 %p1209, %r1283, 0; + and.pred %p1210, %p1208, %p1209; + @%p1210 bra $L__BB2_813; + bra.uni $L__BB2_811; + +$L__BB2_813: + mov.u32 %r1286, 0; + mov.b64 %fd1206, {%r1286, %r155}; + bra.uni $L__BB2_814; + +$L__BB2_811: + setp.ne.s32 %p1211, %r156, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1284, %temp}, %fd428; + } + setp.ne.s32 %p1212, %r1284, 0; + or.pred %p1213, %p1211, %p1212; + mov.f64 %fd1206, %fd1205; + @%p1213 bra $L__BB2_814; + + mov.u32 %r1285, 0; + mov.b64 %fd1206, {%r1285, %r159}; + +$L__BB2_814: + not.pred %p1214, %p56; + mov.f64 %fd1208, %fd448; + @%p1214 bra $L__BB2_816; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1287}, %fd448; + } + xor.b32 %r1288, %r1287, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1289, %temp}, %fd448; + } + mov.b64 %fd1208, {%r1289, %r1288}; + +$L__BB2_816: + @%p1204 bra $L__BB2_820; + bra.uni $L__BB2_817; + +$L__BB2_820: + mov.u32 %r1290, 0; + mov.b64 %fd1208, {%r1290, %r157}; + bra.uni $L__BB2_821; + +$L__BB2_817: + setp.gt.s32 %p1216, %r152, -1; + @%p1216 bra $L__BB2_821; + + cvt.rzi.f64.f64 %fd1008, %fd963; + setp.eq.f64 %p1217, %fd1008, 0d4008000000000000; + @%p1217 bra $L__BB2_821; + + mov.f64 %fd1208, 0dFFF8000000000000; + +$L__BB2_821: + selp.f64 %fd1209, %fd1208, %fd432, %p989; + @%p67 bra $L__BB2_826; + + setp.eq.s32 %p1219, %r130, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1291, %temp}, %fd963; + } + setp.eq.s32 %p1220, %r1291, 0; + and.pred %p1221, %p1219, %p1220; + @%p1221 bra $L__BB2_825; + bra.uni $L__BB2_823; + +$L__BB2_825: + mov.u32 %r1294, 0; + mov.b64 %fd1209, {%r1294, %r161}; + bra.uni $L__BB2_826; + +$L__BB2_823: + setp.ne.s32 %p1222, %r156, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1292, %temp}, %fd428; + } + setp.ne.s32 %p1223, %r1292, 0; + or.pred %p1224, %p1222, %p1223; + mov.f64 %fd1209, %fd1208; + @%p1224 bra $L__BB2_826; + + mov.u32 %r1293, 0; + mov.b64 %fd1209, {%r1293, %r164}; + +$L__BB2_826: + not.pred %p1225, %p57; + mov.f64 %fd1211, %fd449; + @%p1225 bra $L__BB2_828; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1295}, %fd449; + } + xor.b32 %r1296, %r1295, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1297, %temp}, %fd449; + } + mov.b64 %fd1211, {%r1297, %r1296}; + +$L__BB2_828: + setp.eq.f32 %p1226, %f541, 0f00000000; + @%p1226 bra $L__BB2_832; + bra.uni $L__BB2_829; + +$L__BB2_832: + mov.u32 %r1298, 0; + mov.b64 %fd1211, {%r1298, %r162}; + bra.uni $L__BB2_833; + +$L__BB2_829: + setp.gt.s32 %p1227, %r160, -1; + @%p1227 bra $L__BB2_833; + + cvt.rzi.f64.f64 %fd1012, %fd964; + setp.eq.f64 %p1228, %fd1012, 0d4010000000000000; + @%p1228 bra $L__BB2_833; + + mov.f64 %fd1211, 0dFFF8000000000000; + +$L__BB2_833: + selp.f64 %fd1212, %fd1211, %fd434, %p992; + @%p68 bra $L__BB2_838; + + setp.eq.s32 %p1230, %r136, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1299, %temp}, %fd964; + } + setp.eq.s32 %p1231, %r1299, 0; + and.pred %p1232, %p1230, %p1231; + @%p1232 bra $L__BB2_837; + bra.uni $L__BB2_835; + +$L__BB2_837: + mov.u32 %r1302, 0; + mov.b64 %fd1212, {%r1302, %r166}; + bra.uni $L__BB2_838; + +$L__BB2_835: + setp.ne.s32 %p1233, %r167, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1300, %temp}, %fd433; + } + setp.ne.s32 %p1234, %r1300, 0; + or.pred %p1235, %p1233, %p1234; + mov.f64 %fd1212, %fd1211; + @%p1235 bra $L__BB2_838; + + mov.u32 %r1301, 0; + mov.b64 %fd1212, {%r1301, %r170}; + +$L__BB2_838: + setp.eq.f32 %p1236, %f541, 0f3F800000; + selp.f64 %fd1015, 0d3FF0000000000000, %fd1212, %p1236; + setp.eq.f32 %p1237, %f310, 0f3F800000; + selp.f64 %fd1016, 0d3FF0000000000000, %fd1209, %p1237; + mul.f64 %fd1017, %fd1016, %fd431; + div.rn.f64 %fd1018, %fd1017, %fd1015; + selp.f64 %fd1019, 0d3FF0000000000000, %fd1206, %p1237; + mul.f64 %fd1020, %fd1019, %fd427; + div.rn.f64 %fd1021, %fd1020, %fd430; + add.f64 %fd1022, %fd1021, %fd426; + add.f64 %fd1023, %fd1022, %fd1018; + cvt.rn.f32.f64 %f2733, %fd1023; + div.rn.f32 %f2734, %f315, %f356; + mul.f32 %f489, %f2734, %f2733; + not.pred %p1238, %p58; + mov.f64 %fd1214, %fd450; + @%p1238 bra $L__BB2_840; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1303}, %fd450; + } + xor.b32 %r1304, %r1303, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1305, %temp}, %fd450; + } + mov.b64 %fd1214, {%r1305, %r1304}; + +$L__BB2_840: + setp.eq.f32 %p1239, %f312, 0f00000000; + @%p1239 bra $L__BB2_844; + bra.uni $L__BB2_841; + +$L__BB2_844: + mov.u32 %r1306, 0; + mov.b64 %fd1214, {%r1306, %r168}; + bra.uni $L__BB2_845; + +$L__BB2_841: + setp.gt.s32 %p1240, %r165, -1; + @%p1240 bra $L__BB2_845; + + cvt.rzi.f64.f64 %fd1025, %fd962; + setp.eq.f64 %p1241, %fd1025, 0d4000000000000000; + @%p1241 bra $L__BB2_845; + + mov.f64 %fd1214, 0dFFF8000000000000; + +$L__BB2_845: + selp.f64 %fd1215, %fd1214, %fd438, %p998; + @%p69 bra $L__BB2_850; + + setp.eq.s32 %p1243, %r124, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1307, %temp}, %fd962; + } + setp.eq.s32 %p1244, %r1307, 0; + and.pred %p1245, %p1243, %p1244; + @%p1245 bra $L__BB2_849; + bra.uni $L__BB2_847; + +$L__BB2_849: + mov.u32 %r1310, 0; + mov.b64 %fd1215, {%r1310, %r171}; + bra.uni $L__BB2_850; + +$L__BB2_847: + setp.ne.s32 %p1246, %r172, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1308, %temp}, %fd437; + } + setp.ne.s32 %p1247, %r1308, 0; + or.pred %p1248, %p1246, %p1247; + mov.f64 %fd1215, %fd1214; + @%p1248 bra $L__BB2_850; + + mov.u32 %r1309, 0; + mov.b64 %fd1215, {%r1309, %r175}; + +$L__BB2_850: + not.pred %p1249, %p59; + mov.f64 %fd1217, %fd451; + @%p1249 bra $L__BB2_852; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1311}, %fd451; + } + xor.b32 %r1312, %r1311, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1313, %temp}, %fd451; + } + mov.b64 %fd1217, {%r1313, %r1312}; + +$L__BB2_852: + @%p1239 bra $L__BB2_856; + bra.uni $L__BB2_853; + +$L__BB2_856: + mov.u32 %r1314, 0; + mov.b64 %fd1217, {%r1314, %r173}; + bra.uni $L__BB2_857; + +$L__BB2_853: + setp.gt.s32 %p1251, %r165, -1; + @%p1251 bra $L__BB2_857; + + cvt.rzi.f64.f64 %fd1029, %fd963; + setp.eq.f64 %p1252, %fd1029, 0d4008000000000000; + @%p1252 bra $L__BB2_857; + + mov.f64 %fd1217, 0dFFF8000000000000; + +$L__BB2_857: + selp.f64 %fd1218, %fd1217, %fd440, %p1002; + @%p70 bra $L__BB2_862; + + setp.eq.s32 %p1254, %r130, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1315, %temp}, %fd963; + } + setp.eq.s32 %p1255, %r1315, 0; + and.pred %p1256, %p1254, %p1255; + @%p1256 bra $L__BB2_861; + bra.uni $L__BB2_859; + +$L__BB2_861: + mov.u32 %r1318, 0; + mov.b64 %fd1218, {%r1318, %r176}; + bra.uni $L__BB2_862; + +$L__BB2_859: + setp.ne.s32 %p1257, %r172, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1316, %temp}, %fd437; + } + setp.ne.s32 %p1258, %r1316, 0; + or.pred %p1259, %p1257, %p1258; + mov.f64 %fd1218, %fd1217; + @%p1259 bra $L__BB2_862; + + mov.u32 %r1317, 0; + mov.b64 %fd1218, {%r1317, %r177}; + +$L__BB2_862: + mov.f64 %fd1220, %fd449; + @%p1225 bra $L__BB2_864; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1319}, %fd449; + } + xor.b32 %r1320, %r1319, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1321, %temp}, %fd449; + } + mov.b64 %fd1220, {%r1321, %r1320}; + +$L__BB2_864: + @%p1226 bra $L__BB2_868; + bra.uni $L__BB2_865; + +$L__BB2_868: + mov.u32 %r1322, 0; + mov.b64 %fd1220, {%r1322, %r162}; + bra.uni $L__BB2_869; + +$L__BB2_865: + setp.gt.s32 %p1262, %r160, -1; + @%p1262 bra $L__BB2_869; + + cvt.rzi.f64.f64 %fd1033, %fd964; + setp.eq.f64 %p1263, %fd1033, 0d4010000000000000; + @%p1263 bra $L__BB2_869; + + mov.f64 %fd1220, 0dFFF8000000000000; + +$L__BB2_869: + selp.f64 %fd1221, %fd1220, %fd434, %p992; + @%p68 bra $L__BB2_874; + + setp.eq.s32 %p1265, %r136, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1323, %temp}, %fd964; + } + setp.eq.s32 %p1266, %r1323, 0; + and.pred %p1267, %p1265, %p1266; + @%p1267 bra $L__BB2_873; + bra.uni $L__BB2_871; + +$L__BB2_873: + mov.u32 %r1326, 0; + mov.b64 %fd1221, {%r1326, %r166}; + bra.uni $L__BB2_874; + +$L__BB2_871: + setp.ne.s32 %p1268, %r167, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1324, %temp}, %fd433; + } + setp.ne.s32 %p1269, %r1324, 0; + or.pred %p1270, %p1268, %p1269; + mov.f64 %fd1221, %fd1220; + @%p1270 bra $L__BB2_874; + + mov.u32 %r1325, 0; + mov.b64 %fd1221, {%r1325, %r170}; + +$L__BB2_874: + selp.f64 %fd1036, 0d3FF0000000000000, %fd1221, %p1236; + setp.eq.f32 %p1272, %f312, 0f3F800000; + selp.f64 %fd1037, 0d3FF0000000000000, %fd1218, %p1272; + mul.f64 %fd1038, %fd1037, %fd439; + div.rn.f64 %fd1039, %fd1038, %fd1036; + selp.f64 %fd1040, 0d3FF0000000000000, %fd1215, %p1272; + mul.f64 %fd1041, %fd1040, %fd436; + div.rn.f64 %fd1042, %fd1041, %fd430; + add.f64 %fd1043, %fd1042, %fd435; + add.f64 %fd1044, %fd1043, %fd1039; + cvt.rn.f32.f64 %f2735, %fd1044; + div.rn.f32 %f2736, %f316, %f358; + mul.f32 %f2737, %f2736, %f2735; + mul.f32 %f2738, %f369, %f488; + mul.f32 %f2739, %f2738, %f2737; + fma.rn.f32 %f2740, %f462, %f489, %f2739; + mul.f32 %f2741, %f369, %f3276; + fma.rn.f32 %f490, %f382, %f2741, %f3275; + mad.lo.s32 %r1327, %r1373, %r182, %r1372; + add.s32 %r1328, %r1327, %r2; + mul.wide.s32 %rd24, %r1328, 4; + add.s64 %rd25, %rd1, %rd24; + ld.global.f32 %f491, [%rd25]; + mul.f32 %f2742, %f409, %f409; + div.rn.f32 %f2743, %f2742, %f490; + add.f32 %f3306, %f3306, %f2743; + mul.f32 %f2744, %f436, %f409; + div.rn.f32 %f2745, %f2744, %f490; + add.f32 %f3305, %f3305, %f2745; + mul.f32 %f2746, %f369, %f382; + mul.f32 %f2747, %f2746, %f409; + div.rn.f32 %f2748, %f2747, %f490; + add.f32 %f3304, %f3304, %f2748; + div.rn.f32 %f2749, %f409, %f490; + add.f32 %f3303, %f3303, %f2749; + mul.f32 %f2750, %f2740, %f409; + div.rn.f32 %f2751, %f2750, %f490; + add.f32 %f3302, %f3302, %f2751; + mul.f32 %f2752, %f436, %f436; + div.rn.f32 %f2753, %f2752, %f490; + add.f32 %f3301, %f3301, %f2753; + mul.f32 %f2754, %f2746, %f436; + div.rn.f32 %f2755, %f2754, %f490; + add.f32 %f3300, %f3300, %f2755; + div.rn.f32 %f2756, %f436, %f490; + add.f32 %f3299, %f3299, %f2756; + mul.f32 %f2757, %f2740, %f436; + div.rn.f32 %f2758, %f2757, %f490; + add.f32 %f3298, %f3298, %f2758; + mul.f32 %f2759, %f2746, %f2746; + div.rn.f32 %f2760, %f2759, %f490; + add.f32 %f3297, %f3297, %f2760; + div.rn.f32 %f2761, %f2746, %f490; + add.f32 %f3296, %f3296, %f2761; + mul.f32 %f2762, %f2740, %f2746; + div.rn.f32 %f2763, %f2762, %f490; + add.f32 %f3295, %f3295, %f2763; + rcp.rn.f32 %f2764, %f490; + add.f32 %f3307, %f3307, %f2764; + div.rn.f32 %f2765, %f2740, %f490; + add.f32 %f3308, %f3308, %f2765; + mul.f32 %f2766, %f2740, %f2740; + div.rn.f32 %f2767, %f2766, %f490; + add.f32 %f3309, %f3309, %f2767; + setp.leu.f32 %p1273, %f490, 0f00000000; + @%p1273 bra $L__BB2_882; + + setp.gt.f32 %p1274, %f491, 0f00000000; + @%p1274 bra $L__BB2_877; + bra.uni $L__BB2_876; + +$L__BB2_877: + setp.lt.f32 %p1275, %f490, 0f00800000; + mul.f32 %f2768, %f490, 0f4B000000; + selp.f32 %f508, %f2768, %f490, %p1275; + selp.f32 %f2769, 0fC1B80000, 0f00000000, %p1275; + mov.b32 %r1329, %f508; + add.s32 %r1330, %r1329, -1059760811; + and.b32 %r1331, %r1330, -8388608; + sub.s32 %r1332, %r1329, %r1331; + mov.b32 %f2770, %r1332; + cvt.rn.f32.s32 %f2771, %r1331; + mov.f32 %f2772, 0f34000000; + fma.rn.f32 %f2773, %f2771, %f2772, %f2769; + add.f32 %f2774, %f2770, 0fBF800000; + mov.f32 %f2775, 0f3E1039F6; + mov.f32 %f2776, 0fBE055027; + fma.rn.f32 %f2777, %f2776, %f2774, %f2775; + mov.f32 %f2778, 0fBDF8CDCC; + fma.rn.f32 %f2779, %f2777, %f2774, %f2778; + mov.f32 %f2780, 0f3E0F2955; + fma.rn.f32 %f2781, %f2779, %f2774, %f2780; + mov.f32 %f2782, 0fBE2AD8B9; + fma.rn.f32 %f2783, %f2781, %f2774, %f2782; + mov.f32 %f2784, 0f3E4CED0B; + fma.rn.f32 %f2785, %f2783, %f2774, %f2784; + mov.f32 %f2786, 0fBE7FFF22; + fma.rn.f32 %f2787, %f2785, %f2774, %f2786; + mov.f32 %f2788, 0f3EAAAA78; + fma.rn.f32 %f2789, %f2787, %f2774, %f2788; + mov.f32 %f2790, 0fBF000000; + fma.rn.f32 %f2791, %f2789, %f2774, %f2790; + mul.f32 %f2792, %f2774, %f2791; + fma.rn.f32 %f2793, %f2792, %f2774, %f2774; + mov.f32 %f2794, 0f3F317218; + fma.rn.f32 %f3339, %f2773, %f2794, %f2793; + setp.lt.u32 %p1276, %r1329, 2139095040; + @%p1276 bra $L__BB2_879; + + mov.f32 %f2795, 0f7F800000; + fma.rn.f32 %f3339, %f508, %f2795, %f2795; + +$L__BB2_879: + setp.eq.f32 %p1277, %f508, 0f00000000; + selp.f32 %f2796, 0fFF800000, %f3339, %p1277; + mul.f32 %f2797, %f491, %f2796; + sub.f32 %f512, %f2797, %f490; + mul.f32 %f2798, %f491, 0f4B000000; + setp.lt.f32 %p1278, %f491, 0f00800000; + selp.f32 %f513, %f2798, %f491, %p1278; + selp.f32 %f2799, 0fC1B80000, 0f00000000, %p1278; + mov.b32 %r1333, %f513; + add.s32 %r1334, %r1333, -1059760811; + and.b32 %r1335, %r1334, -8388608; + sub.s32 %r1336, %r1333, %r1335; + mov.b32 %f2800, %r1336; + cvt.rn.f32.s32 %f2801, %r1335; + fma.rn.f32 %f2803, %f2801, %f2772, %f2799; + add.f32 %f2804, %f2800, 0fBF800000; + fma.rn.f32 %f2807, %f2776, %f2804, %f2775; + fma.rn.f32 %f2809, %f2807, %f2804, %f2778; + fma.rn.f32 %f2811, %f2809, %f2804, %f2780; + fma.rn.f32 %f2813, %f2811, %f2804, %f2782; + fma.rn.f32 %f2815, %f2813, %f2804, %f2784; + fma.rn.f32 %f2817, %f2815, %f2804, %f2786; + fma.rn.f32 %f2819, %f2817, %f2804, %f2788; + fma.rn.f32 %f2821, %f2819, %f2804, %f2790; + mul.f32 %f2822, %f2804, %f2821; + fma.rn.f32 %f2823, %f2822, %f2804, %f2804; + fma.rn.f32 %f3340, %f2803, %f2794, %f2823; + setp.lt.u32 %p1279, %r1333, 2139095040; + @%p1279 bra $L__BB2_881; + + mov.f32 %f2825, 0f7F800000; + fma.rn.f32 %f3340, %f513, %f2825, %f2825; + +$L__BB2_881: + setp.eq.f32 %p1280, %f513, 0f00000000; + selp.f32 %f2826, 0fFF800000, %f3340, %p1280; + mul.f32 %f2827, %f491, %f2826; + sub.f32 %f2828, %f512, %f2827; + add.f32 %f2829, %f491, %f2828; + add.f32 %f3341, %f3341, %f2829; + bra.uni $L__BB2_882; + +$L__BB2_876: + sub.f32 %f3341, %f3341, %f490; + +$L__BB2_882: + add.s32 %r1373, %r1373, 1; + setp.lt.s32 %p1281, %r1373, %r182; + @%p1281 bra $L__BB2_626; + + add.s32 %r1372, %r1372, 1; + setp.lt.s32 %p1282, %r1372, %r182; + @%p1282 bra $L__BB2_625; + +$L__BB2_884: + ld.param.u64 %rd46, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_13]; + ld.param.u64 %rd45, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_12]; + ld.param.u32 %r1360, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_14]; + ld.param.u64 %rd44, [_Z20kernel_MLEFit_XYNBZ_PKfffffffffiiPfS1_S1_i_param_11]; + mov.u32 %r1359, %tid.x; + mov.u32 %r1358, %ntid.x; + mov.u32 %r1357, %ctaid.x; + mad.lo.s32 %r1356, %r1357, %r1358, %r1359; + rcp.rn.f32 %f2830, %f3306; + mov.f32 %f2831, 0f3F800000; + mul.f32 %f2832, %f2830, %f3305; + mul.f32 %f2833, %f2830, %f3304; + mul.f32 %f2834, %f2830, %f3303; + mul.f32 %f2835, %f2830, %f3302; + fma.rn.f32 %f2836, %f2832, %f3305, 0f00000000; + sub.f32 %f2838, %f3301, %f2836; + fma.rn.f32 %f2839, %f2833, %f3305, 0f00000000; + rcp.rn.f32 %f2840, %f2838; + sub.f32 %f2841, %f3300, %f2839; + mul.f32 %f2842, %f2840, %f2841; + fma.rn.f32 %f2843, %f2834, %f3305, 0f00000000; + sub.f32 %f2844, %f3299, %f2843; + mul.f32 %f2845, %f2840, %f2844; + fma.rn.f32 %f2846, %f2835, %f3305, 0f00000000; + sub.f32 %f2847, %f3298, %f2846; + mul.f32 %f2848, %f2840, %f2847; + fma.rn.f32 %f2849, %f2832, %f3304, 0f00000000; + sub.f32 %f2850, %f3300, %f2849; + fma.rn.f32 %f2851, %f2833, %f3304, 0f00000000; + fma.rn.f32 %f2852, %f2842, %f2850, %f2851; + sub.f32 %f2853, %f3297, %f2852; + fma.rn.f32 %f2854, %f2834, %f3304, 0f00000000; + fma.rn.f32 %f2855, %f2845, %f2850, %f2854; + rcp.rn.f32 %f2856, %f2853; + sub.f32 %f2857, %f3296, %f2855; + mul.f32 %f2858, %f2856, %f2857; + fma.rn.f32 %f2859, %f2835, %f3304, 0f00000000; + fma.rn.f32 %f2860, %f2848, %f2850, %f2859; + sub.f32 %f2861, %f3295, %f2860; + mul.f32 %f2862, %f2856, %f2861; + fma.rn.f32 %f2863, %f2832, %f3303, 0f00000000; + sub.f32 %f2864, %f3299, %f2863; + fma.rn.f32 %f2865, %f2833, %f3303, 0f00000000; + fma.rn.f32 %f2866, %f2842, %f2864, %f2865; + sub.f32 %f2867, %f3296, %f2866; + fma.rn.f32 %f2868, %f2834, %f3303, 0f00000000; + fma.rn.f32 %f2869, %f2845, %f2864, %f2868; + fma.rn.f32 %f2870, %f2858, %f2867, %f2869; + sub.f32 %f2871, %f3307, %f2870; + fma.rn.f32 %f2872, %f2835, %f3303, 0f00000000; + fma.rn.f32 %f2873, %f2848, %f2864, %f2872; + fma.rn.f32 %f2874, %f2862, %f2867, %f2873; + rcp.rn.f32 %f2875, %f2871; + sub.f32 %f2876, %f3308, %f2874; + mul.f32 %f2877, %f2875, %f2876; + fma.rn.f32 %f2878, %f2832, %f3302, 0f00000000; + sub.f32 %f2879, %f3298, %f2878; + fma.rn.f32 %f2880, %f2833, %f3302, 0f00000000; + fma.rn.f32 %f2881, %f2842, %f2879, %f2880; + sub.f32 %f2882, %f3295, %f2881; + fma.rn.f32 %f2883, %f2834, %f3302, 0f00000000; + fma.rn.f32 %f2884, %f2845, %f2879, %f2883; + fma.rn.f32 %f2885, %f2858, %f2882, %f2884; + sub.f32 %f2886, %f3308, %f2885; + fma.rn.f32 %f2887, %f2835, %f3302, 0f00000000; + fma.rn.f32 %f2888, %f2848, %f2879, %f2887; + fma.rn.f32 %f2889, %f2862, %f2882, %f2888; + fma.rn.f32 %f2890, %f2877, %f2886, %f2889; + sub.f32 %f2891, %f3309, %f2890; + add.f32 %f2892, %f2832, 0f00000000; + sub.f32 %f2893, %f545, %f2892; + add.f32 %f2894, %f2833, 0f00000000; + fma.rn.f32 %f2895, %f2842, %f2893, %f2894; + sub.f32 %f2896, %f545, %f2895; + add.f32 %f2897, %f2834, 0f00000000; + fma.rn.f32 %f2898, %f2845, %f2893, %f2897; + fma.rn.f32 %f2899, %f2858, %f2896, %f2898; + sub.f32 %f2900, %f545, %f2899; + add.f32 %f2901, %f2835, 0f00000000; + fma.rn.f32 %f2902, %f2848, %f2893, %f2901; + fma.rn.f32 %f2903, %f2862, %f2896, %f2902; + fma.rn.f32 %f2904, %f2877, %f2900, %f2903; + sub.f32 %f2905, %f545, %f2904; + div.rn.f32 %f2906, %f2905, %f2891; + fma.rn.f32 %f2907, %f2886, %f2906, 0f00000000; + sub.f32 %f2908, %f2900, %f2907; + mul.f32 %f2909, %f2875, %f2908; + fma.rn.f32 %f2910, %f2867, %f2909, 0f00000000; + fma.rn.f32 %f2911, %f2882, %f2906, %f2910; + sub.f32 %f2912, %f2896, %f2911; + mul.f32 %f2913, %f2856, %f2912; + fma.rn.f32 %f2914, %f2850, %f2913, 0f00000000; + fma.rn.f32 %f2915, %f2864, %f2909, %f2914; + fma.rn.f32 %f2916, %f2879, %f2906, %f2915; + sub.f32 %f2917, %f2893, %f2916; + mul.f32 %f2918, %f2840, %f2917; + fma.rn.f32 %f2919, %f3305, %f2918, 0f00000000; + fma.rn.f32 %f2920, %f3304, %f2913, %f2919; + fma.rn.f32 %f2921, %f3303, %f2909, %f2920; + fma.rn.f32 %f2922, %f3302, %f2906, %f2921; + sub.f32 %f2923, %f2831, %f2922; + mul.f32 %f2924, %f2830, %f2923; + fma.rn.f32 %f2925, %f2832, 0f00000000, 0f00000000; + sub.f32 %f2926, %f2831, %f2925; + fma.rn.f32 %f2927, %f2833, 0f00000000, 0f00000000; + fma.rn.f32 %f2928, %f2842, %f2926, %f2927; + sub.f32 %f2929, %f545, %f2928; + fma.rn.f32 %f2930, %f2834, 0f00000000, 0f00000000; + fma.rn.f32 %f2931, %f2845, %f2926, %f2930; + fma.rn.f32 %f2932, %f2858, %f2929, %f2931; + sub.f32 %f2933, %f545, %f2932; + fma.rn.f32 %f2934, %f2835, 0f00000000, 0f00000000; + fma.rn.f32 %f2935, %f2848, %f2926, %f2934; + fma.rn.f32 %f2936, %f2862, %f2929, %f2935; + fma.rn.f32 %f2937, %f2877, %f2933, %f2936; + sub.f32 %f2938, %f545, %f2937; + div.rn.f32 %f2939, %f2938, %f2891; + fma.rn.f32 %f2940, %f2886, %f2939, 0f00000000; + sub.f32 %f2941, %f2933, %f2940; + mul.f32 %f2942, %f2875, %f2941; + fma.rn.f32 %f2943, %f2867, %f2942, 0f00000000; + fma.rn.f32 %f2944, %f2882, %f2939, %f2943; + sub.f32 %f2945, %f2929, %f2944; + mul.f32 %f2946, %f2856, %f2945; + fma.rn.f32 %f2947, %f2850, %f2946, 0f00000000; + fma.rn.f32 %f2948, %f2864, %f2942, %f2947; + fma.rn.f32 %f2949, %f2879, %f2939, %f2948; + sub.f32 %f2950, %f2926, %f2949; + mul.f32 %f2951, %f2840, %f2950; + sub.f32 %f2952, %f545, %f2925; + fma.rn.f32 %f2953, %f2842, %f2952, %f2927; + sub.f32 %f2954, %f2831, %f2953; + fma.rn.f32 %f2955, %f2845, %f2952, %f2930; + fma.rn.f32 %f2956, %f2858, %f2954, %f2955; + sub.f32 %f2957, %f545, %f2956; + fma.rn.f32 %f2958, %f2848, %f2952, %f2934; + fma.rn.f32 %f2959, %f2862, %f2954, %f2958; + fma.rn.f32 %f2960, %f2877, %f2957, %f2959; + sub.f32 %f2961, %f545, %f2960; + div.rn.f32 %f2962, %f2961, %f2891; + fma.rn.f32 %f2963, %f2886, %f2962, 0f00000000; + sub.f32 %f2964, %f2957, %f2963; + mul.f32 %f2965, %f2875, %f2964; + fma.rn.f32 %f2966, %f2867, %f2965, 0f00000000; + fma.rn.f32 %f2967, %f2882, %f2962, %f2966; + sub.f32 %f2968, %f2954, %f2967; + mul.f32 %f2969, %f2856, %f2968; + sub.f32 %f2970, %f545, %f2953; + fma.rn.f32 %f2971, %f2858, %f2970, %f2955; + sub.f32 %f2972, %f2831, %f2971; + fma.rn.f32 %f2973, %f2862, %f2970, %f2958; + fma.rn.f32 %f2974, %f2877, %f2972, %f2973; + sub.f32 %f2975, %f545, %f2974; + div.rn.f32 %f2976, %f2975, %f2891; + fma.rn.f32 %f2977, %f2886, %f2976, 0f00000000; + sub.f32 %f2978, %f2972, %f2977; + mul.f32 %f2979, %f2875, %f2978; + sub.f32 %f2980, %f545, %f2971; + fma.rn.f32 %f2981, %f2877, %f2980, %f2973; + sub.f32 %f2982, %f2831, %f2981; + div.rn.f32 %f2983, %f2982, %f2891; + cvta.to.global.u64 %rd26, %rd44; + mul.wide.s32 %rd27, %r1356, 4; + add.s64 %rd28, %rd26, %rd27; + st.global.f32 [%rd28], %f3278; + add.s32 %r1341, %r1356, %r1360; + mul.wide.s32 %rd29, %r1360, 4; + add.s64 %rd30, %rd28, %rd29; + st.global.f32 [%rd30], %f3277; + add.s32 %r1342, %r1341, %r1360; + shl.b32 %r1343, %r1360, 3; + cvt.s64.s32 %rd31, %r1343; + add.s64 %rd32, %rd28, %rd31; + st.global.f32 [%rd32], %f3276; + add.s32 %r1344, %r1342, %r1360; + mul.wide.s32 %rd33, %r1344, 4; + add.s64 %rd34, %rd26, %rd33; + st.global.f32 [%rd34], %f3275; + add.s64 %rd35, %rd32, %rd31; + st.global.f32 [%rd35], %f3274; + cvta.to.global.u64 %rd36, %rd45; + add.s64 %rd37, %rd36, %rd27; + st.global.f32 [%rd37], %f2924; + add.s64 %rd38, %rd37, %rd29; + st.global.f32 [%rd38], %f2951; + add.s64 %rd39, %rd37, %rd31; + st.global.f32 [%rd39], %f2969; + add.s64 %rd40, %rd36, %rd33; + st.global.f32 [%rd40], %f2979; + add.s64 %rd41, %rd39, %rd31; + st.global.f32 [%rd41], %f2983; + cvta.to.global.u64 %rd42, %rd46; + add.s64 %rd43, %rd42, %rd27; + st.global.f32 [%rd43], %f3341; + +$L__BB2_885: ret; -} +} // .globl _Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i .visible .entry _Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i( .param .u64 _Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_0, @@ -10873,1019 +19156,1862 @@ BB2_226: .param .u32 _Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_7 ) { - .local .align 16 .b8 __local_depot3[144]; - .reg .b64 %SP; - .reg .b64 %SPL; - .reg .pred %p<375>; - .reg .f32 %f<3383>; - .reg .b32 %r<335>; - .reg .b64 %rd<148>; - - - mov.u64 %SPL, __local_depot3; - ld.param.u64 %rd69, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_0]; - ld.param.f32 %f3316, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_1]; - ld.param.u32 %r71, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_2]; - ld.param.u32 %r72, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_3]; - ld.param.u32 %r73, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_7]; - cvta.to.global.u64 %rd1, %rd69; - add.u64 %rd2, %SPL, 0; - mov.u32 %r74, %ntid.x; - mov.u32 %r75, %ctaid.x; - mov.u32 %r76, %tid.x; - mad.lo.s32 %r1, %r74, %r75, %r76; - setp.ge.s32 %p13, %r1, %r73; - @%p13 bra BB3_227; - - mov.u32 %r77, 0; - mov.u64 %rd127, %rd2; - mov.u32 %r307, %r77; - -BB3_2: - st.local.u32 [%rd127], %r77; - add.s64 %rd127, %rd127, 4; - add.s32 %r307, %r307, 1; - setp.lt.u32 %p14, %r307, 36; - @%p14 bra BB3_2; - - mul.lo.s32 %r79, %r71, %r71; - mul.lo.s32 %r4, %r79, %r1; - mov.f32 %f617, 0f00000000; - setp.lt.s32 %p15, %r71, 1; - mov.f32 %f1, %f617; - mov.f32 %f2, %f617; - mov.f32 %f3, %f617; - @%p15 bra BB3_17; - - and.b32 %r5, %r71, 3; - shl.b32 %r6, %r71, 2; - mov.f32 %f620, 0f00000000; - mov.u32 %r80, 0; - mov.u32 %r308, %r80; - mov.f32 %f1, %f620; - mov.f32 %f2, %f620; - mov.f32 %f3, %f620; - -BB3_5: - cvt.rn.f32.s32 %f4, %r308; - setp.eq.s32 %p16, %r5, 0; - @%p16 bra BB3_6; - - setp.eq.s32 %p17, %r5, 1; - @%p17 bra BB3_8; - bra.uni BB3_9; - -BB3_8: - mov.u32 %r310, %r80; - bra.uni BB3_13; - -BB3_6: - mov.u32 %r312, %r80; - mov.f32 %f3215, %f1; - mov.f32 %f3216, %f2; - mov.f32 %f3217, %f3; - mov.f32 %f1, %f620; - mov.f32 %f2, %f620; - mov.f32 %f3, %f620; - bra.uni BB3_14; - -BB3_9: - setp.eq.s32 %p18, %r5, 2; - @%p18 bra BB3_10; - bra.uni BB3_11; - -BB3_10: - mov.u32 %r309, %r80; - bra.uni BB3_12; - -BB3_11: - add.s32 %r85, %r308, %r4; - mul.wide.s32 %rd75, %r85, 4; - add.s64 %rd76, %rd1, %rd75; - ld.global.f32 %f624, [%rd76]; - fma.rn.f32 %f3, %f4, %f624, %f3; - fma.rn.f32 %f2, %f624, 0f00000000, %f2; - add.f32 %f1, %f1, %f624; - mov.u32 %r309, 1; - -BB3_12: - neg.s32 %r86, %r309; - and.b32 %r87, %r86, %r71; - add.s32 %r88, %r87, %r308; - add.s32 %r89, %r88, %r4; - mul.wide.s32 %rd77, %r89, 4; - add.s64 %rd78, %rd1, %rd77; - ld.global.f32 %f625, [%rd78]; - fma.rn.f32 %f3, %f4, %f625, %f3; - cvt.rn.f32.s32 %f626, %r309; - fma.rn.f32 %f2, %f626, %f625, %f2; - add.f32 %f1, %f1, %f625; - add.s32 %r310, %r309, 1; - -BB3_13: - mad.lo.s32 %r90, %r310, %r71, %r308; - add.s32 %r91, %r90, %r4; - mul.wide.s32 %rd79, %r91, 4; - add.s64 %rd80, %rd1, %rd79; - ld.global.f32 %f627, [%rd80]; - fma.rn.f32 %f3217, %f4, %f627, %f3; - cvt.rn.f32.s32 %f628, %r310; - fma.rn.f32 %f3216, %f628, %f627, %f2; - add.f32 %f3215, %f1, %f627; - add.s32 %r312, %r310, 1; - mov.f32 %f1, %f3215; - mov.f32 %f2, %f3216; - mov.f32 %f3, %f3217; - -BB3_14: - setp.lt.u32 %p19, %r71, 4; - @%p19 bra BB3_16; - -BB3_15: - mad.lo.s32 %r92, %r312, %r71, %r308; - add.s32 %r93, %r92, %r4; - mul.wide.s32 %rd81, %r93, 4; - add.s64 %rd82, %rd1, %rd81; - ld.global.f32 %f629, [%rd82]; - fma.rn.f32 %f630, %f4, %f629, %f3217; - cvt.rn.f32.s32 %f631, %r312; - fma.rn.f32 %f632, %f631, %f629, %f3216; - add.f32 %f633, %f3215, %f629; - cvt.s64.s32 %rd83, %r6; - add.s64 %rd84, %rd82, %rd83; - ld.global.f32 %f634, [%rd84]; - fma.rn.f32 %f635, %f4, %f634, %f630; - add.s32 %r94, %r312, 1; - cvt.rn.f32.s32 %f636, %r94; - fma.rn.f32 %f637, %f636, %f634, %f632; - add.f32 %f638, %f633, %f634; - add.s64 %rd85, %rd84, %rd83; - ld.global.f32 %f639, [%rd85]; - fma.rn.f32 %f640, %f4, %f639, %f635; - add.s32 %r95, %r312, 2; - cvt.rn.f32.s32 %f641, %r95; - fma.rn.f32 %f642, %f641, %f639, %f637; - add.f32 %f643, %f638, %f639; - add.s64 %rd86, %rd85, %rd83; - ld.global.f32 %f644, [%rd86]; - fma.rn.f32 %f3217, %f4, %f644, %f640; - add.s32 %r96, %r312, 3; - cvt.rn.f32.s32 %f645, %r96; - fma.rn.f32 %f3216, %f645, %f644, %f642; - add.f32 %f3215, %f643, %f644; - add.s32 %r312, %r312, 4; - setp.lt.s32 %p20, %r312, %r71; - mov.f32 %f1, %f3215; - mov.f32 %f2, %f3216; - mov.f32 %f3, %f3217; - @%p20 bra BB3_15; - -BB3_16: - add.s32 %r308, %r308, 1; - setp.lt.s32 %p21, %r308, %r71; - @%p21 bra BB3_5; - -BB3_17: - div.rn.f32 %f3321, %f3, %f1; - div.rn.f32 %f3320, %f2, %f1; - mov.f32 %f648, 0f3F000000; - div.rn.f32 %f649, %f648, %f3316; - div.rn.f32 %f40, %f649, %f3316; - mov.f32 %f3226, 0f51BA43B7; - mov.f32 %f3227, %f617; - @%p15 bra BB3_36; - - and.b32 %r16, %r71, 3; - mov.f32 %f3227, 0f00000000; - mov.u32 %r97, 0; - mov.f32 %f3226, 0f51BA43B7; - mov.u32 %r313, %r97; - -BB3_19: - mov.u32 %r314, %r97; - -BB3_20: - cvt.rn.f32.s32 %f654, %r314; - mul.f32 %f655, %f654, %f654; - mul.f32 %f45, %f40, %f655; - neg.f32 %f46, %f45; - mul.f32 %f47, %f45, 0fBFB8AA3B; - mov.f32 %f653, 0f00000000; - mov.f32 %f3244, %f653; - mov.f32 %f3245, %f653; - mov.u32 %r315, %r97; - -BB3_21: - sub.s32 %r101, %r315, %r313; - cvt.rn.f32.s32 %f50, %r101; - mul.lo.s32 %r20, %r315, %r71; - setp.eq.s32 %p23, %r16, 0; - @%p23 bra BB3_22; - - setp.eq.s32 %p24, %r16, 1; - @%p24 bra BB3_26; - bra.uni BB3_24; - -BB3_26: - mul.f32 %f670, %f50, %f50; - mul.f32 %f3235, %f40, %f670; - neg.f32 %f671, %f3235; - mul.f32 %f672, %f3235, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f673, %f672; - mov.f32 %f674, 0fBF317200; - fma.rn.f32 %f675, %f673, %f674, %f671; - mov.f32 %f676, 0fB5BFBE8E; - fma.rn.f32 %f677, %f673, %f676, %f675; - mul.f32 %f678, %f677, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f679, %f678; - add.f32 %f680, %f673, 0f00000000; - ex2.approx.f32 %f681, %f680; - mul.f32 %f3234, %f679, %f681; - mov.u32 %r317, 0; - bra.uni BB3_29; - -BB3_22: - mov.f32 %f3238, %f3244; - mov.f32 %f3239, %f3245; - mov.u32 %r319, %r97; - mov.f32 %f3244, %f653; - mov.f32 %f3245, %f653; - bra.uni BB3_30; - -BB3_24: - setp.ne.s32 %p25, %r16, 2; - @%p25 bra BB3_27; - - mul.f32 %f658, %f50, %f50; - mul.f32 %f3235, %f40, %f658; - neg.f32 %f659, %f3235; - mul.f32 %f660, %f3235, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f661, %f660; - mov.f32 %f662, 0fBF317200; - fma.rn.f32 %f663, %f661, %f662, %f659; - mov.f32 %f664, 0fB5BFBE8E; - fma.rn.f32 %f665, %f661, %f664, %f663; - mul.f32 %f666, %f665, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f667, %f666; - add.f32 %f668, %f661, 0f00000000; - ex2.approx.f32 %f669, %f668; - mul.f32 %f3234, %f667, %f669; - mov.u32 %r316, 0; - bra.uni BB3_28; - -BB3_27: - setp.lt.f32 %p26, %f45, 0fC2D20000; - mul.f32 %f682, %f50, %f50; - mul.f32 %f3235, %f40, %f682; - neg.f32 %f683, %f3235; - mul.f32 %f684, %f3235, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f685, %f684; - mov.f32 %f686, 0fBF317200; - fma.rn.f32 %f687, %f685, %f686, %f683; - mov.f32 %f688, 0fB5BFBE8E; - fma.rn.f32 %f689, %f685, %f688, %f687; - mul.f32 %f690, %f689, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f691, %f690; - add.f32 %f692, %f685, 0f00000000; - ex2.approx.f32 %f693, %f692; - mul.f32 %f3234, %f691, %f693; - setp.gt.f32 %p27, %f3235, 0f42D20000; - selp.f32 %f694, 0f00000000, %f3234, %p27; - setp.lt.f32 %p28, %f3235, 0fC2D20000; - selp.f32 %f695, 0f7F800000, %f694, %p28; - cvt.rzi.f32.f32 %f696, %f47; - fma.rn.f32 %f697, %f696, %f686, %f46; - fma.rn.f32 %f698, %f696, %f688, %f697; - mul.f32 %f699, %f698, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f700, %f699; - add.f32 %f701, %f696, 0f00000000; - ex2.approx.f32 %f702, %f701; - mul.f32 %f703, %f700, %f702; - setp.gt.f32 %p29, %f45, 0f42D20000; - selp.f32 %f704, 0f00000000, %f703, %p29; - selp.f32 %f705, 0f7F800000, %f704, %p26; - mul.f32 %f706, %f695, %f705; - add.s32 %r105, %r20, %r4; - mul.wide.s32 %rd87, %r105, 4; - add.s64 %rd88, %rd1, %rd87; - ld.global.f32 %f707, [%rd88]; - fma.rn.f32 %f3245, %f707, %f706, %f3245; - add.f32 %f3244, %f3244, %f706; - mov.u32 %r316, 1; - -BB3_28: - sub.s32 %r106, %r314, %r316; - cvt.rn.f32.s32 %f708, %r106; - mul.f32 %f709, %f708, %f708; - setp.gt.f32 %p30, %f3235, 0f42D20000; - selp.f32 %f710, 0f00000000, %f3234, %p30; - setp.lt.f32 %p31, %f3235, 0fC2D20000; - selp.f32 %f711, 0f7F800000, %f710, %p31; - mul.f32 %f712, %f40, %f709; - neg.f32 %f713, %f712; - mul.f32 %f714, %f712, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f715, %f714; - mov.f32 %f716, 0fBF317200; - fma.rn.f32 %f717, %f715, %f716, %f713; - mov.f32 %f718, 0fB5BFBE8E; - fma.rn.f32 %f719, %f715, %f718, %f717; - mul.f32 %f720, %f719, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f721, %f720; - add.f32 %f722, %f715, 0f00000000; - ex2.approx.f32 %f723, %f722; - mul.f32 %f724, %f721, %f723; - setp.gt.f32 %p32, %f712, 0f42D20000; - selp.f32 %f725, 0f00000000, %f724, %p32; - setp.lt.f32 %p33, %f712, 0fC2D20000; - selp.f32 %f726, 0f7F800000, %f725, %p33; - mul.f32 %f727, %f711, %f726; - add.s32 %r107, %r316, %r20; - add.s32 %r108, %r107, %r4; - mul.wide.s32 %rd89, %r108, 4; - add.s64 %rd90, %rd1, %rd89; - ld.global.f32 %f728, [%rd90]; - fma.rn.f32 %f3245, %f728, %f727, %f3245; - add.f32 %f3244, %f3244, %f727; - add.s32 %r317, %r316, 1; - -BB3_29: - sub.s32 %r109, %r314, %r317; - cvt.rn.f32.s32 %f729, %r109; - mul.f32 %f730, %f729, %f729; - setp.gt.f32 %p34, %f3235, 0f42D20000; - selp.f32 %f731, 0f00000000, %f3234, %p34; - setp.lt.f32 %p35, %f3235, 0fC2D20000; - selp.f32 %f732, 0f7F800000, %f731, %p35; - mul.f32 %f733, %f40, %f730; - neg.f32 %f734, %f733; - mul.f32 %f735, %f733, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f736, %f735; - mov.f32 %f737, 0fBF317200; - fma.rn.f32 %f738, %f736, %f737, %f734; - mov.f32 %f739, 0fB5BFBE8E; - fma.rn.f32 %f740, %f736, %f739, %f738; - mul.f32 %f741, %f740, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f742, %f741; - add.f32 %f743, %f736, 0f00000000; - ex2.approx.f32 %f744, %f743; - mul.f32 %f745, %f742, %f744; - setp.gt.f32 %p36, %f733, 0f42D20000; - selp.f32 %f746, 0f00000000, %f745, %p36; - setp.lt.f32 %p37, %f733, 0fC2D20000; - selp.f32 %f747, 0f7F800000, %f746, %p37; - mul.f32 %f748, %f732, %f747; - add.s32 %r110, %r317, %r20; - add.s32 %r111, %r110, %r4; - mul.wide.s32 %rd91, %r111, 4; - add.s64 %rd92, %rd1, %rd91; - ld.global.f32 %f749, [%rd92]; - fma.rn.f32 %f3239, %f749, %f748, %f3245; - add.f32 %f3238, %f3244, %f748; - add.s32 %r319, %r317, 1; - mov.f32 %f3244, %f3238; - mov.f32 %f3245, %f3239; - -BB3_30: - setp.lt.u32 %p38, %r71, 4; - @%p38 bra BB3_33; - - mul.f32 %f750, %f50, %f50; - mul.f32 %f751, %f40, %f750; - neg.f32 %f752, %f751; - mul.f32 %f753, %f751, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f754, %f753; - mov.f32 %f755, 0fBF317200; - fma.rn.f32 %f756, %f754, %f755, %f752; - mov.f32 %f757, 0fB5BFBE8E; - fma.rn.f32 %f758, %f754, %f757, %f756; + .reg .pred %p<746>; + .reg .f32 %f<3142>; + .reg .b32 %r<856>; + .reg .f64 %fd<643>; + .reg .b64 %rd<54>; + + + ld.param.u64 %rd7, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_0]; + ld.param.f32 %f3043, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_1]; + ld.param.u32 %r108, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_2]; + ld.param.u32 %r110, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_7]; + cvta.to.global.u64 %rd1, %rd7; + mov.u32 %r111, %ntid.x; + mov.u32 %r112, %ctaid.x; + mov.u32 %r113, %tid.x; + mad.lo.s32 %r1, %r112, %r111, %r113; + setp.ge.s32 %p43, %r1, %r110; + @%p43 bra $L__BB3_463; + + mul.lo.s32 %r114, %r108, %r108; + mul.lo.s32 %r2, %r114, %r1; + setp.lt.s32 %p44, %r108, 1; + mov.f32 %f2968, 0f00000000; + mov.f32 %f2959, %f2968; + mov.f32 %f2960, %f2968; + mov.f32 %f2961, %f2968; + @%p44 bra $L__BB3_11; + + add.s32 %r3, %r108, -1; + and.b32 %r4, %r108, 3; + sub.s32 %r5, %r108, %r4; + shl.b32 %r6, %r108, 2; + mov.u32 %r115, 0; + setp.lt.u32 %p45, %r3, 3; + setp.eq.s32 %p47, %r4, 0; + setp.eq.s32 %p48, %r4, 1; + setp.eq.s32 %p49, %r4, 2; + cvt.s64.s32 %rd13, %r6; + mov.u32 %r841, %r115; + +$L__BB3_3: + cvt.rn.f32.s32 %f4, %r841; + mov.u32 %r844, %r115; + @%p45 bra $L__BB3_6; + + mov.u32 %r844, %r115; + mov.u32 %r843, %r5; + +$L__BB3_5: + mad.lo.s32 %r118, %r844, %r108, %r841; + add.s32 %r119, %r118, %r2; + mul.wide.s32 %rd11, %r119, 4; + add.s64 %rd12, %rd1, %rd11; + ld.global.f32 %f504, [%rd12]; + fma.rn.f32 %f505, %f504, %f4, %f2959; + cvt.rn.f32.s32 %f506, %r844; + fma.rn.f32 %f507, %f504, %f506, %f2960; + add.f32 %f508, %f2961, %f504; + add.s64 %rd14, %rd12, %rd13; + ld.global.f32 %f509, [%rd14]; + fma.rn.f32 %f510, %f509, %f4, %f505; + add.s32 %r120, %r844, 1; + cvt.rn.f32.s32 %f511, %r120; + fma.rn.f32 %f512, %f509, %f511, %f507; + add.f32 %f513, %f508, %f509; + add.s64 %rd15, %rd14, %rd13; + ld.global.f32 %f514, [%rd15]; + fma.rn.f32 %f515, %f514, %f4, %f510; + add.s32 %r121, %r844, 2; + cvt.rn.f32.s32 %f516, %r121; + fma.rn.f32 %f517, %f514, %f516, %f512; + add.f32 %f518, %f513, %f514; + add.s64 %rd16, %rd15, %rd13; + ld.global.f32 %f519, [%rd16]; + fma.rn.f32 %f2959, %f519, %f4, %f515; + add.s32 %r122, %r844, 3; + cvt.rn.f32.s32 %f520, %r122; + fma.rn.f32 %f2960, %f519, %f520, %f517; + add.f32 %f2961, %f518, %f519; + add.s32 %r844, %r844, 4; + add.s32 %r843, %r843, -4; + setp.ne.s32 %p46, %r843, 0; + @%p46 bra $L__BB3_5; + +$L__BB3_6: + @%p47 bra $L__BB3_10; + + mad.lo.s32 %r13, %r844, %r108, %r841; + add.s32 %r123, %r13, %r2; + mul.wide.s32 %rd17, %r123, 4; + add.s64 %rd18, %rd1, %rd17; + ld.global.f32 %f521, [%rd18]; + fma.rn.f32 %f2959, %f521, %f4, %f2959; + cvt.rn.f32.s32 %f522, %r844; + fma.rn.f32 %f2960, %f521, %f522, %f2960; + add.f32 %f2961, %f2961, %f521; + @%p48 bra $L__BB3_10; + + add.s32 %r14, %r13, %r108; + add.s32 %r124, %r14, %r2; + mul.wide.s32 %rd19, %r124, 4; + add.s64 %rd20, %rd1, %rd19; + ld.global.f32 %f523, [%rd20]; + fma.rn.f32 %f2959, %f523, %f4, %f2959; + add.s32 %r125, %r844, 1; + cvt.rn.f32.s32 %f524, %r125; + fma.rn.f32 %f2960, %f523, %f524, %f2960; + add.f32 %f2961, %f2961, %f523; + @%p49 bra $L__BB3_10; + + add.s32 %r126, %r844, 2; + add.s32 %r127, %r14, %r108; + add.s32 %r128, %r127, %r2; + mul.wide.s32 %rd21, %r128, 4; + add.s64 %rd22, %rd1, %rd21; + ld.global.f32 %f525, [%rd22]; + fma.rn.f32 %f2959, %f525, %f4, %f2959; + cvt.rn.f32.s32 %f526, %r126; + fma.rn.f32 %f2960, %f525, %f526, %f2960; + add.f32 %f2961, %f2961, %f525; + +$L__BB3_10: + add.s32 %r841, %r841, 1; + setp.lt.s32 %p50, %r841, %r108; + @%p50 bra $L__BB3_3; + +$L__BB3_11: + div.rn.f32 %f3048, %f2959, %f2961; + div.rn.f32 %f3047, %f2960, %f2961; + mov.f32 %f3045, 0f51BA43B7; + @%p44 bra $L__BB3_51; + + mov.f32 %f531, 0f3F000000; + div.rn.f32 %f532, %f531, %f3043; + div.rn.f32 %f533, %f532, %f3043; + cvt.f64.f32 %fd1, %f533; + mov.f64 %fd246, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r16}, %fd246; + } + and.b32 %r17, %r16, 2146435072; + and.b32 %r18, %r16, 2147483647; + setp.gt.s32 %p52, %r16, -1; + selp.b32 %r19, 2146435072, 0, %p52; + mov.u32 %r129, 0; + or.b32 %r20, %r19, -2147483648; + mul.wide.s32 %rd23, %r2, 4; + add.s64 %rd2, %rd1, %rd23; + setp.eq.s32 %p54, %r17, 1062207488; + setp.lt.s32 %p55, %r16, 0; + setp.ne.s32 %p60, %r18, 1071644672; + setp.eq.s32 %p87, %r18, 2146435072; + mov.u32 %r845, %r129; + +$L__BB3_13: + mov.u32 %r846, %r129; + +$L__BB3_14: + mov.u32 %r132, 1; + sub.s32 %r24, %r132, %r846; + mov.f32 %f2971, 0f00000000; + mov.f32 %f2972, %f2971; + mov.u32 %r847, %r129; + +$L__BB3_15: + add.s32 %r849, %r846, -1; + sub.s32 %r26, %r847, %r845; + cvt.rn.f32.s32 %f536, %r26; + cvt.f64.f32 %fd2, %f536; + { + .reg .b32 %temp; + mov.b64 {%temp, %r27}, %fd2; + } + abs.f64 %fd247, %fd2; + { // callseq 67, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd247; + .param .b64 param1; + st.param.f64 [param1+0], %fd246; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd3, [retval0+0]; + } // callseq 67 + setp.lt.s32 %p53, %r27, 0; + and.pred %p1, %p53, %p54; + selp.b32 %r134, %r27, 0, %p54; + or.b32 %r135, %r134, 2146435072; + selp.b32 %r28, %r135, %r134, %p55; + add.f64 %fd4, %fd2, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r136}, %fd4; + } + and.b32 %r29, %r136, 2146435072; + setp.ne.s32 %p56, %r29, 2146435072; + setp.gtu.f64 %p57, %fd247, 0d7FF0000000000000; + setp.gt.f64 %p58, %fd247, 0d3FF0000000000000; + selp.b32 %r137, 2146435072, 0, %p58; + xor.b32 %r138, %r137, 2146435072; + selp.b32 %r139, %r138, %r137, %p55; + setp.eq.s32 %p59, %r26, -1; + selp.b32 %r30, 1072693248, %r139, %p59; + and.b32 %r31, %r27, 2147483647; + and.pred %p61, %p60, %p1; + selp.b32 %r32, %r20, %r19, %p61; + or.pred %p2, %p56, %p57; + mul.lo.s32 %r140, %r108, %r847; + mul.wide.s32 %rd24, %r140, 4; + add.s64 %rd53, %rd2, %rd24; + mov.u32 %r848, %r24; + mov.u32 %r850, %r129; + +$L__BB3_16: + not.pred %p62, %p1; + mov.f64 %fd578, %fd3; + @%p62 bra $L__BB3_18; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r141}, %fd3; + } + xor.b32 %r142, %r141, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r143, %temp}, %fd3; + } + mov.b64 %fd578, {%r143, %r142}; + +$L__BB3_18: + setp.eq.s32 %p63, %r26, 0; + @%p63 bra $L__BB3_22; + + setp.gt.s32 %p64, %r27, -1; + @%p64 bra $L__BB3_23; + + cvt.rzi.f64.f64 %fd250, %fd246; + setp.eq.f64 %p65, %fd250, 0d4000000000000000; + @%p65 bra $L__BB3_23; + + mov.f64 %fd578, 0dFFF8000000000000; + bra.uni $L__BB3_23; + +$L__BB3_22: + mov.u32 %r144, 0; + mov.b64 %fd578, {%r144, %r28}; + +$L__BB3_23: + selp.f64 %fd579, %fd578, %fd4, %p56; + @%p2 bra $L__BB3_28; + + { + .reg .b32 %temp; + mov.b64 {%r145, %temp}, %fd246; + } + setp.eq.s32 %p68, %r145, 0; + and.pred %p69, %p87, %p68; + @%p69 bra $L__BB3_27; + bra.uni $L__BB3_25; + +$L__BB3_27: + mov.u32 %r148, 0; + mov.b64 %fd579, {%r148, %r30}; + bra.uni $L__BB3_28; + +$L__BB3_25: + setp.ne.s32 %p70, %r31, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r146, %temp}, %fd2; + } + setp.ne.s32 %p71, %r146, 0; + or.pred %p72, %p70, %p71; + mov.f64 %fd579, %fd578; + @%p72 bra $L__BB3_28; + + mov.u32 %r147, 0; + mov.b64 %fd579, {%r147, %r32}; + +$L__BB3_28: + setp.eq.s32 %p73, %r26, 1; + selp.f64 %fd253, 0d3FF0000000000000, %fd579, %p73; + mov.f64 %fd254, 0d3FF0000000000000; + mul.f64 %fd13, %fd253, %fd1; + neg.f64 %fd255, %fd13; + mov.f64 %fd256, 0d4338000000000000; + mov.f64 %fd257, 0d3FF71547652B82FE; + fma.rn.f64 %fd258, %fd255, %fd257, %fd256; + { + .reg .b32 %temp; + mov.b64 {%r36, %temp}, %fd258; + } + mov.f64 %fd259, 0dC338000000000000; + add.rn.f64 %fd260, %fd258, %fd259; + mov.f64 %fd261, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd262, %fd260, %fd261, %fd255; + mov.f64 %fd263, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd264, %fd260, %fd263, %fd262; + mov.f64 %fd265, 0d3E928AF3FCA213EA; + mov.f64 %fd266, 0d3E5ADE1569CE2BDF; + fma.rn.f64 %fd267, %fd266, %fd264, %fd265; + mov.f64 %fd268, 0d3EC71DEE62401315; + fma.rn.f64 %fd269, %fd267, %fd264, %fd268; + mov.f64 %fd270, 0d3EFA01997C89EB71; + fma.rn.f64 %fd271, %fd269, %fd264, %fd270; + mov.f64 %fd272, 0d3F2A01A014761F65; + fma.rn.f64 %fd273, %fd271, %fd264, %fd272; + mov.f64 %fd274, 0d3F56C16C1852B7AF; + fma.rn.f64 %fd275, %fd273, %fd264, %fd274; + mov.f64 %fd276, 0d3F81111111122322; + fma.rn.f64 %fd277, %fd275, %fd264, %fd276; + mov.f64 %fd278, 0d3FA55555555502A1; + fma.rn.f64 %fd279, %fd277, %fd264, %fd278; + mov.f64 %fd280, 0d3FC5555555555511; + fma.rn.f64 %fd281, %fd279, %fd264, %fd280; + mov.f64 %fd282, 0d3FE000000000000B; + fma.rn.f64 %fd283, %fd281, %fd264, %fd282; + fma.rn.f64 %fd284, %fd283, %fd264, %fd254; + fma.rn.f64 %fd285, %fd284, %fd264, %fd254; + { + .reg .b32 %temp; + mov.b64 {%r37, %temp}, %fd285; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r38}, %fd285; + } + shl.b32 %r149, %r36, 20; + add.s32 %r150, %r38, %r149; + mov.b64 %fd580, {%r37, %r150}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r151}, %fd255; + } + mov.b32 %f537, %r151; + abs.f32 %f42, %f537; + setp.lt.f32 %p74, %f42, 0f4086232B; + @%p74 bra $L__BB3_31; + + setp.gt.f64 %p75, %fd13, 0d8000000000000000; + mov.f64 %fd286, 0d7FF0000000000000; + sub.f64 %fd287, %fd286, %fd13; + selp.f64 %fd580, 0d0000000000000000, %fd287, %p75; + setp.geu.f32 %p76, %f42, 0f40874800; + @%p76 bra $L__BB3_31; + + shr.u32 %r152, %r36, 31; + add.s32 %r153, %r36, %r152; + shr.s32 %r154, %r153, 1; + shl.b32 %r155, %r154, 20; + add.s32 %r156, %r38, %r155; + mov.b64 %fd288, {%r37, %r156}; + sub.s32 %r157, %r36, %r154; + shl.b32 %r158, %r157, 20; + add.s32 %r159, %r158, 1072693248; + mov.u32 %r160, 0; + mov.b64 %fd289, {%r160, %r159}; + mul.f64 %fd580, %fd288, %fd289; + +$L__BB3_31: + add.s32 %r161, %r849, 1; + cvt.rn.f32.s32 %f538, %r161; + cvt.f64.f32 %fd18, %f538; + { + .reg .b32 %temp; + mov.b64 {%temp, %r39}, %fd18; + } + abs.f64 %fd19, %fd18; + { // callseq 68, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd19; + .param .b64 param1; + st.param.f64 [param1+0], %fd246; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd582, [retval0+0]; + } // callseq 68 + setp.lt.s32 %p77, %r39, 0; + and.pred %p3, %p77, %p54; + not.pred %p79, %p3; + @%p79 bra $L__BB3_33; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r162}, %fd582; + } + xor.b32 %r163, %r162, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r164, %temp}, %fd582; + } + mov.b64 %fd582, {%r164, %r163}; + +$L__BB3_33: + setp.eq.s32 %p80, %r848, 1; + @%p80 bra $L__BB3_37; + bra.uni $L__BB3_34; + +$L__BB3_37: + mov.u32 %r165, 0; + selp.b32 %r166, %r39, 0, %p54; + or.b32 %r167, %r166, 2146435072; + selp.b32 %r168, %r167, %r166, %p55; + mov.b64 %fd582, {%r165, %r168}; + bra.uni $L__BB3_38; + +$L__BB3_34: + setp.gt.s32 %p81, %r39, -1; + @%p81 bra $L__BB3_38; + + cvt.rzi.f64.f64 %fd292, %fd246; + setp.eq.f64 %p82, %fd292, 0d4000000000000000; + @%p82 bra $L__BB3_38; + + mov.f64 %fd582, 0dFFF8000000000000; + +$L__BB3_38: + add.f64 %fd25, %fd18, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r169}, %fd25; + } + and.b32 %r170, %r169, 2146435072; + setp.ne.s32 %p85, %r170, 2146435072; + mov.f64 %fd583, %fd582; + @%p85 bra $L__BB3_44; + + setp.gtu.f64 %p86, %fd19, 0d7FF0000000000000; + mov.f64 %fd583, %fd25; + @%p86 bra $L__BB3_44; + + { + .reg .b32 %temp; + mov.b64 {%r171, %temp}, %fd246; + } + setp.eq.s32 %p88, %r171, 0; + and.pred %p89, %p87, %p88; + @%p89 bra $L__BB3_43; + bra.uni $L__BB3_41; + +$L__BB3_43: + mov.u32 %r176, 0; + setp.gt.f64 %p96, %fd19, 0d3FF0000000000000; + selp.b32 %r177, 2146435072, 0, %p96; + xor.b32 %r178, %r177, 2146435072; + selp.b32 %r179, %r178, %r177, %p55; + setp.eq.s32 %p97, %r849, -2; + selp.b32 %r180, 1072693248, %r179, %p97; + mov.b64 %fd583, {%r176, %r180}; + bra.uni $L__BB3_44; + +$L__BB3_41: + { + .reg .b32 %temp; + mov.b64 {%r172, %temp}, %fd18; + } + and.b32 %r173, %r39, 2147483647; + setp.ne.s32 %p90, %r173, 2146435072; + setp.ne.s32 %p91, %r172, 0; + or.pred %p92, %p90, %p91; + mov.f64 %fd583, %fd582; + @%p92 bra $L__BB3_44; + + and.pred %p94, %p60, %p3; + selp.b32 %r174, %r20, %r19, %p94; + mov.u32 %r175, 0; + mov.b64 %fd583, {%r175, %r174}; + +$L__BB3_44: + mov.f64 %fd576, 0d3FF0000000000000; + mov.f64 %fd575, 0d3FE000000000000B; + mov.f64 %fd574, 0d3FC5555555555511; + mov.f64 %fd573, 0d3FA55555555502A1; + mov.f64 %fd572, 0d3F81111111122322; + mov.f64 %fd571, 0d3F56C16C1852B7AF; + mov.f64 %fd570, 0d3F2A01A014761F65; + mov.f64 %fd569, 0d3EFA01997C89EB71; + mov.f64 %fd568, 0d3EC71DEE62401315; + mov.f64 %fd567, 0d3E928AF3FCA213EA; + mov.f64 %fd566, 0d3E5ADE1569CE2BDF; + mov.f64 %fd565, 0dBC7ABC9E3B39803F; + mov.f64 %fd564, 0dBFE62E42FEFA39EF; + mov.f64 %fd563, 0dC338000000000000; + mov.f64 %fd562, 0d4338000000000000; + mov.f64 %fd561, 0d3FF71547652B82FE; + setp.eq.s32 %p98, %r849, 0; + selp.f64 %fd295, 0d3FF0000000000000, %fd583, %p98; + mul.f64 %fd29, %fd295, %fd1; + neg.f64 %fd297, %fd29; + fma.rn.f64 %fd300, %fd297, %fd561, %fd562; + { + .reg .b32 %temp; + mov.b64 {%r40, %temp}, %fd300; + } + add.rn.f64 %fd302, %fd300, %fd563; + fma.rn.f64 %fd304, %fd302, %fd564, %fd297; + fma.rn.f64 %fd306, %fd302, %fd565, %fd304; + fma.rn.f64 %fd309, %fd566, %fd306, %fd567; + fma.rn.f64 %fd311, %fd309, %fd306, %fd568; + fma.rn.f64 %fd313, %fd311, %fd306, %fd569; + fma.rn.f64 %fd315, %fd313, %fd306, %fd570; + fma.rn.f64 %fd317, %fd315, %fd306, %fd571; + fma.rn.f64 %fd319, %fd317, %fd306, %fd572; + fma.rn.f64 %fd321, %fd319, %fd306, %fd573; + fma.rn.f64 %fd323, %fd321, %fd306, %fd574; + fma.rn.f64 %fd325, %fd323, %fd306, %fd575; + fma.rn.f64 %fd326, %fd325, %fd306, %fd576; + fma.rn.f64 %fd327, %fd326, %fd306, %fd576; + { + .reg .b32 %temp; + mov.b64 {%r41, %temp}, %fd327; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r42}, %fd327; + } + shl.b32 %r181, %r40, 20; + add.s32 %r182, %r42, %r181; + mov.b64 %fd584, {%r41, %r182}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r183}, %fd297; + } + mov.b32 %f539, %r183; + abs.f32 %f43, %f539; + setp.lt.f32 %p99, %f43, 0f4086232B; + @%p99 bra $L__BB3_47; + + setp.gt.f64 %p100, %fd29, 0d8000000000000000; + mov.f64 %fd328, 0d7FF0000000000000; + sub.f64 %fd329, %fd328, %fd29; + selp.f64 %fd584, 0d0000000000000000, %fd329, %p100; + setp.geu.f32 %p101, %f43, 0f40874800; + @%p101 bra $L__BB3_47; + + shr.u32 %r184, %r40, 31; + add.s32 %r185, %r40, %r184; + shr.s32 %r186, %r185, 1; + shl.b32 %r187, %r186, 20; + add.s32 %r188, %r42, %r187; + mov.b64 %fd330, {%r41, %r188}; + sub.s32 %r189, %r40, %r186; + shl.b32 %r190, %r189, 20; + add.s32 %r191, %r190, 1072693248; + mov.u32 %r192, 0; + mov.b64 %fd331, {%r192, %r191}; + mul.f64 %fd584, %fd330, %fd331; + +$L__BB3_47: + ld.global.f32 %f540, [%rd53]; + cvt.f64.f32 %fd332, %f540; + mul.f64 %fd333, %fd580, %fd584; + cvt.f64.f32 %fd334, %f2972; + fma.rn.f64 %fd335, %fd333, %fd332, %fd334; + cvt.rn.f32.f64 %f2972, %fd335; + cvt.f64.f32 %fd336, %f2971; + add.f64 %fd337, %fd333, %fd336; + cvt.rn.f32.f64 %f2971, %fd337; + add.s32 %r849, %r849, -1; + add.s32 %r848, %r848, 1; + add.s64 %rd53, %rd53, 4; + add.s32 %r850, %r850, 1; + setp.lt.s32 %p102, %r850, %r108; + @%p102 bra $L__BB3_16; + + add.s32 %r847, %r847, 1; + setp.lt.s32 %p103, %r847, %r108; + @%p103 bra $L__BB3_15; + + div.rn.f32 %f541, %f2972, %f2971; + max.f32 %f2968, %f2968, %f541; + min.f32 %f3045, %f3045, %f541; + add.s32 %r846, %r846, 1; + setp.lt.s32 %p104, %r846, %r108; + @%p104 bra $L__BB3_14; + + add.s32 %r845, %r845, 1; + setp.lt.s32 %p105, %r845, %r108; + @%p105 bra $L__BB3_13; + +$L__BB3_51: + ld.param.u32 %r839, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_3]; + mov.f32 %f2946, 0f00000000; + sub.f32 %f542, %f2968, %f3045; + add.f32 %f543, %f542, %f542; + fma.rn.f32 %f544, %f542, 0f40000000, %f543; + mul.f32 %f545, %f544, 0f40490FD8; + mul.f32 %f546, %f545, %f3043; + mul.f32 %f547, %f546, %f3043; + max.f32 %f3046, %f2946, %f547; + setp.lt.s32 %p106, %r839, 1; + mov.f32 %f3044, %f3043; + @%p106 bra $L__BB3_373; + + cvt.rn.f32.s32 %f549, %r108; + mul.f32 %f51, %f549, 0f3F000000; + mov.u32 %r851, 0; + mov.f64 %fd339, 0d4008000000000000; + mov.f64 %fd345, 0d4014000000000000; + mov.f32 %f3044, %f3043; + +$L__BB3_53: + mov.f32 %f2993, 0f00000000; + mov.f32 %f2994, %f2993; + mov.f32 %f2995, %f2993; + mov.f32 %f2996, %f2993; + mov.f32 %f2997, %f2993; + mov.f32 %f2998, %f2993; + mov.f32 %f2999, %f2993; + mov.f32 %f3000, %f2993; + mov.f32 %f3001, %f2993; + mov.f32 %f3002, %f2993; + mov.f32 %f3003, %f2993; + mov.f32 %f3004, %f2993; + @%p44 bra $L__BB3_372; + + mov.f32 %f2993, 0f00000000; + mov.f32 %f574, 0f3F000000; + div.rn.f32 %f575, %f574, %f3044; + div.rn.f32 %f58, %f575, %f3044; + div.rn.f32 %f576, %f574, %f3043; + div.rn.f32 %f59, %f576, %f3043; + div.rn.f32 %f577, %f3046, 0fC0206C98; + div.rn.f32 %f60, %f577, %f3044; + cvt.f64.f32 %fd34, %f577; + div.rn.f32 %f61, %f577, %f3043; + div.rn.f32 %f62, %f60, %f3044; + mov.f32 %f578, 0fC0000000; + div.rn.f32 %f63, %f578, %f3044; + div.rn.f32 %f579, %f3046, 0f40206C98; + cvt.f64.f32 %fd35, %f579; + div.rn.f32 %f64, %f61, %f3043; + div.rn.f32 %f65, %f578, %f3043; + mov.u32 %r852, 0; + +$L__BB3_55: + mov.u32 %r853, 0; + mov.f32 %f2820, 0f00000000; + cvt.rn.f32.s32 %f580, %r852; + sub.f32 %f78, %f580, %f3048; + add.f32 %f79, %f78, 0f3F000000; + sqrt.rn.f32 %f581, %f58; + mul.f32 %f582, %f79, %f581; + abs.f32 %f80, %f582; + setp.ge.f32 %p108, %f80, 0f3F8060FE; + mul.f32 %f583, %f582, %f582; + selp.f32 %f584, %f80, %f583, %p108; + selp.f32 %f585, 0f3789CA3C, 0f38B1E96A, %p108; + selp.f32 %f586, 0fB9F560B9, 0fBA574D20, %p108; + fma.rn.f32 %f587, %f585, %f584, %f586; + selp.f32 %f588, 0f3BAC840B, 0f3BAAD5EA, %p108; + fma.rn.f32 %f589, %f587, %f584, %f588; + selp.f32 %f590, 0fBD0C8162, 0fBCDC1BE7, %p108; + fma.rn.f32 %f591, %f589, %f584, %f590; + selp.f32 %f592, 0f3E1CF906, 0f3DE718AF, %p108; + fma.rn.f32 %f593, %f591, %f584, %f592; + selp.f32 %f594, 0f3F6A937E, 0fBEC093AC, %p108; + fma.rn.f32 %f595, %f593, %f584, %f594; + selp.f32 %f596, 0f3F20D842, 0f3E0375D3, %p108; + fma.rn.f32 %f597, %f595, %f584, %f596; + neg.f32 %f598, %f80; + selp.f32 %f599, %f598, %f582, %p108; + fma.rn.f32 %f81, %f597, %f599, %f599; + mov.b32 %r196, %f582; + and.b32 %r51, %r196, -2147483648; + add.f32 %f82, %f78, 0fBF000000; + mul.f32 %f600, %f82, %f581; + abs.f32 %f83, %f600; + setp.ge.f32 %p109, %f83, 0f3F8060FE; + mul.f32 %f601, %f600, %f600; + selp.f32 %f602, %f83, %f601, %p109; + selp.f32 %f603, 0f3789CA3C, 0f38B1E96A, %p109; + selp.f32 %f604, 0fB9F560B9, 0fBA574D20, %p109; + fma.rn.f32 %f605, %f603, %f602, %f604; + selp.f32 %f606, 0f3BAC840B, 0f3BAAD5EA, %p109; + fma.rn.f32 %f607, %f605, %f602, %f606; + selp.f32 %f608, 0fBD0C8162, 0fBCDC1BE7, %p109; + fma.rn.f32 %f609, %f607, %f602, %f608; + selp.f32 %f610, 0f3E1CF906, 0f3DE718AF, %p109; + fma.rn.f32 %f611, %f609, %f602, %f610; + selp.f32 %f612, 0f3F6A937E, 0fBEC093AC, %p109; + fma.rn.f32 %f613, %f611, %f602, %f612; + selp.f32 %f614, 0f3F20D842, 0f3E0375D3, %p109; + fma.rn.f32 %f615, %f613, %f602, %f614; + neg.f32 %f616, %f83; + selp.f32 %f617, %f616, %f600, %p109; + fma.rn.f32 %f84, %f615, %f617, %f617; + mov.b32 %r197, %f600; + and.b32 %r52, %r197, -2147483648; + sqrt.rn.f32 %f85, %f59; + add.f32 %f618, %f580, 0f3F000000; + sub.f32 %f86, %f618, %f3048; + div.rn.f32 %f87, %f86, %f3044; + mov.f32 %f619, 0f3F800000; + cvt.rzi.f32.f32 %f620, %f619; + add.f32 %f621, %f620, %f620; + mov.f32 %f622, 0f40000000; + sub.f32 %f623, %f622, %f621; + abs.f32 %f88, %f623; + setp.eq.f32 %p110, %f88, 0f3F800000; + abs.f32 %f89, %f87; + setp.lt.f32 %p111, %f89, 0f00800000; + mul.f32 %f624, %f89, 0f4B800000; + selp.f32 %f625, %f624, %f89, %p111; + selp.f32 %f626, 0fC3170000, 0fC2FE0000, %p111; + mov.b32 %r198, %f625; + and.b32 %r199, %r198, 8388607; + or.b32 %r200, %r199, 1065353216; + mov.b32 %f627, %r200; + shr.u32 %r201, %r198, 23; + cvt.rn.f32.u32 %f628, %r201; + add.f32 %f629, %f626, %f628; + setp.gt.f32 %p112, %f627, 0f3FB504F3; + mul.f32 %f630, %f627, 0f3F000000; + add.f32 %f631, %f629, 0f3F800000; + selp.f32 %f632, %f631, %f629, %p112; + selp.f32 %f633, %f630, %f627, %p112; + add.f32 %f634, %f633, 0fBF800000; + add.f32 %f635, %f633, 0f3F800000; + rcp.approx.ftz.f32 %f636, %f635; + add.f32 %f637, %f634, %f634; + mul.f32 %f638, %f637, %f636; + mul.f32 %f639, %f638, %f638; + mov.f32 %f640, 0f3C4CAF63; + mov.f32 %f641, 0f3B18F0FE; + fma.rn.f32 %f642, %f641, %f639, %f640; + mov.f32 %f643, 0f3DAAAABD; + fma.rn.f32 %f644, %f642, %f639, %f643; + mul.rn.f32 %f645, %f644, %f639; + mul.rn.f32 %f646, %f645, %f638; + sub.f32 %f647, %f634, %f638; + add.f32 %f648, %f647, %f647; + neg.f32 %f649, %f638; + fma.rn.f32 %f650, %f649, %f634, %f648; + mul.rn.f32 %f651, %f636, %f650; + add.f32 %f652, %f646, %f638; + sub.f32 %f653, %f638, %f652; + add.f32 %f654, %f646, %f653; + add.f32 %f655, %f651, %f654; + add.f32 %f656, %f652, %f655; + sub.f32 %f657, %f652, %f656; + add.f32 %f658, %f655, %f657; + mov.f32 %f659, 0f3F317200; + mul.rn.f32 %f660, %f632, %f659; + mov.f32 %f661, 0f35BFBE8E; + mul.rn.f32 %f662, %f632, %f661; + add.f32 %f663, %f660, %f656; + sub.f32 %f664, %f660, %f663; + add.f32 %f665, %f656, %f664; + add.f32 %f666, %f658, %f665; + add.f32 %f667, %f662, %f666; + add.f32 %f668, %f663, %f667; + sub.f32 %f669, %f663, %f668; + add.f32 %f670, %f667, %f669; + mul.rn.f32 %f671, %f622, %f668; + neg.f32 %f672, %f671; + fma.rn.f32 %f673, %f622, %f668, %f672; + fma.rn.f32 %f674, %f622, %f670, %f673; + fma.rn.f32 %f676, %f2820, %f668, %f674; + add.rn.f32 %f677, %f671, %f676; + neg.f32 %f678, %f677; + add.rn.f32 %f679, %f671, %f678; + add.rn.f32 %f680, %f679, %f676; + mov.b32 %r202, %f677; + setp.eq.s32 %p113, %r202, 1118925336; + add.s32 %r203, %r202, -1; + mov.b32 %f681, %r203; + add.f32 %f682, %f680, 0f37000000; + selp.f32 %f90, %f682, %f680, %p113; + selp.f32 %f683, %f681, %f677, %p113; + mov.f32 %f684, 0f3FB8AA3B; + mul.rn.f32 %f685, %f683, %f684; + cvt.rzi.f32.f32 %f686, %f685; + abs.f32 %f687, %f686; + setp.gt.f32 %p114, %f687, 0f42FC0000; + mov.b32 %r204, %f686; + and.b32 %r205, %r204, -2147483648; + or.b32 %r206, %r205, 1123811328; + mov.b32 %f688, %r206; + selp.f32 %f689, %f688, %f686, %p114; + mov.f32 %f690, 0fBF317218; + fma.rn.f32 %f691, %f689, %f690, %f683; + mov.f32 %f692, 0f3102E308; + fma.rn.f32 %f693, %f689, %f692, %f691; + mul.f32 %f694, %f693, 0f3FB8AA3B; + add.f32 %f695, %f689, 0f4B40007F; + mov.b32 %r207, %f695; + shl.b32 %r208, %r207, 23; + mov.b32 %f696, %r208; + ex2.approx.ftz.f32 %f697, %f694; + mul.f32 %f91, %f697, %f696; + setp.lt.f32 %p115, %f87, 0f00000000; + and.pred %p4, %p115, %p110; + div.rn.f32 %f92, %f82, %f3044; + abs.f32 %f93, %f92; + setp.lt.f32 %p116, %f93, 0f00800000; + mul.f32 %f698, %f93, 0f4B800000; + selp.f32 %f699, %f698, %f93, %p116; + selp.f32 %f700, 0fC3170000, 0fC2FE0000, %p116; + mov.b32 %r209, %f699; + and.b32 %r210, %r209, 8388607; + or.b32 %r211, %r210, 1065353216; + mov.b32 %f701, %r211; + shr.u32 %r212, %r209, 23; + cvt.rn.f32.u32 %f702, %r212; + add.f32 %f703, %f700, %f702; + setp.gt.f32 %p117, %f701, 0f3FB504F3; + mul.f32 %f704, %f701, 0f3F000000; + add.f32 %f705, %f703, 0f3F800000; + selp.f32 %f706, %f705, %f703, %p117; + selp.f32 %f707, %f704, %f701, %p117; + add.f32 %f708, %f707, 0fBF800000; + add.f32 %f709, %f707, 0f3F800000; + rcp.approx.ftz.f32 %f710, %f709; + add.f32 %f711, %f708, %f708; + mul.f32 %f712, %f711, %f710; + mul.f32 %f713, %f712, %f712; + fma.rn.f32 %f714, %f641, %f713, %f640; + fma.rn.f32 %f715, %f714, %f713, %f643; + mul.rn.f32 %f716, %f715, %f713; + mul.rn.f32 %f717, %f716, %f712; + sub.f32 %f718, %f708, %f712; + add.f32 %f719, %f718, %f718; + neg.f32 %f720, %f712; + fma.rn.f32 %f721, %f720, %f708, %f719; + mul.rn.f32 %f722, %f710, %f721; + add.f32 %f723, %f717, %f712; + sub.f32 %f724, %f712, %f723; + add.f32 %f725, %f717, %f724; + add.f32 %f726, %f722, %f725; + add.f32 %f727, %f723, %f726; + sub.f32 %f728, %f723, %f727; + add.f32 %f729, %f726, %f728; + mul.rn.f32 %f730, %f706, %f659; + mul.rn.f32 %f731, %f706, %f661; + add.f32 %f732, %f730, %f727; + sub.f32 %f733, %f730, %f732; + add.f32 %f734, %f727, %f733; + add.f32 %f735, %f729, %f734; + add.f32 %f736, %f731, %f735; + add.f32 %f737, %f732, %f736; + sub.f32 %f738, %f732, %f737; + add.f32 %f739, %f736, %f738; + mul.rn.f32 %f740, %f622, %f737; + neg.f32 %f741, %f740; + fma.rn.f32 %f742, %f622, %f737, %f741; + fma.rn.f32 %f743, %f622, %f739, %f742; + fma.rn.f32 %f744, %f2820, %f737, %f743; + add.rn.f32 %f745, %f740, %f744; + neg.f32 %f746, %f745; + add.rn.f32 %f747, %f740, %f746; + add.rn.f32 %f748, %f747, %f744; + mov.b32 %r213, %f745; + setp.eq.s32 %p118, %r213, 1118925336; + add.s32 %r214, %r213, -1; + mov.b32 %f749, %r214; + add.f32 %f750, %f748, 0f37000000; + selp.f32 %f94, %f750, %f748, %p118; + selp.f32 %f751, %f749, %f745, %p118; + mul.rn.f32 %f752, %f751, %f684; + cvt.rzi.f32.f32 %f753, %f752; + abs.f32 %f754, %f753; + setp.gt.f32 %p119, %f754, 0f42FC0000; + mov.b32 %r215, %f753; + and.b32 %r216, %r215, -2147483648; + or.b32 %r217, %r216, 1123811328; + mov.b32 %f755, %r217; + selp.f32 %f756, %f755, %f753, %p119; + fma.rn.f32 %f757, %f756, %f690, %f751; + fma.rn.f32 %f758, %f756, %f692, %f757; mul.f32 %f759, %f758, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f760, %f759; - add.f32 %f761, %f754, 0f00000000; - ex2.approx.f32 %f762, %f761; - mul.f32 %f763, %f760, %f762; - setp.gt.f32 %p39, %f751, 0f42D20000; - selp.f32 %f764, 0f00000000, %f763, %p39; - setp.lt.f32 %p40, %f751, 0fC2D20000; - selp.f32 %f75, 0f7F800000, %f764, %p40; - mov.f32 %f3244, %f3238; - mov.f32 %f3245, %f3239; - -BB3_32: - sub.s32 %r112, %r314, %r319; - cvt.rn.f32.s32 %f765, %r112; - mul.f32 %f766, %f765, %f765; - mul.f32 %f767, %f40, %f766; - neg.f32 %f768, %f767; - mul.f32 %f769, %f767, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f770, %f769; - fma.rn.f32 %f772, %f770, %f755, %f768; - fma.rn.f32 %f774, %f770, %f757, %f772; - mul.f32 %f775, %f774, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f776, %f775; - add.f32 %f777, %f770, 0f00000000; - ex2.approx.f32 %f778, %f777; - mul.f32 %f779, %f776, %f778; - setp.gt.f32 %p41, %f767, 0f42D20000; - selp.f32 %f780, 0f00000000, %f779, %p41; - setp.lt.f32 %p42, %f767, 0fC2D20000; - selp.f32 %f781, 0f7F800000, %f780, %p42; - mul.f32 %f782, %f75, %f781; - add.s32 %r113, %r319, %r20; - add.s32 %r114, %r113, %r4; - mul.wide.s32 %rd93, %r114, 4; - add.s64 %rd94, %rd1, %rd93; - ld.global.f32 %f783, [%rd94]; - fma.rn.f32 %f784, %f783, %f782, %f3245; - add.f32 %f785, %f3244, %f782; - add.s32 %r115, %r319, 1; - sub.s32 %r116, %r314, %r115; - cvt.rn.f32.s32 %f786, %r116; - mul.f32 %f787, %f786, %f786; - mul.f32 %f788, %f40, %f787; - neg.f32 %f789, %f788; - mul.f32 %f790, %f788, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f791, %f790; - fma.rn.f32 %f792, %f791, %f755, %f789; - fma.rn.f32 %f793, %f791, %f757, %f792; - mul.f32 %f794, %f793, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f795, %f794; - add.f32 %f796, %f791, 0f00000000; - ex2.approx.f32 %f797, %f796; - mul.f32 %f798, %f795, %f797; - setp.gt.f32 %p43, %f788, 0f42D20000; - selp.f32 %f799, 0f00000000, %f798, %p43; - setp.lt.f32 %p44, %f788, 0fC2D20000; - selp.f32 %f800, 0f7F800000, %f799, %p44; - mul.f32 %f801, %f75, %f800; - ld.global.f32 %f802, [%rd94+4]; - fma.rn.f32 %f803, %f802, %f801, %f784; - add.f32 %f804, %f785, %f801; - add.s32 %r117, %r319, 2; - sub.s32 %r118, %r314, %r117; - cvt.rn.f32.s32 %f805, %r118; - mul.f32 %f806, %f805, %f805; - mul.f32 %f807, %f40, %f806; + add.f32 %f760, %f756, 0f4B40007F; + mov.b32 %r218, %f760; + shl.b32 %r219, %r218, 23; + mov.b32 %f761, %r219; + ex2.approx.ftz.f32 %f762, %f759; + mul.f32 %f95, %f762, %f761; + cvt.f64.f32 %fd338, %f3044; + { + .reg .b32 %temp; + mov.b64 {%temp, %r53}, %fd338; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r54}, %fd339; + } + and.b32 %r55, %r54, 2146435072; + setp.eq.s32 %p121, %r55, 1073741824; + abs.f64 %fd340, %fd338; + { // callseq 69, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd340; + .param .b64 param1; + st.param.f64 [param1+0], %fd339; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd36, [retval0+0]; + } // callseq 69 + setp.lt.s32 %p122, %r53, 0; + and.pred %p6, %p122, %p121; + setp.lt.s32 %p123, %r54, 0; + add.f64 %fd341, %fd338, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r220}, %fd341; + } + and.b32 %r56, %r220, 2146435072; + setp.ne.s32 %p124, %r56, 2146435072; + setp.gtu.f64 %p125, %fd340, 0d7FF0000000000000; + and.b32 %r57, %r54, 2147483647; + setp.gt.f64 %p126, %fd340, 0d3FF0000000000000; + selp.b32 %r221, 2146435072, 0, %p126; + xor.b32 %r222, %r221, 2146435072; + selp.b32 %r223, %r222, %r221, %p123; + setp.eq.f32 %p127, %f3044, 0fBF800000; + selp.b32 %r58, 1072693248, %r223, %p127; + setp.gt.s32 %p128, %r54, -1; + selp.b32 %r59, 2146435072, 0, %p128; + cvt.f64.f32 %fd342, %f3043; + { + .reg .b32 %temp; + mov.b64 {%temp, %r60}, %fd342; + } + abs.f64 %fd343, %fd342; + { // callseq 70, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd343; + .param .b64 param1; + st.param.f64 [param1+0], %fd339; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd37, [retval0+0]; + } // callseq 70 + setp.lt.s32 %p129, %r60, 0; + and.pred %p7, %p129, %p121; + add.f64 %fd344, %fd342, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r224}, %fd344; + } + and.b32 %r61, %r224, 2146435072; + setp.ne.s32 %p130, %r61, 2146435072; + setp.gtu.f64 %p131, %fd343, 0d7FF0000000000000; + add.f32 %f763, %f580, 0f3F800000; + sub.f32 %f764, %f763, %f3048; + div.rn.f32 %f96, %f764, %f3044; + abs.f32 %f97, %f96; + setp.lt.f32 %p132, %f97, 0f00800000; + mul.f32 %f765, %f97, 0f4B800000; + selp.f32 %f766, %f765, %f97, %p132; + selp.f32 %f767, 0fC3170000, 0fC2FE0000, %p132; + mov.b32 %r225, %f766; + and.b32 %r226, %r225, 8388607; + or.b32 %r227, %r226, 1065353216; + mov.b32 %f768, %r227; + shr.u32 %r228, %r225, 23; + cvt.rn.f32.u32 %f769, %r228; + add.f32 %f770, %f767, %f769; + setp.gt.f32 %p133, %f768, 0f3FB504F3; + mul.f32 %f771, %f768, 0f3F000000; + add.f32 %f772, %f770, 0f3F800000; + selp.f32 %f773, %f772, %f770, %p133; + selp.f32 %f774, %f771, %f768, %p133; + add.f32 %f775, %f774, 0fBF800000; + add.f32 %f776, %f774, 0f3F800000; + rcp.approx.ftz.f32 %f777, %f776; + add.f32 %f778, %f775, %f775; + mul.f32 %f779, %f778, %f777; + mul.f32 %f780, %f779, %f779; + fma.rn.f32 %f781, %f641, %f780, %f640; + fma.rn.f32 %f782, %f781, %f780, %f643; + mul.rn.f32 %f783, %f782, %f780; + mul.rn.f32 %f784, %f783, %f779; + sub.f32 %f785, %f775, %f779; + add.f32 %f786, %f785, %f785; + neg.f32 %f787, %f779; + fma.rn.f32 %f788, %f787, %f775, %f786; + mul.rn.f32 %f789, %f777, %f788; + add.f32 %f790, %f784, %f779; + sub.f32 %f791, %f779, %f790; + add.f32 %f792, %f784, %f791; + add.f32 %f793, %f789, %f792; + add.f32 %f794, %f790, %f793; + sub.f32 %f795, %f790, %f794; + add.f32 %f796, %f793, %f795; + mul.rn.f32 %f797, %f773, %f659; + mul.rn.f32 %f798, %f773, %f661; + add.f32 %f799, %f797, %f794; + sub.f32 %f800, %f797, %f799; + add.f32 %f801, %f794, %f800; + add.f32 %f802, %f796, %f801; + add.f32 %f803, %f798, %f802; + add.f32 %f804, %f799, %f803; + sub.f32 %f805, %f799, %f804; + add.f32 %f806, %f803, %f805; + mul.rn.f32 %f807, %f622, %f804; neg.f32 %f808, %f807; - mul.f32 %f809, %f807, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f810, %f809; - fma.rn.f32 %f811, %f810, %f755, %f808; - fma.rn.f32 %f812, %f810, %f757, %f811; - mul.f32 %f813, %f812, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f814, %f813; - add.f32 %f815, %f810, 0f00000000; - ex2.approx.f32 %f816, %f815; - mul.f32 %f817, %f814, %f816; - setp.gt.f32 %p45, %f807, 0f42D20000; - selp.f32 %f818, 0f00000000, %f817, %p45; - setp.lt.f32 %p46, %f807, 0fC2D20000; - selp.f32 %f819, 0f7F800000, %f818, %p46; - mul.f32 %f820, %f75, %f819; - ld.global.f32 %f821, [%rd94+8]; - fma.rn.f32 %f822, %f821, %f820, %f803; - add.f32 %f823, %f804, %f820; - add.s32 %r119, %r319, 3; - sub.s32 %r120, %r314, %r119; - cvt.rn.f32.s32 %f824, %r120; - mul.f32 %f825, %f824, %f824; - mul.f32 %f826, %f40, %f825; - neg.f32 %f827, %f826; - mul.f32 %f828, %f826, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f829, %f828; - fma.rn.f32 %f830, %f829, %f755, %f827; - fma.rn.f32 %f831, %f829, %f757, %f830; - mul.f32 %f832, %f831, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f833, %f832; - add.f32 %f834, %f829, 0f00000000; - ex2.approx.f32 %f835, %f834; - mul.f32 %f836, %f833, %f835; - setp.gt.f32 %p47, %f826, 0f42D20000; - selp.f32 %f837, 0f00000000, %f836, %p47; - setp.lt.f32 %p48, %f826, 0fC2D20000; - selp.f32 %f838, 0f7F800000, %f837, %p48; - mul.f32 %f839, %f75, %f838; - ld.global.f32 %f840, [%rd94+12]; - fma.rn.f32 %f3245, %f840, %f839, %f822; - add.f32 %f3244, %f823, %f839; - add.s32 %r319, %r319, 4; - setp.lt.s32 %p49, %r319, %r71; - @%p49 bra BB3_32; - -BB3_33: - add.s32 %r315, %r315, 1; - setp.lt.s32 %p50, %r315, %r71; - @%p50 bra BB3_21; - - div.rn.f32 %f841, %f3245, %f3244; - max.f32 %f3227, %f3227, %f841; - min.f32 %f3226, %f3226, %f841; - add.s32 %r314, %r314, 1; - setp.lt.s32 %p51, %r314, %r71; - @%p51 bra BB3_20; - - add.s32 %r313, %r313, 1; - setp.lt.s32 %p52, %r313, %r71; - @%p52 bra BB3_19; - -BB3_36: - sub.f32 %f842, %f3227, %f3226; - add.f32 %f843, %f842, %f842; - fma.rn.f32 %f844, %f842, 0f40000000, %f843; - mul.f32 %f845, %f844, 0f40490FD8; - mul.f32 %f846, %f845, %f3316; - mul.f32 %f847, %f846, %f3316; - max.f32 %f3319, %f617, %f847; - setp.lt.s32 %p53, %r72, 1; - @%p53 bra BB3_37; - - cvt.rn.f32.s32 %f849, %r71; - mul.f32 %f87, %f849, 0f3F000000; - mov.u32 %r320, 0; - mov.f32 %f3317, %f3316; - -BB3_39: - mov.f32 %f3266, 0f00000000; - mov.f32 %f3267, %f3266; - mov.f32 %f3268, %f3266; - mov.f32 %f3269, %f3266; - mov.f32 %f3270, %f3266; - mov.f32 %f3271, %f3266; - mov.f32 %f3272, %f3266; - mov.f32 %f3273, %f3266; - mov.f32 %f3274, %f3266; - mov.f32 %f3275, %f3266; - mov.f32 %f3276, %f3266; - mov.f32 %f3277, %f3266; - @%p15 bra BB3_120; - - div.rn.f32 %f875, %f648, %f3317; - div.rn.f32 %f94, %f875, %f3317; - div.rn.f32 %f876, %f648, %f3316; - div.rn.f32 %f95, %f876, %f3316; - neg.f32 %f877, %f3319; - div.rn.f32 %f878, %f877, 0f40206C98; - div.rn.f32 %f96, %f878, %f3317; - div.rn.f32 %f97, %f878, %f3316; - div.rn.f32 %f98, %f96, %f3317; - mov.f32 %f879, 0fC0000000; - div.rn.f32 %f99, %f879, %f3317; - div.rn.f32 %f100, %f97, %f3316; - div.rn.f32 %f101, %f879, %f3316; - mul.f32 %f880, %f3317, %f3317; - mul.f32 %f881, %f3317, %f880; - div.rn.f32 %f102, %f878, %f881; - mul.f32 %f882, %f3316, %f3316; - mul.f32 %f883, %f3316, %f882; - div.rn.f32 %f103, %f878, %f883; - mul.f32 %f884, %f880, %f880; - mul.f32 %f885, %f3317, %f884; - div.rn.f32 %f886, %f3319, 0f40206C98; - div.rn.f32 %f104, %f886, %f885; - mul.f32 %f887, %f882, %f882; - mul.f32 %f888, %f3316, %f887; - div.rn.f32 %f105, %f886, %f888; - mov.u32 %r321, 0; - mov.f32 %f3266, 0f00000000; - mov.f32 %f3267, %f3266; - mov.f32 %f3268, %f3266; - mov.f32 %f3269, %f3266; - mov.f32 %f3270, %f3266; - mov.f32 %f3271, %f3266; - mov.f32 %f3272, %f3266; - mov.f32 %f3273, %f3266; - mov.f32 %f3274, %f3266; - mov.f32 %f3275, %f3266; - mov.f32 %f3276, %f3266; - mov.f32 %f3277, %f3266; - -BB3_41: - mov.u32 %r322, 0; - cvt.rn.f32.s32 %f889, %r321; - sub.f32 %f118, %f889, %f3321; - add.f32 %f119, %f118, 0f3F800000; - sqrt.rn.f32 %f890, %f94; - mul.f32 %f120, %f119, %f890; - abs.f32 %f121, %f120; - mul.f32 %f122, %f120, %f120; - mul.f32 %f123, %f118, %f890; - abs.f32 %f124, %f123; - sqrt.rn.f32 %f126, %f95; - add.f32 %f891, %f889, 0f3F800000; - sub.f32 %f127, %f891, %f3321; - div.rn.f32 %f128, %f127, %f3317; - mov.f32 %f892, 0f3F800000; - cvt.rzi.f32.f32 %f893, %f892; - add.f32 %f894, %f893, %f893; - mov.f32 %f895, 0f40000000; - sub.f32 %f896, %f895, %f894; - abs.f32 %f129, %f896; - setp.eq.f32 %p55, %f129, 0f3F800000; - abs.f32 %f130, %f128; - setp.lt.f32 %p56, %f130, 0f00800000; - mul.f32 %f897, %f130, 0f4B800000; - selp.f32 %f898, 0fC3170000, 0fC2FE0000, %p56; - selp.f32 %f899, %f897, %f130, %p56; - mov.b32 %r124, %f899; - and.b32 %r125, %r124, 8388607; - or.b32 %r126, %r125, 1065353216; - mov.b32 %f900, %r126; - shr.u32 %r127, %r124, 23; - cvt.rn.f32.u32 %f901, %r127; - add.f32 %f902, %f898, %f901; - setp.gt.f32 %p57, %f900, 0f3FB504F3; - mul.f32 %f903, %f900, 0f3F000000; - add.f32 %f904, %f902, 0f3F800000; - selp.f32 %f905, %f903, %f900, %p57; - selp.f32 %f906, %f904, %f902, %p57; - add.f32 %f131, %f905, 0fBF800000; - add.f32 %f132, %f905, 0f3F800000; - add.f32 %f133, %f131, %f131; - mov.f32 %f907, 0f3F317200; - mul.rn.f32 %f134, %f906, %f907; - mov.f32 %f908, 0f35BFBE8E; - mul.rn.f32 %f135, %f906, %f908; - setp.lt.f32 %p58, %f128, 0f00000000; - and.pred %p1, %p58, %p55; - add.f32 %f909, %f128, %f128; - selp.f32 %f136, %f909, 0f00000000, %p55; - div.rn.f32 %f137, %f118, %f3317; - abs.f32 %f138, %f137; - setp.lt.f32 %p59, %f138, 0f00800000; - mul.f32 %f911, %f138, 0f4B800000; - selp.f32 %f912, 0fC3170000, 0fC2FE0000, %p59; - selp.f32 %f913, %f911, %f138, %p59; - mov.b32 %r128, %f913; - and.b32 %r129, %r128, 8388607; - or.b32 %r130, %r129, 1065353216; - mov.b32 %f914, %r130; - shr.u32 %r131, %r128, 23; - cvt.rn.f32.u32 %f915, %r131; - add.f32 %f916, %f912, %f915; - setp.gt.f32 %p60, %f914, 0f3FB504F3; - mul.f32 %f917, %f914, 0f3F000000; - add.f32 %f918, %f916, 0f3F800000; - selp.f32 %f919, %f917, %f914, %p60; - selp.f32 %f920, %f918, %f916, %p60; - add.f32 %f139, %f919, 0fBF800000; - add.f32 %f140, %f919, 0f3F800000; - add.f32 %f141, %f139, %f139; - mul.rn.f32 %f142, %f920, %f907; - mul.rn.f32 %f143, %f920, %f908; - setp.lt.f32 %p61, %f137, 0f00000000; - and.pred %p2, %p61, %p55; - add.f32 %f921, %f137, %f137; - selp.f32 %f144, %f921, 0f00000000, %p55; - mul.f32 %f923, %f119, %f119; - mul.f32 %f145, %f119, %f923; - -BB3_42: - setp.ltu.f32 %p62, %f121, 0f3F800000; - @%p62 bra BB3_44; - bra.uni BB3_43; - -BB3_44: - mov.f32 %f943, 0f3BA0C9F8; - mov.f32 %f944, 0fBA1268FB; - fma.rn.f32 %f945, %f944, %f122, %f943; - mov.f32 %f946, 0fBCDABFD4; - fma.rn.f32 %f947, %f945, %f122, %f946; - mov.f32 %f948, 0f3DE70331; - fma.rn.f32 %f949, %f947, %f122, %f948; - mov.f32 %f950, 0fBEC09330; - fma.rn.f32 %f951, %f949, %f122, %f950; - mov.f32 %f952, 0f3F906EBA; - fma.rn.f32 %f953, %f951, %f122, %f952; - mul.f32 %f3278, %f120, %f953; - bra.uni BB3_45; - -BB3_43: - mov.f32 %f3036, 0f3F800000; - setp.ltu.f32 %p63, %f121, 0f407AD445; - mov.f32 %f925, 0f3A03BB71; - mov.f32 %f926, 0fB7B730FB; - fma.rn.f32 %f927, %f926, %f121, %f925; - mov.f32 %f928, 0fBBACA3B3; - fma.rn.f32 %f929, %f927, %f121, %f928; - mov.f32 %f930, 0f3D0A7445; - fma.rn.f32 %f931, %f929, %f121, %f930; - mov.f32 %f932, 0fBE1B3B75; - fma.rn.f32 %f933, %f931, %f121, %f932; - mov.f32 %f934, 0fBF6B385A; - fma.rn.f32 %f935, %f933, %f121, %f934; - mov.f32 %f936, 0fBFD0316E; - fma.rn.f32 %f937, %f935, %f121, %f936; - mov.f32 %f938, 0fBA031CCE; - fma.rn.f32 %f939, %f937, %f121, %f938; - ex2.approx.ftz.f32 %f940, %f939; - sub.f32 %f942, %f3036, %f940; - mov.b32 %r132, %f942; - selp.b32 %r133, %r132, 1065353216, %p63; - mov.b32 %r134, %f120; - and.b32 %r135, %r134, -2147483648; - or.b32 %r136, %r133, %r135; - mov.b32 %f3278, %r136; - -BB3_45: - setp.ltu.f32 %p64, %f124, 0f3F800000; - @%p64 bra BB3_47; - bra.uni BB3_46; - -BB3_47: - cvt.rn.f32.s32 %f3077, %r321; - sub.f32 %f3076, %f3077, %f3321; - mul.f32 %f3075, %f3076, %f890; - mul.f32 %f3074, %f3075, %f3075; - mov.f32 %f972, 0f3BA0C9F8; - mov.f32 %f973, 0fBA1268FB; - fma.rn.f32 %f974, %f973, %f3074, %f972; - mov.f32 %f975, 0fBCDABFD4; - fma.rn.f32 %f976, %f974, %f3074, %f975; - mov.f32 %f977, 0f3DE70331; - fma.rn.f32 %f978, %f976, %f3074, %f977; - mov.f32 %f979, 0fBEC09330; - fma.rn.f32 %f980, %f978, %f3074, %f979; - mov.f32 %f981, 0f3F906EBA; - fma.rn.f32 %f982, %f980, %f3074, %f981; - mul.f32 %f3279, %f3075, %f982; - bra.uni BB3_48; - -BB3_46: - cvt.rn.f32.s32 %f3090, %r321; - sub.f32 %f3089, %f3090, %f3321; - mul.f32 %f3088, %f3089, %f890; - mov.f32 %f3037, 0f3F800000; - setp.ltu.f32 %p65, %f124, 0f407AD445; - mov.f32 %f954, 0f3A03BB71; - mov.f32 %f955, 0fB7B730FB; - fma.rn.f32 %f956, %f955, %f124, %f954; - mov.f32 %f957, 0fBBACA3B3; - fma.rn.f32 %f958, %f956, %f124, %f957; - mov.f32 %f959, 0f3D0A7445; - fma.rn.f32 %f960, %f958, %f124, %f959; - mov.f32 %f961, 0fBE1B3B75; - fma.rn.f32 %f962, %f960, %f124, %f961; - mov.f32 %f963, 0fBF6B385A; - fma.rn.f32 %f964, %f962, %f124, %f963; - mov.f32 %f965, 0fBFD0316E; - fma.rn.f32 %f966, %f964, %f124, %f965; - mov.f32 %f967, 0fBA031CCE; - fma.rn.f32 %f968, %f966, %f124, %f967; - ex2.approx.ftz.f32 %f969, %f968; - sub.f32 %f971, %f3037, %f969; - mov.b32 %r137, %f971; - selp.b32 %r138, %r137, 1065353216, %p65; - mov.b32 %r139, %f3088; - and.b32 %r140, %r139, -2147483648; - or.b32 %r141, %r138, %r140; - mov.b32 %f3279, %r141; - -BB3_48: - sub.f32 %f983, %f3278, %f3279; - mul.f32 %f165, %f983, 0f3F000000; - cvt.rn.f32.s32 %f166, %r322; - sub.f32 %f167, %f166, %f3320; - add.f32 %f168, %f167, 0f3F800000; - mul.f32 %f169, %f168, %f126; - abs.f32 %f170, %f169; - setp.ltu.f32 %p66, %f170, 0f3F800000; - @%p66 bra BB3_50; - bra.uni BB3_49; - -BB3_50: - mul.f32 %f1002, %f169, %f169; - mov.f32 %f1003, 0f3BA0C9F8; - mov.f32 %f1004, 0fBA1268FB; - fma.rn.f32 %f1005, %f1004, %f1002, %f1003; - mov.f32 %f1006, 0fBCDABFD4; - fma.rn.f32 %f1007, %f1005, %f1002, %f1006; - mov.f32 %f1008, 0f3DE70331; - fma.rn.f32 %f1009, %f1007, %f1002, %f1008; - mov.f32 %f1010, 0fBEC09330; - fma.rn.f32 %f1011, %f1009, %f1002, %f1010; - mov.f32 %f1012, 0f3F906EBA; - fma.rn.f32 %f1013, %f1011, %f1002, %f1012; - mul.f32 %f3280, %f169, %f1013; - bra.uni BB3_51; - -BB3_49: - mov.f32 %f3038, 0f3F800000; - mov.f32 %f984, 0f3A03BB71; - mov.f32 %f985, 0fB7B730FB; - fma.rn.f32 %f986, %f985, %f170, %f984; - mov.f32 %f987, 0fBBACA3B3; - fma.rn.f32 %f988, %f986, %f170, %f987; - mov.f32 %f989, 0f3D0A7445; - fma.rn.f32 %f990, %f988, %f170, %f989; - mov.f32 %f991, 0fBE1B3B75; - fma.rn.f32 %f992, %f990, %f170, %f991; - mov.f32 %f993, 0fBF6B385A; - fma.rn.f32 %f994, %f992, %f170, %f993; - mov.f32 %f995, 0fBFD0316E; - fma.rn.f32 %f996, %f994, %f170, %f995; - mov.f32 %f997, 0fBA031CCE; - fma.rn.f32 %f998, %f996, %f170, %f997; - ex2.approx.ftz.f32 %f999, %f998; - sub.f32 %f1001, %f3038, %f999; - mov.b32 %r142, %f1001; - setp.ltu.f32 %p67, %f170, 0f407AD445; - selp.b32 %r143, %r142, 1065353216, %p67; - mov.b32 %r144, %f169; - and.b32 %r145, %r144, -2147483648; - or.b32 %r146, %r143, %r145; - mov.b32 %f3280, %r146; - -BB3_51: - cvt.rn.f32.s32 %f3092, %r322; - sub.f32 %f3091, %f3092, %f3320; - mul.f32 %f174, %f3091, %f126; - abs.f32 %f175, %f174; - setp.ltu.f32 %p68, %f175, 0f3F800000; - @%p68 bra BB3_53; - bra.uni BB3_52; - -BB3_53: - mul.f32 %f1032, %f174, %f174; - mov.f32 %f1033, 0f3BA0C9F8; - mov.f32 %f1034, 0fBA1268FB; - fma.rn.f32 %f1035, %f1034, %f1032, %f1033; - mov.f32 %f1036, 0fBCDABFD4; - fma.rn.f32 %f1037, %f1035, %f1032, %f1036; - mov.f32 %f1038, 0f3DE70331; - fma.rn.f32 %f1039, %f1037, %f1032, %f1038; - mov.f32 %f1040, 0fBEC09330; - fma.rn.f32 %f1041, %f1039, %f1032, %f1040; - mov.f32 %f1042, 0f3F906EBA; - fma.rn.f32 %f1043, %f1041, %f1032, %f1042; - mul.f32 %f3281, %f174, %f1043; - bra.uni BB3_54; - -BB3_52: - mov.f32 %f3039, 0f3F800000; - mov.f32 %f1014, 0f3A03BB71; - mov.f32 %f1015, 0fB7B730FB; - fma.rn.f32 %f1016, %f1015, %f175, %f1014; - mov.f32 %f1017, 0fBBACA3B3; - fma.rn.f32 %f1018, %f1016, %f175, %f1017; - mov.f32 %f1019, 0f3D0A7445; - fma.rn.f32 %f1020, %f1018, %f175, %f1019; - mov.f32 %f1021, 0fBE1B3B75; - fma.rn.f32 %f1022, %f1020, %f175, %f1021; - mov.f32 %f1023, 0fBF6B385A; - fma.rn.f32 %f1024, %f1022, %f175, %f1023; - mov.f32 %f1025, 0fBFD0316E; - fma.rn.f32 %f1026, %f1024, %f175, %f1025; - mov.f32 %f1027, 0fBA031CCE; - fma.rn.f32 %f1028, %f1026, %f175, %f1027; - ex2.approx.ftz.f32 %f1029, %f1028; - sub.f32 %f1031, %f3039, %f1029; - mov.b32 %r147, %f1031; - setp.ltu.f32 %p69, %f175, 0f407AD445; - selp.b32 %r148, %r147, 1065353216, %p69; - mov.b32 %r149, %f174; - and.b32 %r150, %r149, -2147483648; - or.b32 %r151, %r148, %r150; - mov.b32 %f3281, %r151; - -BB3_54: - sub.f32 %f1046, %f3280, %f3281; - mul.f32 %f179, %f1046, 0f3F000000; - mul.f32 %f1047, %f165, %f3319; - fma.rn.f32 %f180, %f179, %f1047, %f3226; - mad.lo.s32 %r152, %r322, %r71, %r321; - add.s32 %r153, %r152, %r4; - mul.wide.s32 %rd96, %r153, 4; - add.s64 %rd97, %rd1, %rd96; - ld.global.f32 %f181, [%rd97]; - // inline asm - rcp.approx.ftz.f32 %f1044,%f132; - // inline asm - mul.f32 %f1048, %f1044, %f133; - mul.f32 %f1049, %f1048, %f1048; - mov.f32 %f1050, 0f3C4CAF63; - mov.f32 %f1051, 0f3B18F0FE; - fma.rn.f32 %f1052, %f1051, %f1049, %f1050; - mov.f32 %f1053, 0f3DAAAABD; - fma.rn.f32 %f1054, %f1052, %f1049, %f1053; - mul.rn.f32 %f1055, %f1054, %f1049; - mul.rn.f32 %f1056, %f1055, %f1048; - sub.f32 %f1057, %f131, %f1048; - neg.f32 %f1058, %f1048; - add.f32 %f1059, %f1057, %f1057; - fma.rn.f32 %f1060, %f1058, %f131, %f1059; - mul.rn.f32 %f1061, %f1044, %f1060; - add.f32 %f1062, %f1056, %f1048; - sub.f32 %f1063, %f1048, %f1062; - add.f32 %f1064, %f1056, %f1063; - add.f32 %f1065, %f1061, %f1064; - add.f32 %f1066, %f1062, %f1065; - sub.f32 %f1067, %f1062, %f1066; - add.f32 %f1068, %f1065, %f1067; - add.f32 %f1069, %f134, %f1066; - sub.f32 %f1070, %f134, %f1069; - add.f32 %f1071, %f1066, %f1070; - add.f32 %f1072, %f1068, %f1071; - add.f32 %f1073, %f135, %f1072; - add.f32 %f1074, %f1069, %f1073; - sub.f32 %f1075, %f1069, %f1074; - add.f32 %f1076, %f1073, %f1075; - mul.rn.f32 %f1078, %f895, %f1074; - neg.f32 %f1079, %f1078; - fma.rn.f32 %f1080, %f895, %f1074, %f1079; - fma.rn.f32 %f1081, %f895, %f1076, %f1080; - mov.f32 %f1082, 0f00000000; - fma.rn.f32 %f1083, %f1082, %f1074, %f1081; - add.rn.f32 %f1084, %f1078, %f1083; - neg.f32 %f1085, %f1084; - add.rn.f32 %f1086, %f1078, %f1085; - add.rn.f32 %f1087, %f1086, %f1083; - mov.b32 %r154, %f1084; - setp.eq.s32 %p70, %r154, 1118925336; - add.s32 %r155, %r154, -1; - mov.b32 %f1088, %r155; - add.f32 %f1089, %f1087, 0f37000000; - selp.f32 %f1090, %f1088, %f1084, %p70; - selp.f32 %f182, %f1089, %f1087, %p70; - mul.f32 %f1091, %f1090, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1092, %f1091; - mov.f32 %f1093, 0fBF317200; - fma.rn.f32 %f1094, %f1092, %f1093, %f1090; - mov.f32 %f1095, 0fB5BFBE8E; - fma.rn.f32 %f1096, %f1092, %f1095, %f1094; - mul.f32 %f1097, %f1096, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1098, %f1097; - add.f32 %f1099, %f1092, 0f00000000; - ex2.approx.f32 %f1100, %f1099; - mul.f32 %f1101, %f1098, %f1100; - setp.lt.f32 %p71, %f1090, 0fC2D20000; - selp.f32 %f1102, 0f00000000, %f1101, %p71; - setp.gt.f32 %p72, %f1090, 0f42D20000; - selp.f32 %f3282, 0f7F800000, %f1102, %p72; - setp.eq.f32 %p73, %f3282, 0f7F800000; - @%p73 bra BB3_56; - - fma.rn.f32 %f3282, %f3282, %f182, %f3282; - -BB3_56: - setp.geu.f32 %p350, %f128, 0f00000000; - mov.b32 %r156, %f3282; - xor.b32 %r157, %r156, -2147483648; - mov.b32 %f1103, %r157; - selp.f32 %f186, %f1103, %f3282, %p1; - setp.eq.f32 %p74, %f128, 0f00000000; - selp.f32 %f3283, %f136, %f186, %p74; - @%p350 bra BB3_58; - - cvt.rzi.f32.f32 %f1105, %f895; - setp.neu.f32 %p75, %f1105, 0f40000000; - selp.f32 %f3283, 0f7FFFFFFF, %f186, %p75; - -BB3_58: - abs.f32 %f3047, %f128; - add.f32 %f3046, %f3047, 0f40000000; - mov.b32 %r296, %f3046; - mov.f32 %f3045, 0f00000000; - mov.f32 %f3044, 0f3DAAAABD; - mov.f32 %f3043, 0f3C4CAF63; - mov.f32 %f3042, 0f3B18F0FE; - mov.f32 %f3041, 0fB5BFBE8E; - mov.f32 %f3040, 0fBF317200; - add.f32 %f1108, %f128, 0f40000000; - setp.gtu.f32 %p76, %f3047, 0f7F800000; - selp.f32 %f1109, %f1108, %f3283, %p76; - selp.f32 %f1110, 0fFF800000, 0f7F800000, %p1; - setp.neu.f32 %p77, %f3047, 0f7F800000; - selp.f32 %f1111, %f1109, %f1110, %p77; - setp.gt.s32 %p78, %r296, 2139095039; - selp.f32 %f1112, %f1111, %f3283, %p78; - mul.f32 %f1113, %f1112, 0fBF000000; - setp.eq.f32 %p79, %f128, 0f3F800000; - selp.f32 %f1114, 0fBF000000, %f1113, %p79; - mul.f32 %f1115, %f1114, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1116, %f1115; - fma.rn.f32 %f1118, %f1116, %f3040, %f1114; - fma.rn.f32 %f1120, %f1116, %f3041, %f1118; - mul.f32 %f1121, %f1120, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1122, %f1121; - add.f32 %f1123, %f1116, 0f00000000; - ex2.approx.f32 %f1124, %f1123; - mul.f32 %f1125, %f1122, %f1124; - setp.lt.f32 %p80, %f1114, 0fC2D20000; - selp.f32 %f1126, 0f00000000, %f1125, %p80; - setp.gt.f32 %p81, %f1114, 0f42D20000; - selp.f32 %f190, 0f7F800000, %f1126, %p81; - // inline asm - rcp.approx.ftz.f32 %f1106,%f140; - // inline asm - mul.f32 %f1127, %f1106, %f141; + fma.rn.f32 %f809, %f622, %f804, %f808; + fma.rn.f32 %f810, %f622, %f806, %f809; + fma.rn.f32 %f811, %f2820, %f804, %f810; + add.rn.f32 %f812, %f807, %f811; + neg.f32 %f813, %f812; + add.rn.f32 %f814, %f807, %f813; + add.rn.f32 %f815, %f814, %f811; + mov.b32 %r229, %f812; + setp.eq.s32 %p134, %r229, 1118925336; + add.s32 %r230, %r229, -1; + mov.b32 %f816, %r230; + add.f32 %f817, %f815, 0f37000000; + selp.f32 %f98, %f817, %f815, %p134; + selp.f32 %f818, %f816, %f812, %p134; + mul.rn.f32 %f819, %f818, %f684; + cvt.rzi.f32.f32 %f820, %f819; + abs.f32 %f821, %f820; + setp.gt.f32 %p135, %f821, 0f42FC0000; + mov.b32 %r231, %f820; + and.b32 %r232, %r231, -2147483648; + or.b32 %r233, %r232, 1123811328; + mov.b32 %f822, %r233; + selp.f32 %f823, %f822, %f820, %p135; + fma.rn.f32 %f824, %f823, %f690, %f818; + fma.rn.f32 %f825, %f823, %f692, %f824; + mul.f32 %f826, %f825, 0f3FB8AA3B; + add.f32 %f827, %f823, 0f4B40007F; + mov.b32 %r234, %f827; + shl.b32 %r235, %r234, 23; + mov.b32 %f828, %r235; + ex2.approx.ftz.f32 %f829, %f826; + mul.f32 %f99, %f829, %f828; + setp.gt.f64 %p137, %fd343, 0d3FF0000000000000; + selp.b32 %r236, 2146435072, 0, %p137; + xor.b32 %r237, %r236, 2146435072; + selp.b32 %r238, %r237, %r236, %p123; + setp.eq.f32 %p138, %f3043, 0fBF800000; + selp.b32 %r62, 1072693248, %r238, %p138; + div.rn.f32 %f100, %f78, %f3044; + abs.f32 %f101, %f100; + setp.lt.f32 %p139, %f101, 0f00800000; + mul.f32 %f830, %f101, 0f4B800000; + selp.f32 %f831, %f830, %f101, %p139; + selp.f32 %f832, 0fC3170000, 0fC2FE0000, %p139; + mov.b32 %r239, %f831; + and.b32 %r240, %r239, 8388607; + or.b32 %r241, %r240, 1065353216; + mov.b32 %f833, %r241; + shr.u32 %r242, %r239, 23; + cvt.rn.f32.u32 %f834, %r242; + add.f32 %f835, %f832, %f834; + setp.gt.f32 %p140, %f833, 0f3FB504F3; + mul.f32 %f836, %f833, 0f3F000000; + add.f32 %f837, %f835, 0f3F800000; + selp.f32 %f838, %f837, %f835, %p140; + selp.f32 %f839, %f836, %f833, %p140; + add.f32 %f840, %f839, 0fBF800000; + add.f32 %f841, %f839, 0f3F800000; + rcp.approx.ftz.f32 %f842, %f841; + add.f32 %f843, %f840, %f840; + mul.f32 %f844, %f843, %f842; + mul.f32 %f845, %f844, %f844; + fma.rn.f32 %f846, %f641, %f845, %f640; + fma.rn.f32 %f847, %f846, %f845, %f643; + mul.rn.f32 %f848, %f847, %f845; + mul.rn.f32 %f849, %f848, %f844; + sub.f32 %f850, %f840, %f844; + add.f32 %f851, %f850, %f850; + neg.f32 %f852, %f844; + fma.rn.f32 %f853, %f852, %f840, %f851; + mul.rn.f32 %f854, %f842, %f853; + add.f32 %f855, %f849, %f844; + sub.f32 %f856, %f844, %f855; + add.f32 %f857, %f849, %f856; + add.f32 %f858, %f854, %f857; + add.f32 %f859, %f855, %f858; + sub.f32 %f860, %f855, %f859; + add.f32 %f861, %f858, %f860; + mul.rn.f32 %f862, %f838, %f659; + mul.rn.f32 %f863, %f838, %f661; + add.f32 %f864, %f862, %f859; + sub.f32 %f865, %f862, %f864; + add.f32 %f866, %f859, %f865; + add.f32 %f867, %f861, %f866; + add.f32 %f868, %f863, %f867; + add.f32 %f869, %f864, %f868; + sub.f32 %f870, %f864, %f869; + add.f32 %f871, %f868, %f870; + mul.rn.f32 %f872, %f622, %f869; + neg.f32 %f873, %f872; + fma.rn.f32 %f874, %f622, %f869, %f873; + fma.rn.f32 %f875, %f622, %f871, %f874; + fma.rn.f32 %f876, %f2820, %f869, %f875; + add.rn.f32 %f877, %f872, %f876; + neg.f32 %f878, %f877; + add.rn.f32 %f879, %f872, %f878; + add.rn.f32 %f880, %f879, %f876; + mov.b32 %r243, %f877; + setp.eq.s32 %p141, %r243, 1118925336; + add.s32 %r244, %r243, -1; + mov.b32 %f881, %r244; + add.f32 %f882, %f880, 0f37000000; + selp.f32 %f102, %f882, %f880, %p141; + selp.f32 %f883, %f881, %f877, %p141; + mul.rn.f32 %f884, %f883, %f684; + cvt.rzi.f32.f32 %f885, %f884; + abs.f32 %f886, %f885; + setp.gt.f32 %p142, %f886, 0f42FC0000; + mov.b32 %r245, %f885; + and.b32 %r246, %r245, -2147483648; + or.b32 %r247, %r246, 1123811328; + mov.b32 %f887, %r247; + selp.f32 %f888, %f887, %f885, %p142; + fma.rn.f32 %f889, %f888, %f690, %f883; + fma.rn.f32 %f890, %f888, %f692, %f889; + mul.f32 %f891, %f890, 0f3FB8AA3B; + add.f32 %f892, %f888, 0f4B40007F; + mov.b32 %r248, %f892; + shl.b32 %r249, %r248, 23; + mov.b32 %f893, %r249; + ex2.approx.ftz.f32 %f894, %f891; + mul.f32 %f103, %f894, %f893; + { + .reg .b32 %temp; + mov.b64 {%temp, %r250}, %fd345; + } + and.b32 %r251, %r250, 2146435072; + setp.eq.s32 %p144, %r251, 1074790400; + { // callseq 71, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd340; + .param .b64 param1; + st.param.f64 [param1+0], %fd345; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd38, [retval0+0]; + } // callseq 71 + and.pred %p10, %p122, %p144; + selp.b32 %r252, %r53, 0, %p144; + setp.lt.s32 %p145, %r250, 0; + or.b32 %r253, %r252, 2146435072; + selp.b32 %r63, %r253, %r252, %p145; + add.f64 %fd346, %fd338, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r254}, %fd346; + } + and.b32 %r64, %r254, 2146435072; + setp.ne.s32 %p146, %r64, 2146435072; + cvt.f64.f32 %fd39, %f79; + { + .reg .b32 %temp; + mov.b64 {%temp, %r65}, %fd39; + } + abs.f64 %fd347, %fd39; + { // callseq 72, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd347; + .param .b64 param1; + st.param.f64 [param1+0], %fd339; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd40, [retval0+0]; + } // callseq 72 + setp.lt.s32 %p147, %r65, 0; + and.pred %p11, %p147, %p121; + and.b32 %r66, %r250, 2147483647; + selp.b32 %r255, %r222, %r221, %p145; + selp.b32 %r67, 1072693248, %r255, %p127; + add.f64 %fd41, %fd39, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r256}, %fd41; + } + and.b32 %r68, %r256, 2146435072; + setp.ne.s32 %p148, %r68, 2146435072; + setp.gt.s32 %p149, %r250, -1; + selp.b32 %r257, 2146435072, 0, %p149; + setp.ne.s32 %p150, %r66, 1071644672; + and.pred %p151, %p150, %p10; + or.b32 %r258, %r257, -2147483648; + selp.b32 %r69, %r258, %r257, %p151; + setp.gtu.f64 %p152, %fd347, 0d7FF0000000000000; + cvt.f64.f32 %fd42, %f82; + { + .reg .b32 %temp; + mov.b64 {%temp, %r70}, %fd42; + } + abs.f64 %fd348, %fd42; + { // callseq 73, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd348; + .param .b64 param1; + st.param.f64 [param1+0], %fd339; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd43, [retval0+0]; + } // callseq 73 + setp.lt.s32 %p153, %r70, 0; + and.pred %p12, %p153, %p121; + setp.gt.f64 %p154, %fd347, 0d3FF0000000000000; + selp.b32 %r259, 2146435072, 0, %p154; + xor.b32 %r260, %r259, 2146435072; + selp.b32 %r261, %r260, %r259, %p123; + setp.eq.f32 %p155, %f79, 0fBF800000; + selp.b32 %r71, 1072693248, %r261, %p155; + add.f64 %fd44, %fd42, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r262}, %fd44; + } + and.b32 %r72, %r262, 2146435072; + setp.ne.s32 %p156, %r72, 2146435072; + setp.gtu.f64 %p157, %fd348, 0d7FF0000000000000; + setp.gt.f64 %p158, %fd348, 0d3FF0000000000000; + selp.b32 %r263, 2146435072, 0, %p158; + xor.b32 %r264, %r263, 2146435072; + selp.b32 %r265, %r264, %r263, %p123; + setp.eq.f32 %p159, %f82, 0fBF800000; + selp.b32 %r73, 1072693248, %r265, %p159; + { // callseq 74, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd343; + .param .b64 param1; + st.param.f64 [param1+0], %fd345; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd45, [retval0+0]; + } // callseq 74 + and.pred %p13, %p129, %p144; + selp.b32 %r266, %r60, 0, %p144; + or.b32 %r267, %r266, 2146435072; + selp.b32 %r74, %r267, %r266, %p145; + add.f64 %fd349, %fd342, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r268}, %fd349; + } + and.b32 %r75, %r268, 2146435072; + setp.ne.s32 %p160, %r75, 2146435072; + selp.b32 %r269, %r237, %r236, %p145; + selp.b32 %r76, 1072693248, %r269, %p138; + and.pred %p161, %p150, %p13; + selp.b32 %r77, %r258, %r257, %p161; + mov.f64 %fd350, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r78}, %fd350; + } + and.b32 %r79, %r78, 2147483647; + setp.gt.s32 %p162, %r78, -1; + selp.b32 %r80, 2146435072, 0, %p162; + or.pred %p16, %p124, %p125; + or.pred %p17, %p130, %p131; + or.pred %p20, %p146, %p125; + or.pred %p21, %p148, %p152; + or.pred %p22, %p156, %p157; + or.pred %p23, %p160, %p131; + shr.s32 %r270, %r78, 31; + and.b32 %r81, %r270, 2146435072; + +$L__BB3_56: + cvt.rn.f32.s32 %f2825, %r852; + sub.f32 %f2824, %f2825, %f3048; + add.f32 %f2823, %f2824, 0f3F000000; + mul.f32 %f2822, %f2823, %f581; + abs.f32 %f2821, %f2822; + setp.ltu.f32 %p163, %f2821, 0f3F8060FE; + mov.f32 %f3005, %f81; + @%p163 bra $L__BB3_58; + + mov.f32 %f2930, 0f3F800000; + ex2.approx.ftz.f32 %f895, %f81; + sub.f32 %f897, %f2930, %f895; + mov.b32 %r271, %f897; + or.b32 %r272, %r51, %r271; + mov.b32 %f3005, %r272; + +$L__BB3_58: + cvt.rn.f32.s32 %f2830, %r852; + sub.f32 %f2829, %f2830, %f3048; + add.f32 %f2828, %f2829, 0fBF000000; + mul.f32 %f2827, %f2828, %f581; + abs.f32 %f2826, %f2827; + setp.ltu.f32 %p164, %f2826, 0f3F8060FE; + mov.f32 %f3006, %f84; + @%p164 bra $L__BB3_60; + + mov.f32 %f2929, 0f3F800000; + ex2.approx.ftz.f32 %f898, %f84; + sub.f32 %f900, %f2929, %f898; + mov.b32 %r273, %f900; + or.b32 %r274, %r52, %r273; + mov.b32 %f3006, %r274; + +$L__BB3_60: + sub.f32 %f901, %f3005, %f3006; + mul.f32 %f120, %f901, 0f3F000000; + cvt.rn.f32.s32 %f121, %r853; + sub.f32 %f122, %f121, %f3047; + add.f32 %f123, %f122, 0f3F000000; + mul.f32 %f124, %f123, %f85; + abs.f32 %f902, %f124; + setp.ltu.f32 %p165, %f902, 0f3F8060FE; + setp.ge.f32 %p166, %f902, 0f3F8060FE; + mul.f32 %f903, %f124, %f124; + selp.f32 %f904, %f902, %f903, %p166; + selp.f32 %f905, 0f3789CA3C, 0f38B1E96A, %p166; + selp.f32 %f906, 0fB9F560B9, 0fBA574D20, %p166; + fma.rn.f32 %f907, %f905, %f904, %f906; + selp.f32 %f908, 0f3BAC840B, 0f3BAAD5EA, %p166; + fma.rn.f32 %f909, %f907, %f904, %f908; + selp.f32 %f910, 0fBD0C8162, 0fBCDC1BE7, %p166; + fma.rn.f32 %f911, %f909, %f904, %f910; + selp.f32 %f912, 0f3E1CF906, 0f3DE718AF, %p166; + fma.rn.f32 %f913, %f911, %f904, %f912; + selp.f32 %f914, 0f3F6A937E, 0fBEC093AC, %p166; + fma.rn.f32 %f915, %f913, %f904, %f914; + selp.f32 %f916, 0f3F20D842, 0f3E0375D3, %p166; + fma.rn.f32 %f917, %f915, %f904, %f916; + neg.f32 %f918, %f902; + selp.f32 %f919, %f918, %f124, %p166; + fma.rn.f32 %f3007, %f917, %f919, %f919; + @%p165 bra $L__BB3_62; + + mov.f32 %f2928, 0f3F800000; + ex2.approx.ftz.f32 %f920, %f3007; + sub.f32 %f922, %f2928, %f920; + mov.b32 %r275, %f922; + mov.b32 %r276, %f124; + and.b32 %r277, %r276, -2147483648; + or.b32 %r278, %r277, %r275; + mov.b32 %f3007, %r278; + +$L__BB3_62: + cvt.rn.f32.s32 %f2832, %r853; + sub.f32 %f2831, %f2832, %f3047; + add.f32 %f128, %f2831, 0fBF000000; + mul.f32 %f129, %f128, %f85; + abs.f32 %f923, %f129; + setp.ltu.f32 %p167, %f923, 0f3F8060FE; + setp.ge.f32 %p168, %f923, 0f3F8060FE; + mul.f32 %f924, %f129, %f129; + selp.f32 %f925, %f923, %f924, %p168; + selp.f32 %f926, 0f3789CA3C, 0f38B1E96A, %p168; + selp.f32 %f927, 0fB9F560B9, 0fBA574D20, %p168; + fma.rn.f32 %f928, %f926, %f925, %f927; + selp.f32 %f929, 0f3BAC840B, 0f3BAAD5EA, %p168; + fma.rn.f32 %f930, %f928, %f925, %f929; + selp.f32 %f931, 0fBD0C8162, 0fBCDC1BE7, %p168; + fma.rn.f32 %f932, %f930, %f925, %f931; + selp.f32 %f933, 0f3E1CF906, 0f3DE718AF, %p168; + fma.rn.f32 %f934, %f932, %f925, %f933; + selp.f32 %f935, 0f3F6A937E, 0fBEC093AC, %p168; + fma.rn.f32 %f936, %f934, %f925, %f935; + selp.f32 %f937, 0f3F20D842, 0f3E0375D3, %p168; + fma.rn.f32 %f938, %f936, %f925, %f937; + neg.f32 %f939, %f923; + selp.f32 %f940, %f939, %f129, %p168; + fma.rn.f32 %f3008, %f938, %f940, %f940; + @%p167 bra $L__BB3_64; + + mov.f32 %f2927, 0f3F800000; + ex2.approx.ftz.f32 %f941, %f3008; + sub.f32 %f943, %f2927, %f941; + mov.b32 %r279, %f943; + mov.b32 %r280, %f129; + and.b32 %r281, %r280, -2147483648; + or.b32 %r282, %r281, %r279; + mov.b32 %f3008, %r282; + +$L__BB3_64: + sub.f32 %f945, %f3007, %f3008; + mul.f32 %f133, %f945, 0f3F000000; + mul.f32 %f946, %f120, %f3046; + fma.rn.f32 %f134, %f133, %f946, %f3045; + mad.lo.s32 %r283, %r853, %r108, %r852; + add.s32 %r284, %r283, %r2; + mul.wide.s32 %rd26, %r284, 4; + add.s64 %rd27, %rd1, %rd26; + ld.global.f32 %f135, [%rd27]; + setp.eq.f32 %p169, %f91, 0f7F800000; + mov.f32 %f3009, 0f7F800000; + @%p169 bra $L__BB3_66; + + fma.rn.f32 %f3009, %f91, %f90, %f91; + +$L__BB3_66: + setp.geu.f32 %p728, %f87, 0f00000000; + mov.b32 %r285, %f3009; + xor.b32 %r286, %r285, -2147483648; + mov.b32 %f947, %r286; + selp.f32 %f138, %f947, %f3009, %p4; + add.f32 %f948, %f87, %f87; + selp.f32 %f949, %f948, 0f00000000, %p110; + setp.eq.f32 %p171, %f87, 0f00000000; + selp.f32 %f3010, %f949, %f138, %p171; + @%p728 bra $L__BB3_69; + + cvt.rzi.f32.f32 %f951, %f622; + setp.eq.f32 %p172, %f951, 0f40000000; + mov.f32 %f3010, %f138; + @%p172 bra $L__BB3_69; + + mov.f32 %f3010, 0f7FFFFFFF; + +$L__BB3_69: + mov.f32 %f2835, 0f3FB8AA3B; + mov.f32 %f2834, 0f3F000000; + abs.f32 %f2833, %f87; + add.f32 %f954, %f2833, 0f40000000; + mov.b32 %r287, %f954; + setp.gt.s32 %p173, %r287, 2139095039; + add.f32 %f955, %f87, 0f40000000; + setp.gtu.f32 %p174, %f2833, 0f7F800000; + mov.f32 %f3011, 0f7F800000; + selp.f32 %f956, %f955, %f3010, %p174; + selp.f32 %f957, 0fFF800000, 0f7F800000, %p4; + setp.neu.f32 %p175, %f2833, 0f7F800000; + selp.f32 %f958, %f956, %f957, %p175; + selp.f32 %f959, %f958, %f3010, %p173; + mul.f32 %f960, %f959, 0fBF000000; + setp.eq.f32 %p176, %f87, 0f3F800000; + selp.f32 %f961, 0fBF000000, %f960, %p176; + mov.f32 %f963, 0f3BBB989D; + fma.rn.f32 %f964, %f961, %f963, %f2834; + mov.f32 %f966, 0f437C0000; + cvt.sat.f32.f32 %f967, %f964; + mov.f32 %f968, 0f4B400001; + fma.rm.f32 %f969, %f967, %f966, %f968; + add.f32 %f970, %f969, 0fCB40007F; + neg.f32 %f971, %f970; + fma.rn.f32 %f972, %f961, %f2835, %f971; + mov.f32 %f973, 0f32A57060; + fma.rn.f32 %f974, %f961, %f973, %f972; + mov.b32 %r288, %f969; + shl.b32 %r289, %r288, 23; + mov.b32 %f975, %r289; + ex2.approx.ftz.f32 %f976, %f974; + mul.f32 %f141, %f976, %f975; + setp.eq.f32 %p177, %f95, 0f7F800000; + @%p177 bra $L__BB3_71; + + fma.rn.f32 %f3011, %f95, %f94, %f95; + +$L__BB3_71: + setp.geu.f32 %p731, %f92, 0f00000000; + setp.lt.f32 %p730, %f92, 0f00000000; + and.pred %p729, %p730, %p110; + mov.b32 %r290, %f3011; + xor.b32 %r291, %r290, -2147483648; + mov.b32 %f977, %r291; + selp.f32 %f144, %f977, %f3011, %p729; + add.f32 %f978, %f92, %f92; + selp.f32 %f979, %f978, 0f00000000, %p110; + setp.eq.f32 %p179, %f92, 0f00000000; + selp.f32 %f3012, %f979, %f144, %p179; + @%p731 bra $L__BB3_74; + + cvt.rzi.f32.f32 %f981, %f622; + setp.eq.f32 %p180, %f981, 0f40000000; + mov.f32 %f3012, %f144; + @%p180 bra $L__BB3_74; + + mov.f32 %f3012, 0f7FFFFFFF; + +$L__BB3_74: + mov.f32 %f2842, 0f32A57060; + mov.f32 %f2841, 0f4B400001; + mov.f32 %f2840, 0f437C0000; + mov.f32 %f2839, 0f3BBB989D; + abs.f32 %f2838, %f92; + setp.lt.f32 %p733, %f92, 0f00000000; + and.pred %p732, %p733, %p110; + mov.f32 %f2837, 0f3FB8AA3B; + mov.f32 %f2836, 0f3F000000; + add.f32 %f983, %f2838, 0f40000000; + mov.b32 %r292, %f983; + setp.gt.s32 %p181, %r292, 2139095039; + add.f32 %f984, %f92, 0f40000000; + setp.gtu.f32 %p182, %f2838, 0f7F800000; + selp.f32 %f985, %f984, %f3012, %p182; + selp.f32 %f986, 0fFF800000, 0f7F800000, %p732; + setp.neu.f32 %p183, %f2838, 0f7F800000; + selp.f32 %f987, %f985, %f986, %p183; + selp.f32 %f988, %f987, %f3012, %p181; + mul.f32 %f989, %f988, 0fBF000000; + setp.eq.f32 %p184, %f92, 0f3F800000; + selp.f32 %f990, 0fBF000000, %f989, %p184; + fma.rn.f32 %f993, %f990, %f2839, %f2836; + cvt.sat.f32.f32 %f996, %f993; + fma.rm.f32 %f998, %f996, %f2840, %f2841; + add.f32 %f999, %f998, 0fCB40007F; + neg.f32 %f1000, %f999; + fma.rn.f32 %f1001, %f990, %f2837, %f1000; + fma.rn.f32 %f1003, %f990, %f2842, %f1001; + mov.b32 %r293, %f998; + shl.b32 %r294, %r293, 23; + mov.b32 %f1004, %r294; + ex2.approx.ftz.f32 %f1005, %f1003; + mul.f32 %f147, %f1005, %f1004; + sub.f32 %f1006, %f141, %f147; + mul.f32 %f1007, %f60, %f1006; + mul.f32 %f148, %f133, %f1007; + not.pred %p185, %p6; + mov.f64 %fd586, %fd36; + @%p185 bra $L__BB3_76; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r295}, %fd36; + } + xor.b32 %r296, %r295, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r297, %temp}, %fd36; + } + mov.b64 %fd586, {%r297, %r296}; + +$L__BB3_76: + setp.eq.f32 %p186, %f3044, 0f00000000; + @%p186 bra $L__BB3_80; + bra.uni $L__BB3_77; + +$L__BB3_80: + mov.u32 %r298, 0; + selp.b32 %r300, %r53, 0, %p121; + or.b32 %r301, %r300, 2146435072; + selp.b32 %r302, %r301, %r300, %p123; + mov.b64 %fd586, {%r298, %r302}; + bra.uni $L__BB3_81; + +$L__BB3_77: + setp.gt.s32 %p187, %r53, -1; + @%p187 bra $L__BB3_81; + + cvt.rzi.f64.f64 %fd352, %fd339; + setp.eq.f64 %p188, %fd352, 0d4008000000000000; + @%p188 bra $L__BB3_81; + + mov.f64 %fd586, 0dFFF8000000000000; + +$L__BB3_81: + cvt.f64.f32 %fd550, %f3044; + add.f64 %fd549, %fd550, 0d4008000000000000; + selp.f64 %fd587, %fd586, %fd549, %p124; + @%p16 bra $L__BB3_86; + + setp.eq.s32 %p192, %r57, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r303, %temp}, %fd339; + } + setp.eq.s32 %p193, %r303, 0; + and.pred %p194, %p192, %p193; + @%p194 bra $L__BB3_85; + bra.uni $L__BB3_83; + +$L__BB3_85: + mov.u32 %r310, 0; + mov.b64 %fd587, {%r310, %r58}; + bra.uni $L__BB3_86; + +$L__BB3_83: + cvt.f64.f32 %fd551, %f3044; + and.b32 %r304, %r53, 2147483647; + setp.ne.s32 %p195, %r304, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r305, %temp}, %fd551; + } + setp.ne.s32 %p196, %r305, 0; + or.pred %p197, %p195, %p196; + mov.f64 %fd587, %fd586; + @%p197 bra $L__BB3_86; + + setp.ne.s32 %p198, %r57, 1071644672; + and.pred %p199, %p198, %p6; + or.b32 %r307, %r59, -2147483648; + selp.b32 %r308, %r307, %r59, %p199; + mov.u32 %r309, 0; + mov.b64 %fd587, {%r309, %r308}; + +$L__BB3_86: + mov.f32 %f2853, 0f3102E308; + mov.f32 %f2852, 0fBF317218; + mov.f32 %f2851, 0f35BFBE8E; + mov.f32 %f2850, 0f3F317200; + mov.f32 %f2849, 0f3DAAAABD; + mov.f32 %f2848, 0f3C4CAF63; + mov.f32 %f2847, 0f3B18F0FE; + cvt.rn.f32.s32 %f2846, %r852; + add.f32 %f2845, %f2846, 0f3F000000; + sub.f32 %f2844, %f2845, %f3048; + mov.f32 %f2843, 0f3FB8AA3B; + setp.eq.f32 %p200, %f3044, 0f3F800000; + selp.f64 %fd358, 0d3FF0000000000000, %fd587, %p200; + div.rn.f64 %fd359, %fd34, %fd358; + mul.f32 %f1009, %f82, %f147; + mul.f32 %f1010, %f2844, %f141; + sub.f32 %f1011, %f1010, %f1009; + cvt.f64.f32 %fd360, %f1011; + mul.f64 %fd361, %fd359, %fd360; + cvt.f64.f32 %fd54, %f133; + mul.f64 %fd362, %fd361, %fd54; + cvt.rn.f32.f64 %f149, %fd362; + add.f32 %f1012, %f121, 0f3F000000; + sub.f32 %f150, %f1012, %f3047; + div.rn.f32 %f151, %f150, %f3043; + abs.f32 %f152, %f151; + setp.lt.f32 %p201, %f152, 0f00800000; + mul.f32 %f1013, %f152, 0f4B800000; + selp.f32 %f1014, %f1013, %f152, %p201; + selp.f32 %f1015, 0fC3170000, 0fC2FE0000, %p201; + mov.b32 %r311, %f1014; + and.b32 %r312, %r311, 8388607; + or.b32 %r313, %r312, 1065353216; + mov.b32 %f1016, %r313; + shr.u32 %r314, %r311, 23; + cvt.rn.f32.u32 %f1017, %r314; + add.f32 %f1018, %f1015, %f1017; + setp.gt.f32 %p202, %f1016, 0f3FB504F3; + mul.f32 %f1019, %f1016, 0f3F000000; + add.f32 %f1020, %f1018, 0f3F800000; + selp.f32 %f1021, %f1020, %f1018, %p202; + selp.f32 %f1022, %f1019, %f1016, %p202; + add.f32 %f1023, %f1022, 0fBF800000; + add.f32 %f1024, %f1022, 0f3F800000; + rcp.approx.ftz.f32 %f1025, %f1024; + add.f32 %f1026, %f1023, %f1023; + mul.f32 %f1028, %f1026, %f1025; + mul.f32 %f1029, %f1028, %f1028; + fma.rn.f32 %f1032, %f2847, %f1029, %f2848; + fma.rn.f32 %f1034, %f1032, %f1029, %f2849; + mul.rn.f32 %f1035, %f1034, %f1029; + mul.rn.f32 %f1036, %f1035, %f1028; + sub.f32 %f1037, %f1023, %f1028; + add.f32 %f1038, %f1037, %f1037; + neg.f32 %f1039, %f1028; + fma.rn.f32 %f1040, %f1039, %f1023, %f1038; + mul.rn.f32 %f1041, %f1025, %f1040; + add.f32 %f1042, %f1036, %f1028; + sub.f32 %f1043, %f1028, %f1042; + add.f32 %f1044, %f1036, %f1043; + add.f32 %f1045, %f1041, %f1044; + add.f32 %f1046, %f1042, %f1045; + sub.f32 %f1047, %f1042, %f1046; + add.f32 %f1048, %f1045, %f1047; + mul.rn.f32 %f1050, %f1021, %f2850; + mul.rn.f32 %f1052, %f1021, %f2851; + add.f32 %f1053, %f1050, %f1046; + sub.f32 %f1054, %f1050, %f1053; + add.f32 %f1055, %f1046, %f1054; + add.f32 %f1056, %f1048, %f1055; + add.f32 %f1057, %f1052, %f1056; + add.f32 %f1058, %f1053, %f1057; + sub.f32 %f1059, %f1053, %f1058; + add.f32 %f1060, %f1057, %f1059; + mul.rn.f32 %f1061, %f622, %f1058; + neg.f32 %f1062, %f1061; + fma.rn.f32 %f1063, %f622, %f1058, %f1062; + fma.rn.f32 %f1064, %f622, %f1060, %f1063; + mov.f32 %f1065, 0f00000000; + fma.rn.f32 %f1066, %f1065, %f1058, %f1064; + add.rn.f32 %f1067, %f1061, %f1066; + neg.f32 %f1068, %f1067; + add.rn.f32 %f1069, %f1061, %f1068; + add.rn.f32 %f1070, %f1069, %f1066; + mov.b32 %r315, %f1067; + setp.eq.s32 %p203, %r315, 1118925336; + add.s32 %r316, %r315, -1; + mov.b32 %f1071, %r316; + add.f32 %f1072, %f1070, 0f37000000; + selp.f32 %f153, %f1072, %f1070, %p203; + selp.f32 %f1073, %f1071, %f1067, %p203; + mul.rn.f32 %f1075, %f1073, %f2843; + cvt.rzi.f32.f32 %f1076, %f1075; + abs.f32 %f1077, %f1076; + setp.gt.f32 %p204, %f1077, 0f42FC0000; + mov.b32 %r317, %f1076; + and.b32 %r318, %r317, -2147483648; + or.b32 %r319, %r318, 1123811328; + mov.b32 %f1078, %r319; + selp.f32 %f1079, %f1078, %f1076, %p204; + fma.rn.f32 %f1081, %f1079, %f2852, %f1073; + fma.rn.f32 %f1083, %f1079, %f2853, %f1081; + mul.f32 %f1084, %f1083, 0f3FB8AA3B; + add.f32 %f1085, %f1079, 0f4B40007F; + mov.b32 %r320, %f1085; + shl.b32 %r321, %r320, 23; + mov.b32 %f1086, %r321; + ex2.approx.ftz.f32 %f1087, %f1084; + mul.f32 %f154, %f1087, %f1086; + setp.eq.f32 %p205, %f154, 0f7F800000; + mov.f32 %f3013, 0f7F800000; + @%p205 bra $L__BB3_88; + + fma.rn.f32 %f3013, %f154, %f153, %f154; + +$L__BB3_88: + setp.lt.f32 %p206, %f151, 0f00000000; + and.pred %p24, %p206, %p110; + setp.eq.f32 %p208, %f151, 0f00000000; + @%p208 bra $L__BB3_92; + bra.uni $L__BB3_89; + +$L__BB3_92: + add.f32 %f1092, %f151, %f151; + selp.f32 %f3015, %f1092, 0f00000000, %p110; + bra.uni $L__BB3_93; + +$L__BB3_89: + mov.b32 %r322, %f3013; + xor.b32 %r323, %r322, -2147483648; + mov.b32 %f1088, %r323; + selp.f32 %f3015, %f1088, %f3013, %p24; + setp.geu.f32 %p209, %f151, 0f00000000; + @%p209 bra $L__BB3_93; + + cvt.rzi.f32.f32 %f1090, %f622; + setp.eq.f32 %p210, %f1090, 0f40000000; + @%p210 bra $L__BB3_93; + + mov.f32 %f3015, 0f7FFFFFFF; + +$L__BB3_93: + abs.f32 %f2933, %f151; + add.f32 %f1093, %f2933, 0f40000000; + mov.b32 %r324, %f1093; + setp.lt.s32 %p212, %r324, 2139095040; + @%p212 bra $L__BB3_98; + + abs.f32 %f2938, %f151; + setp.gtu.f32 %p213, %f2938, 0f7F800000; + @%p213 bra $L__BB3_97; + bra.uni $L__BB3_95; + +$L__BB3_97: + add.f32 %f3015, %f151, 0f40000000; + bra.uni $L__BB3_98; + +$L__BB3_95: + abs.f32 %f2939, %f151; + setp.neu.f32 %p214, %f2939, 0f7F800000; + @%p214 bra $L__BB3_98; + + selp.f32 %f3015, 0fFF800000, 0f7F800000, %p24; + +$L__BB3_98: + mov.f32 %f2867, 0f00000000; + mov.f32 %f2866, 0f3102E308; + mov.f32 %f2865, 0fBF317218; + mov.f32 %f2864, 0f35BFBE8E; + mov.f32 %f2863, 0f3F317200; + mov.f32 %f2862, 0f3DAAAABD; + mov.f32 %f2861, 0f3C4CAF63; + mov.f32 %f2860, 0f3B18F0FE; + mov.f32 %f2859, 0f32A57060; + mov.f32 %f2858, 0f4B400001; + mov.f32 %f2857, 0f437C0000; + mov.f32 %f2856, 0f3BBB989D; + mov.f32 %f2855, 0f3FB8AA3B; + mov.f32 %f2854, 0f3F000000; + mul.f32 %f1095, %f3015, 0fBF000000; + setp.eq.f32 %p215, %f151, 0f3F800000; + selp.f32 %f1096, 0fBF000000, %f1095, %p215; + fma.rn.f32 %f1099, %f1096, %f2856, %f2854; + cvt.sat.f32.f32 %f1102, %f1099; + fma.rm.f32 %f1104, %f1102, %f2857, %f2858; + add.f32 %f1105, %f1104, 0fCB40007F; + neg.f32 %f1106, %f1105; + fma.rn.f32 %f1107, %f1096, %f2855, %f1106; + fma.rn.f32 %f1109, %f1096, %f2859, %f1107; + mov.b32 %r325, %f1104; + shl.b32 %r326, %r325, 23; + mov.b32 %f1110, %r326; + ex2.approx.ftz.f32 %f1111, %f1109; + mul.f32 %f163, %f1111, %f1110; + div.rn.f32 %f164, %f128, %f3043; + abs.f32 %f165, %f164; + setp.lt.f32 %p216, %f165, 0f00800000; + mul.f32 %f1112, %f165, 0f4B800000; + selp.f32 %f1113, %f1112, %f165, %p216; + selp.f32 %f1114, 0fC3170000, 0fC2FE0000, %p216; + mov.b32 %r327, %f1113; + and.b32 %r328, %r327, 8388607; + or.b32 %r329, %r328, 1065353216; + mov.b32 %f1115, %r329; + shr.u32 %r330, %r327, 23; + cvt.rn.f32.u32 %f1116, %r330; + add.f32 %f1117, %f1114, %f1116; + setp.gt.f32 %p217, %f1115, 0f3FB504F3; + mul.f32 %f1118, %f1115, 0f3F000000; + add.f32 %f1119, %f1117, 0f3F800000; + selp.f32 %f1120, %f1119, %f1117, %p217; + selp.f32 %f1121, %f1118, %f1115, %p217; + add.f32 %f1122, %f1121, 0fBF800000; + add.f32 %f1123, %f1121, 0f3F800000; + rcp.approx.ftz.f32 %f1124, %f1123; + add.f32 %f1125, %f1122, %f1122; + mul.f32 %f1127, %f1125, %f1124; mul.f32 %f1128, %f1127, %f1127; - fma.rn.f32 %f1131, %f3042, %f1128, %f3043; - fma.rn.f32 %f1133, %f1131, %f1128, %f3044; + fma.rn.f32 %f1131, %f2860, %f1128, %f2861; + fma.rn.f32 %f1133, %f1131, %f1128, %f2862; mul.rn.f32 %f1134, %f1133, %f1128; mul.rn.f32 %f1135, %f1134, %f1127; - sub.f32 %f1136, %f139, %f1127; - neg.f32 %f1137, %f1127; - add.f32 %f1138, %f1136, %f1136; - fma.rn.f32 %f1139, %f1137, %f139, %f1138; - mul.rn.f32 %f1140, %f1106, %f1139; + sub.f32 %f1136, %f1122, %f1127; + add.f32 %f1137, %f1136, %f1136; + neg.f32 %f1138, %f1127; + fma.rn.f32 %f1139, %f1138, %f1122, %f1137; + mul.rn.f32 %f1140, %f1124, %f1139; add.f32 %f1141, %f1135, %f1127; sub.f32 %f1142, %f1127, %f1141; add.f32 %f1143, %f1135, %f1142; @@ -11893,3071 +21019,4480 @@ BB3_58: add.f32 %f1145, %f1141, %f1144; sub.f32 %f1146, %f1141, %f1145; add.f32 %f1147, %f1144, %f1146; - add.f32 %f1148, %f142, %f1145; - sub.f32 %f1149, %f142, %f1148; - add.f32 %f1150, %f1145, %f1149; - add.f32 %f1151, %f1147, %f1150; - add.f32 %f1152, %f143, %f1151; - add.f32 %f1153, %f1148, %f1152; - sub.f32 %f1154, %f1148, %f1153; - add.f32 %f1155, %f1152, %f1154; - mul.rn.f32 %f1157, %f895, %f1153; - neg.f32 %f1158, %f1157; - fma.rn.f32 %f1159, %f895, %f1153, %f1158; - fma.rn.f32 %f1160, %f895, %f1155, %f1159; - fma.rn.f32 %f1162, %f3045, %f1153, %f1160; - add.rn.f32 %f1163, %f1157, %f1162; - neg.f32 %f1164, %f1163; - add.rn.f32 %f1165, %f1157, %f1164; - add.rn.f32 %f1166, %f1165, %f1162; - mov.b32 %r158, %f1163; - setp.eq.s32 %p82, %r158, 1118925336; - add.s32 %r159, %r158, -1; - mov.b32 %f1167, %r159; - add.f32 %f1168, %f1166, 0f37000000; - selp.f32 %f1169, %f1167, %f1163, %p82; - selp.f32 %f191, %f1168, %f1166, %p82; - mul.f32 %f1170, %f1169, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1171, %f1170; - fma.rn.f32 %f1172, %f1171, %f3040, %f1169; - fma.rn.f32 %f1173, %f1171, %f3041, %f1172; - mul.f32 %f1174, %f1173, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1175, %f1174; - add.f32 %f1176, %f1171, 0f00000000; - ex2.approx.f32 %f1177, %f1176; - mul.f32 %f1178, %f1175, %f1177; - setp.lt.f32 %p83, %f1169, 0fC2D20000; - selp.f32 %f1179, 0f00000000, %f1178, %p83; - setp.gt.f32 %p84, %f1169, 0f42D20000; - selp.f32 %f3284, 0f7F800000, %f1179, %p84; - setp.eq.f32 %p85, %f3284, 0f7F800000; - @%p85 bra BB3_60; - - fma.rn.f32 %f3284, %f3284, %f191, %f3284; - -BB3_60: - setp.geu.f32 %p351, %f137, 0f00000000; - mov.b32 %r160, %f3284; - xor.b32 %r161, %r160, -2147483648; - mov.b32 %f1180, %r161; - selp.f32 %f195, %f1180, %f3284, %p2; - setp.eq.f32 %p86, %f137, 0f00000000; - selp.f32 %f3285, %f144, %f195, %p86; - @%p351 bra BB3_62; - - cvt.rzi.f32.f32 %f1182, %f895; - setp.neu.f32 %p87, %f1182, 0f40000000; - selp.f32 %f3285, 0f7FFFFFFF, %f195, %p87; - -BB3_62: - abs.f32 %f3062, %f137; - add.f32 %f3061, %f3062, 0f40000000; - mov.b32 %r297, %f3061; - cvt.rn.f32.s32 %f3060, %r321; - cvt.rn.f32.s32 %f3059, %r322; - mov.f32 %f3058, 0f35BFBE8E; - mov.f32 %f3057, 0f3F317200; - add.f32 %f3056, %f3060, 0f3F800000; - sub.f32 %f3055, %f3056, %f3321; - sub.f32 %f3054, %f3060, %f3321; - mov.f32 %f3053, 0f00000000; - mov.f32 %f3052, 0f3DAAAABD; - mov.f32 %f3051, 0f3C4CAF63; - mov.f32 %f3050, 0f3B18F0FE; - mov.f32 %f3049, 0fB5BFBE8E; - mov.f32 %f3048, 0fBF317200; - add.f32 %f1185, %f137, 0f40000000; - setp.gtu.f32 %p88, %f3062, 0f7F800000; - selp.f32 %f1186, %f1185, %f3285, %p88; - selp.f32 %f1187, 0fFF800000, 0f7F800000, %p2; - setp.neu.f32 %p89, %f3062, 0f7F800000; - selp.f32 %f1188, %f1186, %f1187, %p89; - setp.gt.s32 %p90, %r297, 2139095039; - selp.f32 %f1189, %f1188, %f3285, %p90; - mul.f32 %f1190, %f1189, 0fBF000000; - setp.eq.f32 %p91, %f137, 0f3F800000; - selp.f32 %f1191, 0fBF000000, %f1190, %p91; - mul.f32 %f1192, %f1191, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1193, %f1192; - fma.rn.f32 %f1195, %f1193, %f3048, %f1191; - fma.rn.f32 %f1197, %f1193, %f3049, %f1195; - mul.f32 %f1198, %f1197, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1199, %f1198; - add.f32 %f1200, %f1193, 0f00000000; - ex2.approx.f32 %f1201, %f1200; - mul.f32 %f1202, %f1199, %f1201; - setp.lt.f32 %p92, %f1191, 0fC2D20000; - selp.f32 %f1203, 0f00000000, %f1202, %p92; - setp.gt.f32 %p93, %f1191, 0f42D20000; - selp.f32 %f1204, 0f7F800000, %f1203, %p93; - sub.f32 %f1205, %f190, %f1204; - mul.f32 %f1206, %f96, %f1205; - mul.f32 %f199, %f179, %f1206; - mul.f32 %f1207, %f3054, %f1204; - mul.f32 %f1208, %f3055, %f190; - sub.f32 %f1209, %f1208, %f1207; - mul.f32 %f1210, %f1209, %f102; - mul.f32 %f200, %f179, %f1210; - add.f32 %f1211, %f3059, 0f3F800000; - sub.f32 %f201, %f1211, %f3320; - div.rn.f32 %f202, %f201, %f3316; - abs.f32 %f203, %f202; - setp.lt.f32 %p94, %f203, 0f00800000; - mul.f32 %f1212, %f203, 0f4B800000; - selp.f32 %f1213, 0fC3170000, 0fC2FE0000, %p94; - selp.f32 %f1214, %f1212, %f203, %p94; - mov.b32 %r162, %f1214; - and.b32 %r163, %r162, 8388607; - or.b32 %r164, %r163, 1065353216; - mov.b32 %f1215, %r164; - shr.u32 %r165, %r162, 23; - cvt.rn.f32.u32 %f1216, %r165; - add.f32 %f1217, %f1213, %f1216; - setp.gt.f32 %p95, %f1215, 0f3FB504F3; - mul.f32 %f1218, %f1215, 0f3F000000; - add.f32 %f1219, %f1217, 0f3F800000; - selp.f32 %f1220, %f1218, %f1215, %p95; - selp.f32 %f1221, %f1219, %f1217, %p95; - add.f32 %f204, %f1220, 0fBF800000; - add.f32 %f1184, %f1220, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1183,%f1184; - // inline asm - add.f32 %f206, %f204, %f204; - mul.f32 %f1222, %f1183, %f206; - mul.f32 %f1223, %f1222, %f1222; - fma.rn.f32 %f1226, %f3050, %f1223, %f3051; - fma.rn.f32 %f1228, %f1226, %f1223, %f3052; - mul.rn.f32 %f1229, %f1228, %f1223; - mul.rn.f32 %f1230, %f1229, %f1222; - sub.f32 %f1231, %f204, %f1222; - neg.f32 %f1232, %f1222; - add.f32 %f1233, %f1231, %f1231; - fma.rn.f32 %f1234, %f1232, %f204, %f1233; - mul.rn.f32 %f1235, %f1183, %f1234; - add.f32 %f1236, %f1230, %f1222; - sub.f32 %f1237, %f1222, %f1236; - add.f32 %f1238, %f1230, %f1237; - add.f32 %f1239, %f1235, %f1238; - add.f32 %f1240, %f1236, %f1239; - sub.f32 %f1241, %f1236, %f1240; - add.f32 %f1242, %f1239, %f1241; - mul.rn.f32 %f207, %f1221, %f3057; - mul.rn.f32 %f208, %f1221, %f3058; - add.f32 %f1245, %f207, %f1240; - sub.f32 %f1246, %f207, %f1245; - add.f32 %f1247, %f1240, %f1246; - add.f32 %f1248, %f1242, %f1247; - add.f32 %f1249, %f208, %f1248; - add.f32 %f1250, %f1245, %f1249; - sub.f32 %f1251, %f1245, %f1250; - add.f32 %f1252, %f1249, %f1251; - mul.rn.f32 %f1254, %f895, %f1250; - neg.f32 %f1255, %f1254; - fma.rn.f32 %f1256, %f895, %f1250, %f1255; - fma.rn.f32 %f1257, %f895, %f1252, %f1256; - fma.rn.f32 %f1259, %f3053, %f1250, %f1257; - add.rn.f32 %f1260, %f1254, %f1259; - neg.f32 %f1261, %f1260; - add.rn.f32 %f1262, %f1254, %f1261; - add.rn.f32 %f1263, %f1262, %f1259; - mov.b32 %r166, %f1260; - setp.eq.s32 %p96, %r166, 1118925336; - add.s32 %r167, %r166, -1; - mov.b32 %f1264, %r167; - add.f32 %f1265, %f1263, 0f37000000; - selp.f32 %f1266, %f1264, %f1260, %p96; - selp.f32 %f209, %f1265, %f1263, %p96; - mul.f32 %f1267, %f1266, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1268, %f1267; - fma.rn.f32 %f1269, %f1268, %f3048, %f1266; - fma.rn.f32 %f1270, %f1268, %f3049, %f1269; - mul.f32 %f1271, %f1270, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1272, %f1271; - add.f32 %f1273, %f1268, 0f00000000; - ex2.approx.f32 %f1274, %f1273; - mul.f32 %f1275, %f1272, %f1274; - setp.lt.f32 %p97, %f1266, 0fC2D20000; - selp.f32 %f1276, 0f00000000, %f1275, %p97; - setp.gt.f32 %p98, %f1266, 0f42D20000; - selp.f32 %f3286, 0f7F800000, %f1276, %p98; - setp.eq.f32 %p99, %f3286, 0f7F800000; - @%p99 bra BB3_64; - - fma.rn.f32 %f3286, %f3286, %f209, %f3286; - -BB3_64: - setp.lt.f32 %p100, %f202, 0f00000000; - and.pred %p5, %p100, %p55; - mov.b32 %r168, %f3286; - xor.b32 %r169, %r168, -2147483648; - mov.b32 %f1277, %r169; - selp.f32 %f3288, %f1277, %f3286, %p5; - setp.eq.f32 %p102, %f202, 0f00000000; - @%p102 bra BB3_67; - bra.uni BB3_65; - -BB3_67: - add.f32 %f1280, %f202, %f202; - selp.f32 %f3288, %f1280, 0f00000000, %p55; - bra.uni BB3_68; - -BB3_65: - setp.geu.f32 %p103, %f202, 0f00000000; - @%p103 bra BB3_68; - - cvt.rzi.f32.f32 %f1279, %f895; - setp.neu.f32 %p104, %f1279, 0f40000000; - selp.f32 %f3288, 0f7FFFFFFF, %f3288, %p104; - -BB3_68: - abs.f32 %f3063, %f202; - add.f32 %f1281, %f3063, 0f40000000; - mov.b32 %r36, %f1281; - setp.lt.s32 %p106, %r36, 2139095040; - @%p106 bra BB3_73; - - abs.f32 %f3072, %f202; - setp.gtu.f32 %p107, %f3072, 0f7F800000; - @%p107 bra BB3_72; - bra.uni BB3_70; - -BB3_72: - add.f32 %f3288, %f202, 0f40000000; - bra.uni BB3_73; - -BB3_70: - abs.f32 %f3073, %f202; - setp.neu.f32 %p108, %f3073, 0f7F800000; - @%p108 bra BB3_73; - - selp.f32 %f3288, 0fFF800000, 0f7F800000, %p5; - -BB3_73: - cvt.rn.f32.s32 %f3085, %r322; - sub.f32 %f3084, %f3085, %f3320; - mov.f32 %f3071, 0f35BFBE8E; - mov.f32 %f3070, 0f3F317200; - mov.f32 %f3069, 0f00000000; - mov.f32 %f3068, 0f3DAAAABD; - mov.f32 %f3067, 0f3C4CAF63; - mov.f32 %f3066, 0f3B18F0FE; - mov.f32 %f3065, 0fB5BFBE8E; - mov.f32 %f3064, 0fBF317200; - mul.f32 %f1284, %f3288, 0fBF000000; - setp.eq.f32 %p109, %f202, 0f3F800000; - selp.f32 %f1285, 0fBF000000, %f1284, %p109; - mul.f32 %f1286, %f1285, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1287, %f1286; - fma.rn.f32 %f1289, %f1287, %f3064, %f1285; - fma.rn.f32 %f1291, %f1287, %f3065, %f1289; - mul.f32 %f1292, %f1291, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1293, %f1292; - add.f32 %f1294, %f1287, 0f00000000; - ex2.approx.f32 %f1295, %f1294; - mul.f32 %f1296, %f1293, %f1295; - setp.lt.f32 %p110, %f1285, 0fC2D20000; - selp.f32 %f1297, 0f00000000, %f1296, %p110; - setp.gt.f32 %p111, %f1285, 0f42D20000; - selp.f32 %f220, 0f7F800000, %f1297, %p111; - div.rn.f32 %f221, %f3084, %f3316; - abs.f32 %f222, %f221; - setp.lt.f32 %p112, %f222, 0f00800000; - mul.f32 %f1298, %f222, 0f4B800000; - selp.f32 %f1299, 0fC3170000, 0fC2FE0000, %p112; - selp.f32 %f1300, %f1298, %f222, %p112; - mov.b32 %r170, %f1300; - and.b32 %r171, %r170, 8388607; - or.b32 %r172, %r171, 1065353216; - mov.b32 %f1301, %r172; - shr.u32 %r173, %r170, 23; - cvt.rn.f32.u32 %f1302, %r173; - add.f32 %f1303, %f1299, %f1302; - setp.gt.f32 %p113, %f1301, 0f3FB504F3; - mul.f32 %f1304, %f1301, 0f3F000000; - add.f32 %f1305, %f1303, 0f3F800000; - selp.f32 %f1306, %f1304, %f1301, %p113; - selp.f32 %f1307, %f1305, %f1303, %p113; - add.f32 %f223, %f1306, 0fBF800000; - add.f32 %f1283, %f1306, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1282,%f1283; - // inline asm - add.f32 %f225, %f223, %f223; - mul.f32 %f1308, %f1282, %f225; - mul.f32 %f1309, %f1308, %f1308; - fma.rn.f32 %f1312, %f3066, %f1309, %f3067; - fma.rn.f32 %f1314, %f1312, %f1309, %f3068; - mul.rn.f32 %f1315, %f1314, %f1309; - mul.rn.f32 %f1316, %f1315, %f1308; - sub.f32 %f1317, %f223, %f1308; - neg.f32 %f1318, %f1308; - add.f32 %f1319, %f1317, %f1317; - fma.rn.f32 %f1320, %f1318, %f223, %f1319; - mul.rn.f32 %f1321, %f1282, %f1320; - add.f32 %f1322, %f1316, %f1308; - sub.f32 %f1323, %f1308, %f1322; - add.f32 %f1324, %f1316, %f1323; - add.f32 %f1325, %f1321, %f1324; - add.f32 %f1326, %f1322, %f1325; - sub.f32 %f1327, %f1322, %f1326; - add.f32 %f1328, %f1325, %f1327; - mul.rn.f32 %f226, %f1307, %f3070; - mul.rn.f32 %f227, %f1307, %f3071; - add.f32 %f1331, %f226, %f1326; - sub.f32 %f1332, %f226, %f1331; - add.f32 %f1333, %f1326, %f1332; - add.f32 %f1334, %f1328, %f1333; - add.f32 %f1335, %f227, %f1334; - add.f32 %f1336, %f1331, %f1335; - sub.f32 %f1337, %f1331, %f1336; - add.f32 %f1338, %f1335, %f1337; - mul.rn.f32 %f1340, %f895, %f1336; - neg.f32 %f1341, %f1340; - fma.rn.f32 %f1342, %f895, %f1336, %f1341; - fma.rn.f32 %f1343, %f895, %f1338, %f1342; - fma.rn.f32 %f1345, %f3069, %f1336, %f1343; - add.rn.f32 %f1346, %f1340, %f1345; - neg.f32 %f1347, %f1346; - add.rn.f32 %f1348, %f1340, %f1347; - add.rn.f32 %f1349, %f1348, %f1345; - mov.b32 %r174, %f1346; - setp.eq.s32 %p114, %r174, 1118925336; - add.s32 %r175, %r174, -1; - mov.b32 %f1350, %r175; - add.f32 %f1351, %f1349, 0f37000000; - selp.f32 %f1352, %f1350, %f1346, %p114; - selp.f32 %f228, %f1351, %f1349, %p114; - mul.f32 %f1353, %f1352, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1354, %f1353; - fma.rn.f32 %f1355, %f1354, %f3064, %f1352; - fma.rn.f32 %f1356, %f1354, %f3065, %f1355; - mul.f32 %f1357, %f1356, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1358, %f1357; - add.f32 %f1359, %f1354, 0f00000000; - ex2.approx.f32 %f1360, %f1359; - mul.f32 %f1361, %f1358, %f1360; - setp.lt.f32 %p115, %f1352, 0fC2D20000; - selp.f32 %f1362, 0f00000000, %f1361, %p115; - setp.gt.f32 %p116, %f1352, 0f42D20000; - selp.f32 %f3289, 0f7F800000, %f1362, %p116; - setp.eq.f32 %p117, %f3289, 0f7F800000; - @%p117 bra BB3_75; - - fma.rn.f32 %f3289, %f3289, %f228, %f3289; - -BB3_75: - setp.lt.f32 %p118, %f221, 0f00000000; - and.pred %p6, %p118, %p55; - mov.b32 %r176, %f3289; - xor.b32 %r177, %r176, -2147483648; - mov.b32 %f1363, %r177; - selp.f32 %f3291, %f1363, %f3289, %p6; - setp.eq.f32 %p120, %f221, 0f00000000; - @%p120 bra BB3_78; - bra.uni BB3_76; - -BB3_78: - add.f32 %f1366, %f221, %f221; - selp.f32 %f3291, %f1366, 0f00000000, %p55; - bra.uni BB3_79; - -BB3_76: - setp.geu.f32 %p121, %f221, 0f00000000; - @%p121 bra BB3_79; - - cvt.rzi.f32.f32 %f1365, %f895; - setp.neu.f32 %p122, %f1365, 0f40000000; - selp.f32 %f3291, 0f7FFFFFFF, %f3291, %p122; - -BB3_79: - abs.f32 %f2986, %f221; - add.f32 %f1367, %f2986, 0f40000000; - mov.b32 %r37, %f1367; - setp.lt.s32 %p124, %r37, 2139095040; - @%p124 bra BB3_84; - - abs.f32 %f3082, %f221; - setp.gtu.f32 %p125, %f3082, 0f7F800000; - @%p125 bra BB3_83; - bra.uni BB3_81; - -BB3_83: - add.f32 %f3291, %f221, 0f40000000; - bra.uni BB3_84; - -BB3_81: - abs.f32 %f3083, %f221; - setp.neu.f32 %p126, %f3083, 0f7F800000; - @%p126 bra BB3_84; - - selp.f32 %f3291, 0fFF800000, 0f7F800000, %p6; - -BB3_84: - cvt.rn.f32.s32 %f3087, %r322; - sub.f32 %f3086, %f3087, %f3320; - cvt.rn.f32.s32 %f2995, %r322; - add.f32 %f2994, %f2995, 0f3F800000; - sub.f32 %f2993, %f2994, %f3320; - mov.f32 %f2992, 0f00000000; - mov.f32 %f2991, 0f3DAAAABD; - mov.f32 %f2990, 0f3C4CAF63; - mov.f32 %f2989, 0f3B18F0FE; - mov.f32 %f2988, 0fB5BFBE8E; - mov.f32 %f2987, 0fBF317200; - mul.f32 %f1370, %f3291, 0fBF000000; - setp.eq.f32 %p127, %f221, 0f3F800000; - selp.f32 %f1371, 0fBF000000, %f1370, %p127; - mul.f32 %f1372, %f1371, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1373, %f1372; - fma.rn.f32 %f1375, %f1373, %f2987, %f1371; - fma.rn.f32 %f1377, %f1373, %f2988, %f1375; - mul.f32 %f1378, %f1377, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1379, %f1378; - add.f32 %f1380, %f1373, 0f00000000; - ex2.approx.f32 %f1381, %f1380; - mul.f32 %f1382, %f1379, %f1381; - setp.lt.f32 %p128, %f1371, 0fC2D20000; - selp.f32 %f1383, 0f00000000, %f1382, %p128; - setp.gt.f32 %p129, %f1371, 0f42D20000; - selp.f32 %f1384, 0f7F800000, %f1383, %p129; - sub.f32 %f1385, %f220, %f1384; - mul.f32 %f1386, %f97, %f1385; - mul.f32 %f239, %f165, %f1386; - mul.f32 %f1387, %f3086, %f1384; - mul.f32 %f1388, %f2993, %f220; - sub.f32 %f1389, %f1388, %f1387; - mul.f32 %f1390, %f1389, %f103; - mul.f32 %f240, %f165, %f1390; - // inline asm - rcp.approx.ftz.f32 %f1368,%f132; - // inline asm - mul.f32 %f1391, %f1368, %f133; - mul.f32 %f1392, %f1391, %f1391; - fma.rn.f32 %f1395, %f2989, %f1392, %f2990; - fma.rn.f32 %f1397, %f1395, %f1392, %f2991; - mul.rn.f32 %f1398, %f1397, %f1392; - mul.rn.f32 %f1399, %f1398, %f1391; - sub.f32 %f1400, %f131, %f1391; - neg.f32 %f1401, %f1391; - add.f32 %f1402, %f1400, %f1400; - fma.rn.f32 %f1403, %f1401, %f131, %f1402; - mul.rn.f32 %f1404, %f1368, %f1403; - add.f32 %f1405, %f1399, %f1391; - sub.f32 %f1406, %f1391, %f1405; - add.f32 %f1407, %f1399, %f1406; - add.f32 %f1408, %f1404, %f1407; - add.f32 %f1409, %f1405, %f1408; - sub.f32 %f1410, %f1405, %f1409; - add.f32 %f1411, %f1408, %f1410; - add.f32 %f1412, %f134, %f1409; - sub.f32 %f1413, %f134, %f1412; - add.f32 %f1414, %f1409, %f1413; + mul.rn.f32 %f1149, %f1120, %f2863; + mul.rn.f32 %f1151, %f1120, %f2864; + add.f32 %f1152, %f1149, %f1145; + sub.f32 %f1153, %f1149, %f1152; + add.f32 %f1154, %f1145, %f1153; + add.f32 %f1155, %f1147, %f1154; + add.f32 %f1156, %f1151, %f1155; + add.f32 %f1157, %f1152, %f1156; + sub.f32 %f1158, %f1152, %f1157; + add.f32 %f1159, %f1156, %f1158; + mul.rn.f32 %f1160, %f622, %f1157; + neg.f32 %f1161, %f1160; + fma.rn.f32 %f1162, %f622, %f1157, %f1161; + fma.rn.f32 %f1163, %f622, %f1159, %f1162; + fma.rn.f32 %f1165, %f2867, %f1157, %f1163; + add.rn.f32 %f1166, %f1160, %f1165; + neg.f32 %f1167, %f1166; + add.rn.f32 %f1168, %f1160, %f1167; + add.rn.f32 %f1169, %f1168, %f1165; + mov.b32 %r331, %f1166; + setp.eq.s32 %p218, %r331, 1118925336; + add.s32 %r332, %r331, -1; + mov.b32 %f1170, %r332; + add.f32 %f1171, %f1169, 0f37000000; + selp.f32 %f166, %f1171, %f1169, %p218; + selp.f32 %f1172, %f1170, %f1166, %p218; + mul.rn.f32 %f1173, %f1172, %f2855; + cvt.rzi.f32.f32 %f1174, %f1173; + abs.f32 %f1175, %f1174; + setp.gt.f32 %p219, %f1175, 0f42FC0000; + mov.b32 %r333, %f1174; + and.b32 %r334, %r333, -2147483648; + or.b32 %r335, %r334, 1123811328; + mov.b32 %f1176, %r335; + selp.f32 %f1177, %f1176, %f1174, %p219; + fma.rn.f32 %f1179, %f1177, %f2865, %f1172; + fma.rn.f32 %f1181, %f1177, %f2866, %f1179; + mul.f32 %f1182, %f1181, 0f3FB8AA3B; + add.f32 %f1183, %f1177, 0f4B40007F; + mov.b32 %r336, %f1183; + shl.b32 %r337, %r336, 23; + mov.b32 %f1184, %r337; + ex2.approx.ftz.f32 %f1185, %f1182; + mul.f32 %f167, %f1185, %f1184; + setp.eq.f32 %p220, %f167, 0f7F800000; + mov.f32 %f3016, 0f7F800000; + @%p220 bra $L__BB3_100; + + fma.rn.f32 %f3016, %f167, %f166, %f167; + +$L__BB3_100: + setp.lt.f32 %p221, %f164, 0f00000000; + and.pred %p25, %p221, %p110; + setp.eq.f32 %p223, %f164, 0f00000000; + @%p223 bra $L__BB3_104; + bra.uni $L__BB3_101; + +$L__BB3_104: + add.f32 %f1190, %f164, %f164; + selp.f32 %f3018, %f1190, 0f00000000, %p110; + bra.uni $L__BB3_105; + +$L__BB3_101: + mov.b32 %r338, %f3016; + xor.b32 %r339, %r338, -2147483648; + mov.b32 %f1186, %r339; + selp.f32 %f3018, %f1186, %f3016, %p25; + setp.geu.f32 %p224, %f164, 0f00000000; + @%p224 bra $L__BB3_105; + + cvt.rzi.f32.f32 %f1188, %f622; + setp.eq.f32 %p225, %f1188, 0f40000000; + @%p225 bra $L__BB3_105; + + mov.f32 %f3018, 0f7FFFFFFF; + +$L__BB3_105: + abs.f32 %f2940, %f164; + add.f32 %f1191, %f2940, 0f40000000; + mov.b32 %r340, %f1191; + setp.lt.s32 %p227, %r340, 2139095040; + @%p227 bra $L__BB3_110; + + abs.f32 %f2941, %f164; + setp.gtu.f32 %p228, %f2941, 0f7F800000; + @%p228 bra $L__BB3_109; + bra.uni $L__BB3_107; + +$L__BB3_109: + add.f32 %f3018, %f164, 0f40000000; + bra.uni $L__BB3_110; + +$L__BB3_107: + abs.f32 %f2942, %f164; + setp.neu.f32 %p229, %f2942, 0f7F800000; + @%p229 bra $L__BB3_110; + + selp.f32 %f3018, 0fFF800000, 0f7F800000, %p25; + +$L__BB3_110: + mov.f32 %f2873, 0f32A57060; + mov.f32 %f2872, 0f4B400001; + mov.f32 %f2871, 0f437C0000; + mov.f32 %f2870, 0f3BBB989D; + mov.f32 %f2869, 0f3FB8AA3B; + mov.f32 %f2868, 0f3F000000; + mul.f32 %f1192, %f3018, 0fBF000000; + setp.eq.f32 %p230, %f164, 0f3F800000; + selp.f32 %f1193, 0fBF000000, %f1192, %p230; + fma.rn.f32 %f1196, %f1193, %f2870, %f2868; + cvt.sat.f32.f32 %f1199, %f1196; + fma.rm.f32 %f1201, %f1199, %f2871, %f2872; + add.f32 %f1202, %f1201, 0fCB40007F; + neg.f32 %f1203, %f1202; + fma.rn.f32 %f1204, %f1193, %f2869, %f1203; + fma.rn.f32 %f1206, %f1193, %f2873, %f1204; + mov.b32 %r341, %f1201; + shl.b32 %r342, %r341, 23; + mov.b32 %f1207, %r342; + ex2.approx.ftz.f32 %f1208, %f1206; + mul.f32 %f176, %f1208, %f1207; + sub.f32 %f1209, %f163, %f176; + mul.f32 %f1210, %f61, %f1209; + mul.f32 %f177, %f120, %f1210; + not.pred %p231, %p7; + mov.f64 %fd589, %fd37; + @%p231 bra $L__BB3_112; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r343}, %fd37; + } + xor.b32 %r344, %r343, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r345, %temp}, %fd37; + } + mov.b64 %fd589, {%r345, %r344}; + +$L__BB3_112: + setp.eq.f32 %p232, %f3043, 0f00000000; + @%p232 bra $L__BB3_116; + bra.uni $L__BB3_113; + +$L__BB3_116: + mov.u32 %r346, 0; + selp.b32 %r348, %r60, 0, %p121; + or.b32 %r349, %r348, 2146435072; + selp.b32 %r350, %r349, %r348, %p123; + mov.b64 %fd589, {%r346, %r350}; + bra.uni $L__BB3_117; + +$L__BB3_113: + setp.gt.s32 %p233, %r60, -1; + @%p233 bra $L__BB3_117; + + cvt.rzi.f64.f64 %fd364, %fd339; + setp.eq.f64 %p234, %fd364, 0d4008000000000000; + @%p234 bra $L__BB3_117; + + mov.f64 %fd589, 0dFFF8000000000000; + +$L__BB3_117: + cvt.f64.f32 %fd553, %f3043; + add.f64 %fd552, %fd553, 0d4008000000000000; + selp.f64 %fd590, %fd589, %fd552, %p130; + @%p17 bra $L__BB3_122; + + setp.eq.s32 %p238, %r57, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r351, %temp}, %fd339; + } + setp.eq.s32 %p239, %r351, 0; + and.pred %p240, %p238, %p239; + @%p240 bra $L__BB3_121; + bra.uni $L__BB3_119; + +$L__BB3_121: + mov.u32 %r358, 0; + mov.b64 %fd590, {%r358, %r62}; + bra.uni $L__BB3_122; + +$L__BB3_119: + cvt.f64.f32 %fd554, %f3043; + and.b32 %r352, %r60, 2147483647; + setp.ne.s32 %p241, %r352, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r353, %temp}, %fd554; + } + setp.ne.s32 %p242, %r353, 0; + or.pred %p243, %p241, %p242; + mov.f64 %fd590, %fd589; + @%p243 bra $L__BB3_122; + + setp.ne.s32 %p244, %r57, 1071644672; + and.pred %p245, %p244, %p7; + or.b32 %r355, %r59, -2147483648; + selp.b32 %r356, %r355, %r59, %p245; + mov.u32 %r357, 0; + mov.b64 %fd590, {%r357, %r356}; + +$L__BB3_122: + cvt.rn.f32.s32 %f2936, %r853; + add.f32 %f2935, %f2936, 0f3F000000; + sub.f32 %f2934, %f2935, %f3047; + setp.eq.f32 %p246, %f3043, 0f3F800000; + selp.f64 %fd370, 0d3FF0000000000000, %fd590, %p246; + div.rn.f64 %fd371, %fd34, %fd370; + mul.f32 %f1212, %f128, %f176; + mul.f32 %f1213, %f2934, %f163; + sub.f32 %f1214, %f1213, %f1212; + cvt.f64.f32 %fd372, %f1214; + mul.f64 %fd373, %fd371, %fd372; + cvt.f64.f32 %fd374, %f120; + mul.f64 %fd375, %fd373, %fd374; + cvt.rn.f32.f64 %f178, %fd375; + setp.eq.f32 %p247, %f99, 0f7F800000; + mov.f32 %f3019, 0f7F800000; + @%p247 bra $L__BB3_124; + + fma.rn.f32 %f3019, %f99, %f98, %f99; + +$L__BB3_124: + setp.geu.f32 %p736, %f96, 0f00000000; + setp.lt.f32 %p735, %f96, 0f00000000; + and.pred %p734, %p735, %p110; + mov.b32 %r359, %f3019; + xor.b32 %r360, %r359, -2147483648; + mov.b32 %f1215, %r360; + selp.f32 %f181, %f1215, %f3019, %p734; + add.f32 %f1216, %f96, %f96; + selp.f32 %f1217, %f1216, 0f00000000, %p110; + setp.eq.f32 %p249, %f96, 0f00000000; + selp.f32 %f3020, %f1217, %f181, %p249; + @%p736 bra $L__BB3_127; + + cvt.rzi.f32.f32 %f1219, %f622; + setp.eq.f32 %p250, %f1219, 0f40000000; + mov.f32 %f3020, %f181; + @%p250 bra $L__BB3_127; + + mov.f32 %f3020, 0f7FFFFFFF; + +$L__BB3_127: + abs.f32 %f2880, %f96; + setp.lt.f32 %p738, %f96, 0f00000000; + and.pred %p737, %p738, %p110; + mov.f32 %f2879, 0f32A57060; + mov.f32 %f2878, 0f4B400001; + mov.f32 %f2877, 0f437C0000; + mov.f32 %f2876, 0f3BBB989D; + mov.f32 %f2875, 0f3FB8AA3B; + mov.f32 %f2874, 0f3F000000; + add.f32 %f1222, %f2880, 0f40000000; + mov.b32 %r361, %f1222; + setp.gt.s32 %p251, %r361, 2139095039; + add.f32 %f1223, %f96, 0f40000000; + setp.gtu.f32 %p252, %f2880, 0f7F800000; + mov.f32 %f3021, 0f7F800000; + selp.f32 %f1224, %f1223, %f3020, %p252; + selp.f32 %f1225, 0fFF800000, 0f7F800000, %p737; + setp.neu.f32 %p253, %f2880, 0f7F800000; + selp.f32 %f1226, %f1224, %f1225, %p253; + selp.f32 %f1227, %f1226, %f3020, %p251; + mul.f32 %f1228, %f1227, 0fBF000000; + setp.eq.f32 %p254, %f96, 0f3F800000; + selp.f32 %f1229, 0fBF000000, %f1228, %p254; + fma.rn.f32 %f1232, %f1229, %f2876, %f2874; + cvt.sat.f32.f32 %f1235, %f1232; + fma.rm.f32 %f1237, %f1235, %f2877, %f2878; + add.f32 %f1238, %f1237, 0fCB40007F; + neg.f32 %f1239, %f1238; + fma.rn.f32 %f1240, %f1229, %f2875, %f1239; + fma.rn.f32 %f1242, %f1229, %f2879, %f1240; + mov.b32 %r362, %f1237; + shl.b32 %r363, %r362, 23; + mov.b32 %f1243, %r363; + ex2.approx.ftz.f32 %f1244, %f1242; + mul.f32 %f184, %f1244, %f1243; + setp.eq.f32 %p255, %f103, 0f7F800000; + @%p255 bra $L__BB3_129; + + fma.rn.f32 %f3021, %f103, %f102, %f103; + +$L__BB3_129: + setp.geu.f32 %p741, %f100, 0f00000000; + setp.lt.f32 %p740, %f100, 0f00000000; + and.pred %p739, %p740, %p110; + mov.b32 %r364, %f3021; + xor.b32 %r365, %r364, -2147483648; + mov.b32 %f1245, %r365; + selp.f32 %f187, %f1245, %f3021, %p739; + add.f32 %f1246, %f100, %f100; + selp.f32 %f1247, %f1246, 0f00000000, %p110; + setp.eq.f32 %p257, %f100, 0f00000000; + selp.f32 %f3022, %f1247, %f187, %p257; + @%p741 bra $L__BB3_132; + + cvt.rzi.f32.f32 %f1249, %f622; + setp.eq.f32 %p258, %f1249, 0f40000000; + mov.f32 %f3022, %f187; + @%p258 bra $L__BB3_132; + + mov.f32 %f3022, 0f7FFFFFFF; + +$L__BB3_132: + cvt.rn.f32.s32 %f2889, %r852; + sub.f32 %f2888, %f2889, %f3048; + abs.f32 %f2887, %f100; + setp.lt.f32 %p743, %f100, 0f00000000; + and.pred %p742, %p743, %p110; + mov.f32 %f2886, 0f32A57060; + mov.f32 %f2885, 0f4B400001; + mov.f32 %f2884, 0f437C0000; + mov.f32 %f2883, 0f3BBB989D; + mov.f32 %f2882, 0f3FB8AA3B; + mov.f32 %f2881, 0f3F000000; + add.f32 %f1251, %f2887, 0f40000000; + mov.b32 %r366, %f1251; + setp.gt.s32 %p259, %r366, 2139095039; + add.f32 %f1252, %f100, 0f40000000; + setp.gtu.f32 %p260, %f2887, 0f7F800000; + selp.f32 %f1253, %f1252, %f3022, %p260; + selp.f32 %f1254, 0fFF800000, 0f7F800000, %p742; + setp.neu.f32 %p261, %f2887, 0f7F800000; + selp.f32 %f1255, %f1253, %f1254, %p261; + selp.f32 %f1256, %f1255, %f3022, %p259; + mul.f32 %f1257, %f1256, 0fBF000000; + setp.eq.f32 %p262, %f100, 0f3F800000; + selp.f32 %f1258, 0fBF000000, %f1257, %p262; + fma.rn.f32 %f1261, %f1258, %f2883, %f2881; + cvt.sat.f32.f32 %f1264, %f1261; + fma.rm.f32 %f1266, %f1264, %f2884, %f2885; + add.f32 %f1267, %f1266, 0fCB40007F; + neg.f32 %f1268, %f1267; + fma.rn.f32 %f1269, %f1258, %f2882, %f1268; + fma.rn.f32 %f1271, %f1258, %f2886, %f1269; + mov.b32 %r367, %f1266; + shl.b32 %r368, %r367, 23; + mov.b32 %f1272, %r368; + ex2.approx.ftz.f32 %f1273, %f1271; + mul.f32 %f190, %f1273, %f1272; + add.f32 %f1274, %f2888, 0f3F800000; + mul.f32 %f1275, %f1274, %f184; + mul.f32 %f1276, %f2888, %f190; + sub.f32 %f1277, %f1275, %f1276; + mul.f32 %f1278, %f62, %f1277; + mul.f32 %f191, %f133, %f1278; + not.pred %p263, %p10; + mov.f64 %fd592, %fd38; + @%p263 bra $L__BB3_134; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r369}, %fd38; + } + xor.b32 %r370, %r369, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r371, %temp}, %fd38; + } + mov.b64 %fd592, {%r371, %r370}; + +$L__BB3_134: + setp.eq.f32 %p744, %f3044, 0f00000000; + @%p744 bra $L__BB3_138; + bra.uni $L__BB3_135; + +$L__BB3_138: + mov.u32 %r372, 0; + mov.b64 %fd592, {%r372, %r63}; + bra.uni $L__BB3_139; + +$L__BB3_135: + setp.gt.s32 %p265, %r53, -1; + @%p265 bra $L__BB3_139; + + cvt.rzi.f64.f64 %fd377, %fd345; + setp.eq.f64 %p266, %fd377, 0d4014000000000000; + @%p266 bra $L__BB3_139; + + mov.f64 %fd592, 0dFFF8000000000000; + +$L__BB3_139: + cvt.f64.f32 %fd556, %f3044; + add.f64 %fd555, %fd556, 0d4014000000000000; + selp.f64 %fd593, %fd592, %fd555, %p146; + @%p20 bra $L__BB3_144; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r838}, %fd345; + } + and.b32 %r837, %r838, 2147483647; + setp.eq.s32 %p268, %r837, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r373, %temp}, %fd345; + } + setp.eq.s32 %p269, %r373, 0; + and.pred %p270, %p268, %p269; + @%p270 bra $L__BB3_143; + bra.uni $L__BB3_141; + +$L__BB3_143: + mov.u32 %r377, 0; + mov.b64 %fd593, {%r377, %r67}; + bra.uni $L__BB3_144; + +$L__BB3_141: + cvt.f64.f32 %fd557, %f3044; + and.b32 %r374, %r53, 2147483647; + setp.ne.s32 %p271, %r374, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r375, %temp}, %fd557; + } + setp.ne.s32 %p272, %r375, 0; + or.pred %p273, %p271, %p272; + mov.f64 %fd593, %fd592; + @%p273 bra $L__BB3_144; + + mov.u32 %r376, 0; + mov.b64 %fd593, {%r376, %r69}; + +$L__BB3_144: + not.pred %p274, %p11; + mov.f64 %fd595, %fd40; + @%p274 bra $L__BB3_146; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r378}, %fd40; + } + xor.b32 %r379, %r378, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r380, %temp}, %fd40; + } + mov.b64 %fd595, {%r380, %r379}; + +$L__BB3_146: + cvt.rn.f32.s32 %f2892, %r852; + sub.f32 %f2891, %f2892, %f3048; + add.f32 %f2890, %f2891, 0f3F000000; + setp.eq.f32 %p275, %f2890, 0f00000000; + @%p275 bra $L__BB3_150; + bra.uni $L__BB3_147; + +$L__BB3_150: + mov.u32 %r381, 0; + selp.b32 %r383, %r65, 0, %p121; + or.b32 %r384, %r383, 2146435072; + selp.b32 %r385, %r384, %r383, %p123; + mov.b64 %fd595, {%r381, %r385}; + bra.uni $L__BB3_151; + +$L__BB3_147: + setp.gt.s32 %p276, %r65, -1; + @%p276 bra $L__BB3_151; + + cvt.rzi.f64.f64 %fd384, %fd339; + setp.eq.f64 %p277, %fd384, 0d4008000000000000; + @%p277 bra $L__BB3_151; + + mov.f64 %fd595, 0dFFF8000000000000; + +$L__BB3_151: + selp.f64 %fd596, %fd595, %fd41, %p148; + @%p21 bra $L__BB3_156; + + setp.eq.s32 %p281, %r57, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r386, %temp}, %fd339; + } + setp.eq.s32 %p282, %r386, 0; + and.pred %p283, %p281, %p282; + @%p283 bra $L__BB3_155; + bra.uni $L__BB3_153; + +$L__BB3_155: + mov.u32 %r393, 0; + mov.b64 %fd596, {%r393, %r71}; + bra.uni $L__BB3_156; + +$L__BB3_153: + cvt.rn.f32.s32 %f2895, %r852; + sub.f32 %f2894, %f2895, %f3048; + add.f32 %f2893, %f2894, 0f3F000000; + cvt.f64.f32 %fd558, %f2893; + and.b32 %r387, %r65, 2147483647; + setp.ne.s32 %p284, %r387, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r388, %temp}, %fd558; + } + setp.ne.s32 %p285, %r388, 0; + or.pred %p286, %p284, %p285; + mov.f64 %fd596, %fd595; + @%p286 bra $L__BB3_156; + + setp.ne.s32 %p287, %r57, 1071644672; + and.pred %p288, %p287, %p11; + or.b32 %r390, %r59, -2147483648; + selp.b32 %r391, %r390, %r59, %p288; + mov.u32 %r392, 0; + mov.b64 %fd596, {%r392, %r391}; + +$L__BB3_156: + cvt.rn.f32.s32 %f2898, %r852; + sub.f32 %f2897, %f2898, %f3048; + add.f32 %f2896, %f2897, 0f3F000000; + setp.eq.f32 %p289, %f2896, 0f3F800000; + selp.f64 %fd387, 0d3FF0000000000000, %fd596, %p289; + cvt.f64.f32 %fd388, %f184; + mul.f64 %fd79, %fd387, %fd388; + not.pred %p290, %p12; + mov.f64 %fd598, %fd43; + @%p290 bra $L__BB3_158; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r394}, %fd43; + } + xor.b32 %r395, %r394, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r396, %temp}, %fd43; + } + mov.b64 %fd598, {%r396, %r395}; + +$L__BB3_158: + setp.eq.f32 %p291, %f82, 0f00000000; + @%p291 bra $L__BB3_162; + bra.uni $L__BB3_159; + +$L__BB3_162: + mov.u32 %r397, 0; + selp.b32 %r399, %r70, 0, %p121; + or.b32 %r400, %r399, 2146435072; + selp.b32 %r401, %r400, %r399, %p123; + mov.b64 %fd598, {%r397, %r401}; + bra.uni $L__BB3_163; + +$L__BB3_159: + setp.gt.s32 %p292, %r70, -1; + @%p292 bra $L__BB3_163; + + cvt.rzi.f64.f64 %fd390, %fd339; + setp.eq.f64 %p293, %fd390, 0d4008000000000000; + @%p293 bra $L__BB3_163; + + mov.f64 %fd598, 0dFFF8000000000000; + +$L__BB3_163: + selp.f64 %fd599, %fd598, %fd44, %p156; + @%p22 bra $L__BB3_168; + + setp.eq.s32 %p297, %r57, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r402, %temp}, %fd339; + } + setp.eq.s32 %p298, %r402, 0; + and.pred %p299, %p297, %p298; + @%p299 bra $L__BB3_167; + bra.uni $L__BB3_165; + +$L__BB3_167: + mov.u32 %r409, 0; + mov.b64 %fd599, {%r409, %r73}; + bra.uni $L__BB3_168; + +$L__BB3_165: + cvt.rn.f32.s32 %f2901, %r852; + sub.f32 %f2900, %f2901, %f3048; + add.f32 %f2899, %f2900, 0fBF000000; + cvt.f64.f32 %fd559, %f2899; + and.b32 %r403, %r70, 2147483647; + setp.ne.s32 %p300, %r403, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r404, %temp}, %fd559; + } + setp.ne.s32 %p301, %r404, 0; + or.pred %p302, %p300, %p301; + mov.f64 %fd599, %fd598; + @%p302 bra $L__BB3_168; + + setp.ne.s32 %p303, %r57, 1071644672; + and.pred %p304, %p303, %p12; + or.b32 %r406, %r59, -2147483648; + selp.b32 %r407, %r406, %r59, %p304; + mov.u32 %r408, 0; + mov.b64 %fd599, {%r408, %r407}; + +$L__BB3_168: + cvt.f64.f32 %fd560, %f133; + setp.eq.f32 %p745, %f3044, 0f3F800000; + cvt.rn.f32.s32 %f2937, %r853; + mov.f32 %f2910, 0f00000000; + mov.f32 %f2909, 0f3102E308; + mov.f32 %f2908, 0fBF317218; + mov.f32 %f2907, 0f35BFBE8E; + mov.f32 %f2906, 0f3F317200; + mov.f32 %f2905, 0f3DAAAABD; + mov.f32 %f2904, 0f3C4CAF63; + mov.f32 %f2903, 0f3B18F0FE; + mov.f32 %f2902, 0f3FB8AA3B; + setp.eq.f32 %p305, %f82, 0f3F800000; + selp.f64 %fd393, 0d3FF0000000000000, %fd599, %p305; + cvt.f64.f32 %fd394, %f190; + mul.f64 %fd395, %fd393, %fd394; + sub.f64 %fd396, %fd79, %fd395; + selp.f64 %fd397, 0d3FF0000000000000, %fd593, %p745; + div.rn.f64 %fd398, %fd35, %fd397; + mul.f64 %fd399, %fd398, %fd396; + mul.f64 %fd400, %fd399, %fd560; + mul.f32 %f1280, %f63, %f191; + cvt.f64.f32 %fd401, %f1280; + sub.f64 %fd402, %fd401, %fd400; + cvt.rn.f32.f64 %f192, %fd402; + add.f32 %f1281, %f2937, 0f3F800000; + sub.f32 %f1282, %f1281, %f3047; + div.rn.f32 %f193, %f1282, %f3043; + abs.f32 %f194, %f193; + setp.lt.f32 %p307, %f194, 0f00800000; + mul.f32 %f1283, %f194, 0f4B800000; + selp.f32 %f1284, %f1283, %f194, %p307; + selp.f32 %f1285, 0fC3170000, 0fC2FE0000, %p307; + mov.b32 %r410, %f1284; + and.b32 %r411, %r410, 8388607; + or.b32 %r412, %r411, 1065353216; + mov.b32 %f1286, %r412; + shr.u32 %r413, %r410, 23; + cvt.rn.f32.u32 %f1287, %r413; + add.f32 %f1288, %f1285, %f1287; + setp.gt.f32 %p308, %f1286, 0f3FB504F3; + mul.f32 %f1289, %f1286, 0f3F000000; + add.f32 %f1290, %f1288, 0f3F800000; + selp.f32 %f1291, %f1290, %f1288, %p308; + selp.f32 %f1292, %f1289, %f1286, %p308; + add.f32 %f1293, %f1292, 0fBF800000; + add.f32 %f1294, %f1292, 0f3F800000; + rcp.approx.ftz.f32 %f1295, %f1294; + add.f32 %f1296, %f1293, %f1293; + mul.f32 %f1298, %f1296, %f1295; + mul.f32 %f1299, %f1298, %f1298; + fma.rn.f32 %f1302, %f2903, %f1299, %f2904; + fma.rn.f32 %f1304, %f1302, %f1299, %f2905; + mul.rn.f32 %f1305, %f1304, %f1299; + mul.rn.f32 %f1306, %f1305, %f1298; + sub.f32 %f1307, %f1293, %f1298; + add.f32 %f1308, %f1307, %f1307; + neg.f32 %f1309, %f1298; + fma.rn.f32 %f1310, %f1309, %f1293, %f1308; + mul.rn.f32 %f1311, %f1295, %f1310; + add.f32 %f1312, %f1306, %f1298; + sub.f32 %f1313, %f1298, %f1312; + add.f32 %f1314, %f1306, %f1313; + add.f32 %f1315, %f1311, %f1314; + add.f32 %f1316, %f1312, %f1315; + sub.f32 %f1317, %f1312, %f1316; + add.f32 %f1318, %f1315, %f1317; + mul.rn.f32 %f1320, %f1291, %f2906; + mul.rn.f32 %f1322, %f1291, %f2907; + add.f32 %f1323, %f1320, %f1316; + sub.f32 %f1324, %f1320, %f1323; + add.f32 %f1325, %f1316, %f1324; + add.f32 %f1326, %f1318, %f1325; + add.f32 %f1327, %f1322, %f1326; + add.f32 %f1328, %f1323, %f1327; + sub.f32 %f1329, %f1323, %f1328; + add.f32 %f1330, %f1327, %f1329; + mul.rn.f32 %f1331, %f622, %f1328; + neg.f32 %f1332, %f1331; + fma.rn.f32 %f1333, %f622, %f1328, %f1332; + fma.rn.f32 %f1334, %f622, %f1330, %f1333; + fma.rn.f32 %f1336, %f2910, %f1328, %f1334; + add.rn.f32 %f1337, %f1331, %f1336; + neg.f32 %f1338, %f1337; + add.rn.f32 %f1339, %f1331, %f1338; + add.rn.f32 %f1340, %f1339, %f1336; + mov.b32 %r414, %f1337; + setp.eq.s32 %p309, %r414, 1118925336; + add.s32 %r415, %r414, -1; + mov.b32 %f1341, %r415; + add.f32 %f1342, %f1340, 0f37000000; + selp.f32 %f195, %f1342, %f1340, %p309; + selp.f32 %f1343, %f1341, %f1337, %p309; + mul.rn.f32 %f1345, %f1343, %f2902; + cvt.rzi.f32.f32 %f1346, %f1345; + abs.f32 %f1347, %f1346; + setp.gt.f32 %p310, %f1347, 0f42FC0000; + mov.b32 %r416, %f1346; + and.b32 %r417, %r416, -2147483648; + or.b32 %r418, %r417, 1123811328; + mov.b32 %f1348, %r418; + selp.f32 %f1349, %f1348, %f1346, %p310; + fma.rn.f32 %f1351, %f1349, %f2908, %f1343; + fma.rn.f32 %f1353, %f1349, %f2909, %f1351; + mul.f32 %f1354, %f1353, 0f3FB8AA3B; + add.f32 %f1355, %f1349, 0f4B40007F; + mov.b32 %r419, %f1355; + shl.b32 %r420, %r419, 23; + mov.b32 %f1356, %r420; + ex2.approx.ftz.f32 %f1357, %f1354; + mul.f32 %f196, %f1357, %f1356; + setp.eq.f32 %p311, %f196, 0f7F800000; + mov.f32 %f3023, 0f7F800000; + @%p311 bra $L__BB3_170; + + fma.rn.f32 %f3023, %f196, %f195, %f196; + +$L__BB3_170: + setp.lt.f32 %p312, %f193, 0f00000000; + and.pred %p26, %p312, %p110; + setp.eq.f32 %p314, %f193, 0f00000000; + @%p314 bra $L__BB3_174; + bra.uni $L__BB3_171; + +$L__BB3_174: + add.f32 %f1362, %f193, %f193; + selp.f32 %f3025, %f1362, 0f00000000, %p110; + bra.uni $L__BB3_175; + +$L__BB3_171: + mov.b32 %r421, %f3023; + xor.b32 %r422, %r421, -2147483648; + mov.b32 %f1358, %r422; + selp.f32 %f3025, %f1358, %f3023, %p26; + setp.geu.f32 %p315, %f193, 0f00000000; + @%p315 bra $L__BB3_175; + + cvt.rzi.f32.f32 %f1360, %f622; + setp.eq.f32 %p316, %f1360, 0f40000000; + @%p316 bra $L__BB3_175; + + mov.f32 %f3025, 0f7FFFFFFF; + +$L__BB3_175: + abs.f32 %f2943, %f193; + add.f32 %f1363, %f2943, 0f40000000; + mov.b32 %r423, %f1363; + setp.lt.s32 %p318, %r423, 2139095040; + @%p318 bra $L__BB3_180; + + abs.f32 %f2944, %f193; + setp.gtu.f32 %p319, %f2944, 0f7F800000; + @%p319 bra $L__BB3_179; + bra.uni $L__BB3_177; + +$L__BB3_179: + add.f32 %f3025, %f193, 0f40000000; + bra.uni $L__BB3_180; + +$L__BB3_177: + abs.f32 %f2945, %f193; + setp.neu.f32 %p320, %f2945, 0f7F800000; + @%p320 bra $L__BB3_180; + + selp.f32 %f3025, 0fFF800000, 0f7F800000, %p26; + +$L__BB3_180: + mov.f32 %f2926, 0f00000000; + mov.f32 %f2925, 0f3102E308; + mov.f32 %f2924, 0fBF317218; + mov.f32 %f2923, 0f35BFBE8E; + mov.f32 %f2922, 0f3F317200; + mov.f32 %f2921, 0f3DAAAABD; + mov.f32 %f2920, 0f3C4CAF63; + mov.f32 %f2919, 0f3B18F0FE; + mov.f32 %f2918, 0f32A57060; + mov.f32 %f2917, 0f4B400001; + mov.f32 %f2916, 0f437C0000; + mov.f32 %f2915, 0f3BBB989D; + mov.f32 %f2914, 0f3FB8AA3B; + mov.f32 %f2913, 0f3F000000; + cvt.rn.f32.s32 %f2912, %r853; + sub.f32 %f2911, %f2912, %f3047; + mul.f32 %f1365, %f3025, 0fBF000000; + setp.eq.f32 %p321, %f193, 0f3F800000; + selp.f32 %f1366, 0fBF000000, %f1365, %p321; + fma.rn.f32 %f1369, %f1366, %f2915, %f2913; + cvt.sat.f32.f32 %f1372, %f1369; + fma.rm.f32 %f1374, %f1372, %f2916, %f2917; + add.f32 %f1375, %f1374, 0fCB40007F; + neg.f32 %f1376, %f1375; + fma.rn.f32 %f1377, %f1366, %f2914, %f1376; + fma.rn.f32 %f1379, %f1366, %f2918, %f1377; + mov.b32 %r424, %f1374; + shl.b32 %r425, %r424, 23; + mov.b32 %f1380, %r425; + ex2.approx.ftz.f32 %f1381, %f1379; + mul.f32 %f205, %f1381, %f1380; + div.rn.f32 %f206, %f2911, %f3043; + abs.f32 %f207, %f206; + setp.lt.f32 %p322, %f207, 0f00800000; + mul.f32 %f1382, %f207, 0f4B800000; + selp.f32 %f1383, %f1382, %f207, %p322; + selp.f32 %f1384, 0fC3170000, 0fC2FE0000, %p322; + mov.b32 %r426, %f1383; + and.b32 %r427, %r426, 8388607; + or.b32 %r428, %r427, 1065353216; + mov.b32 %f1385, %r428; + shr.u32 %r429, %r426, 23; + cvt.rn.f32.u32 %f1386, %r429; + add.f32 %f1387, %f1384, %f1386; + setp.gt.f32 %p323, %f1385, 0f3FB504F3; + mul.f32 %f1388, %f1385, 0f3F000000; + add.f32 %f1389, %f1387, 0f3F800000; + selp.f32 %f1390, %f1389, %f1387, %p323; + selp.f32 %f1391, %f1388, %f1385, %p323; + add.f32 %f1392, %f1391, 0fBF800000; + add.f32 %f1393, %f1391, 0f3F800000; + rcp.approx.ftz.f32 %f1394, %f1393; + add.f32 %f1395, %f1392, %f1392; + mul.f32 %f1397, %f1395, %f1394; + mul.f32 %f1398, %f1397, %f1397; + fma.rn.f32 %f1401, %f2919, %f1398, %f2920; + fma.rn.f32 %f1403, %f1401, %f1398, %f2921; + mul.rn.f32 %f1404, %f1403, %f1398; + mul.rn.f32 %f1405, %f1404, %f1397; + sub.f32 %f1406, %f1392, %f1397; + add.f32 %f1407, %f1406, %f1406; + neg.f32 %f1408, %f1397; + fma.rn.f32 %f1409, %f1408, %f1392, %f1407; + mul.rn.f32 %f1410, %f1394, %f1409; + add.f32 %f1411, %f1405, %f1397; + sub.f32 %f1412, %f1397, %f1411; + add.f32 %f1413, %f1405, %f1412; + add.f32 %f1414, %f1410, %f1413; add.f32 %f1415, %f1411, %f1414; - add.f32 %f1416, %f135, %f1415; - add.f32 %f1417, %f1412, %f1416; - sub.f32 %f1418, %f1412, %f1417; - add.f32 %f1419, %f1416, %f1418; - mul.rn.f32 %f1421, %f895, %f1417; - neg.f32 %f1422, %f1421; - fma.rn.f32 %f1423, %f895, %f1417, %f1422; - fma.rn.f32 %f1424, %f895, %f1419, %f1423; - fma.rn.f32 %f1426, %f2992, %f1417, %f1424; - add.rn.f32 %f1427, %f1421, %f1426; - neg.f32 %f1428, %f1427; - add.rn.f32 %f1429, %f1421, %f1428; - add.rn.f32 %f1430, %f1429, %f1426; - mov.b32 %r178, %f1427; - setp.eq.s32 %p130, %r178, 1118925336; - add.s32 %r179, %r178, -1; - mov.b32 %f1431, %r179; - add.f32 %f1432, %f1430, 0f37000000; - selp.f32 %f1433, %f1431, %f1427, %p130; - selp.f32 %f241, %f1432, %f1430, %p130; - mul.f32 %f1434, %f1433, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1435, %f1434; - fma.rn.f32 %f1436, %f1435, %f2987, %f1433; - fma.rn.f32 %f1437, %f1435, %f2988, %f1436; - mul.f32 %f1438, %f1437, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1439, %f1438; - add.f32 %f1440, %f1435, 0f00000000; - ex2.approx.f32 %f1441, %f1440; - mul.f32 %f1442, %f1439, %f1441; - setp.lt.f32 %p131, %f1433, 0fC2D20000; - selp.f32 %f1443, 0f00000000, %f1442, %p131; - setp.gt.f32 %p132, %f1433, 0f42D20000; - selp.f32 %f3292, 0f7F800000, %f1443, %p132; - setp.eq.f32 %p133, %f3292, 0f7F800000; - @%p133 bra BB3_86; - - fma.rn.f32 %f3292, %f3292, %f241, %f3292; - -BB3_86: - setp.eq.f32 %p336, %f128, 0f00000000; - setp.geu.f32 %p335, %f128, 0f00000000; - mov.b32 %r180, %f3292; - xor.b32 %r181, %r180, -2147483648; - mov.b32 %f1444, %r181; - selp.f32 %f245, %f1444, %f3292, %p1; - selp.f32 %f3293, %f136, %f245, %p336; - @%p335 bra BB3_88; - - cvt.rzi.f32.f32 %f1446, %f895; - setp.neu.f32 %p135, %f1446, 0f40000000; - selp.f32 %f3293, 0f7FFFFFFF, %f245, %p135; - -BB3_88: - abs.f32 %f3005, %f128; - setp.eq.f32 %p340, %f128, 0f3F800000; - add.f32 %f3004, %f3005, 0f40000000; - mov.b32 %r287, %f3004; - setp.gt.s32 %p339, %r287, 2139095039; - setp.neu.f32 %p338, %f3005, 0f7F800000; - selp.f32 %f3003, 0fFF800000, 0f7F800000, %p1; - setp.gtu.f32 %p337, %f3005, 0f7F800000; - add.f32 %f3002, %f128, 0f40000000; - mov.f32 %f3001, 0f00000000; - mov.f32 %f3000, 0f3DAAAABD; - mov.f32 %f2999, 0f3C4CAF63; - mov.f32 %f2998, 0f3B18F0FE; - mov.f32 %f2997, 0fB5BFBE8E; - mov.f32 %f2996, 0fBF317200; - selp.f32 %f1450, %f3002, %f3293, %p337; - selp.f32 %f1452, %f1450, %f3003, %p338; - selp.f32 %f1453, %f1452, %f3293, %p339; - mul.f32 %f1454, %f1453, 0fBF000000; - selp.f32 %f1455, 0fBF000000, %f1454, %p340; - mul.f32 %f1456, %f1455, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1457, %f1456; - fma.rn.f32 %f1459, %f1457, %f2996, %f1455; - fma.rn.f32 %f1461, %f1457, %f2997, %f1459; - mul.f32 %f1462, %f1461, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1463, %f1462; - add.f32 %f1464, %f1457, 0f00000000; - ex2.approx.f32 %f1465, %f1464; - mul.f32 %f1466, %f1463, %f1465; - setp.lt.f32 %p140, %f1455, 0fC2D20000; - selp.f32 %f1467, 0f00000000, %f1466, %p140; - setp.gt.f32 %p141, %f1455, 0f42D20000; - selp.f32 %f249, 0f7F800000, %f1467, %p141; - // inline asm - rcp.approx.ftz.f32 %f1447,%f140; - // inline asm - mul.f32 %f1468, %f1447, %f141; - mul.f32 %f1469, %f1468, %f1468; - fma.rn.f32 %f1472, %f2998, %f1469, %f2999; - fma.rn.f32 %f1474, %f1472, %f1469, %f3000; - mul.rn.f32 %f1475, %f1474, %f1469; - mul.rn.f32 %f1476, %f1475, %f1468; - sub.f32 %f1477, %f139, %f1468; - neg.f32 %f1478, %f1468; - add.f32 %f1479, %f1477, %f1477; - fma.rn.f32 %f1480, %f1478, %f139, %f1479; - mul.rn.f32 %f1481, %f1447, %f1480; - add.f32 %f1482, %f1476, %f1468; - sub.f32 %f1483, %f1468, %f1482; - add.f32 %f1484, %f1476, %f1483; - add.f32 %f1485, %f1481, %f1484; - add.f32 %f1486, %f1482, %f1485; - sub.f32 %f1487, %f1482, %f1486; - add.f32 %f1488, %f1485, %f1487; - add.f32 %f1489, %f142, %f1486; - sub.f32 %f1490, %f142, %f1489; - add.f32 %f1491, %f1486, %f1490; - add.f32 %f1492, %f1488, %f1491; - add.f32 %f1493, %f143, %f1492; - add.f32 %f1494, %f1489, %f1493; - sub.f32 %f1495, %f1489, %f1494; - add.f32 %f1496, %f1493, %f1495; - mul.rn.f32 %f1498, %f895, %f1494; - neg.f32 %f1499, %f1498; - fma.rn.f32 %f1500, %f895, %f1494, %f1499; - fma.rn.f32 %f1501, %f895, %f1496, %f1500; - fma.rn.f32 %f1503, %f3001, %f1494, %f1501; - add.rn.f32 %f1504, %f1498, %f1503; - neg.f32 %f1505, %f1504; - add.rn.f32 %f1506, %f1498, %f1505; - add.rn.f32 %f1507, %f1506, %f1503; - mov.b32 %r182, %f1504; - setp.eq.s32 %p142, %r182, 1118925336; - add.s32 %r183, %r182, -1; - mov.b32 %f1508, %r183; - add.f32 %f1509, %f1507, 0f37000000; - selp.f32 %f1510, %f1508, %f1504, %p142; - selp.f32 %f250, %f1509, %f1507, %p142; - mul.f32 %f1511, %f1510, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1512, %f1511; - fma.rn.f32 %f1513, %f1512, %f2996, %f1510; - fma.rn.f32 %f1514, %f1512, %f2997, %f1513; - mul.f32 %f1515, %f1514, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1516, %f1515; - add.f32 %f1517, %f1512, 0f00000000; - ex2.approx.f32 %f1518, %f1517; - mul.f32 %f1519, %f1516, %f1518; - setp.lt.f32 %p143, %f1510, 0fC2D20000; - selp.f32 %f1520, 0f00000000, %f1519, %p143; - setp.gt.f32 %p144, %f1510, 0f42D20000; - selp.f32 %f3294, 0f7F800000, %f1520, %p144; - setp.eq.f32 %p145, %f3294, 0f7F800000; - @%p145 bra BB3_90; - - fma.rn.f32 %f3294, %f3294, %f250, %f3294; - -BB3_90: - setp.eq.f32 %p342, %f137, 0f00000000; - setp.geu.f32 %p341, %f137, 0f00000000; - mov.b32 %r184, %f3294; - xor.b32 %r185, %r184, -2147483648; - mov.b32 %f1521, %r185; - selp.f32 %f254, %f1521, %f3294, %p2; - selp.f32 %f3295, %f144, %f254, %p342; - @%p341 bra BB3_92; - - cvt.rzi.f32.f32 %f1523, %f895; - setp.neu.f32 %p147, %f1523, 0f40000000; - selp.f32 %f3295, 0f7FFFFFFF, %f254, %p147; - -BB3_92: - abs.f32 %f3020, %f137; - cvt.rn.f32.s32 %f3019, %r321; - sub.f32 %f3018, %f3019, %f3321; - mul.f32 %f3017, %f3018, %f3018; - mul.f32 %f3016, %f3018, %f3017; - add.f32 %f3015, %f3018, 0f3F800000; - setp.eq.f32 %p346, %f137, 0f3F800000; - add.f32 %f3014, %f3020, 0f40000000; - mov.b32 %r288, %f3014; - setp.gt.s32 %p345, %r288, 2139095039; - setp.neu.f32 %p344, %f3020, 0f7F800000; - selp.f32 %f3013, 0fFF800000, 0f7F800000, %p2; - setp.gtu.f32 %p343, %f3020, 0f7F800000; - add.f32 %f3012, %f137, 0f40000000; - mov.f32 %f3011, 0f00000000; - mov.f32 %f3010, 0f3DAAAABD; - mov.f32 %f3009, 0f3C4CAF63; - mov.f32 %f3008, 0f3B18F0FE; - mov.f32 %f3007, 0fB5BFBE8E; - mov.f32 %f3006, 0fBF317200; - selp.f32 %f1527, %f3012, %f3295, %p343; - selp.f32 %f1529, %f1527, %f3013, %p344; - selp.f32 %f1530, %f1529, %f3295, %p345; - mul.f32 %f1531, %f1530, 0fBF000000; - selp.f32 %f1532, 0fBF000000, %f1531, %p346; - mul.f32 %f1533, %f1532, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1534, %f1533; - fma.rn.f32 %f1536, %f1534, %f3006, %f1532; - fma.rn.f32 %f1538, %f1534, %f3007, %f1536; - mul.f32 %f1539, %f1538, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1540, %f1539; - add.f32 %f1541, %f1534, 0f00000000; - ex2.approx.f32 %f1542, %f1541; - mul.f32 %f1543, %f1540, %f1542; - setp.lt.f32 %p152, %f1532, 0fC2D20000; - selp.f32 %f1544, 0f00000000, %f1543, %p152; - setp.gt.f32 %p153, %f1532, 0f42D20000; - selp.f32 %f1545, 0f7F800000, %f1544, %p153; - mul.f32 %f1546, %f3018, %f1545; - mul.f32 %f1547, %f3015, %f249; - sub.f32 %f1548, %f1547, %f1546; - mul.f32 %f1549, %f98, %f1548; - mul.f32 %f258, %f179, %f1549; - mul.f32 %f1550, %f99, %f258; - mul.f32 %f1551, %f1545, %f3016; - mul.f32 %f1552, %f249, %f145; - sub.f32 %f1553, %f1552, %f1551; - mul.f32 %f1554, %f104, %f1553; - mul.f32 %f1555, %f179, %f1554; - sub.f32 %f259, %f1550, %f1555; - // inline asm - rcp.approx.ftz.f32 %f1524,%f1184; - // inline asm - mul.f32 %f1556, %f1524, %f206; - mul.f32 %f1557, %f1556, %f1556; - fma.rn.f32 %f1560, %f3008, %f1557, %f3009; - fma.rn.f32 %f1562, %f1560, %f1557, %f3010; - mul.rn.f32 %f1563, %f1562, %f1557; - mul.rn.f32 %f1564, %f1563, %f1556; - sub.f32 %f1565, %f204, %f1556; - neg.f32 %f1566, %f1556; - add.f32 %f1567, %f1565, %f1565; - fma.rn.f32 %f1568, %f1566, %f204, %f1567; - mul.rn.f32 %f1569, %f1524, %f1568; - add.f32 %f1570, %f1564, %f1556; - sub.f32 %f1571, %f1556, %f1570; - add.f32 %f1572, %f1564, %f1571; - add.f32 %f1573, %f1569, %f1572; - add.f32 %f1574, %f1570, %f1573; - sub.f32 %f1575, %f1570, %f1574; - add.f32 %f1576, %f1573, %f1575; - add.f32 %f1577, %f207, %f1574; - sub.f32 %f1578, %f207, %f1577; - add.f32 %f1579, %f1574, %f1578; - add.f32 %f1580, %f1576, %f1579; - add.f32 %f1581, %f208, %f1580; - add.f32 %f1582, %f1577, %f1581; - sub.f32 %f1583, %f1577, %f1582; - add.f32 %f1584, %f1581, %f1583; - mul.rn.f32 %f1586, %f895, %f1582; - neg.f32 %f1587, %f1586; - fma.rn.f32 %f1588, %f895, %f1582, %f1587; - fma.rn.f32 %f1589, %f895, %f1584, %f1588; - fma.rn.f32 %f1591, %f3011, %f1582, %f1589; - add.rn.f32 %f1592, %f1586, %f1591; - neg.f32 %f1593, %f1592; - add.rn.f32 %f1594, %f1586, %f1593; - add.rn.f32 %f1595, %f1594, %f1591; - mov.b32 %r186, %f1592; - setp.eq.s32 %p154, %r186, 1118925336; - add.s32 %r187, %r186, -1; - mov.b32 %f1596, %r187; - add.f32 %f1597, %f1595, 0f37000000; - selp.f32 %f1598, %f1596, %f1592, %p154; - selp.f32 %f260, %f1597, %f1595, %p154; - mul.f32 %f1599, %f1598, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1600, %f1599; - fma.rn.f32 %f1601, %f1600, %f3006, %f1598; - fma.rn.f32 %f1602, %f1600, %f3007, %f1601; - mul.f32 %f1603, %f1602, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1604, %f1603; - add.f32 %f1605, %f1600, 0f00000000; - ex2.approx.f32 %f1606, %f1605; - mul.f32 %f1607, %f1604, %f1606; - setp.lt.f32 %p155, %f1598, 0fC2D20000; - selp.f32 %f1608, 0f00000000, %f1607, %p155; - setp.gt.f32 %p156, %f1598, 0f42D20000; - selp.f32 %f3296, 0f7F800000, %f1608, %p156; - setp.eq.f32 %p157, %f3296, 0f7F800000; - @%p157 bra BB3_94; - - fma.rn.f32 %f3296, %f3296, %f260, %f3296; - -BB3_94: - setp.eq.f32 %p347, %f202, 0f00000000; - mov.b32 %r188, %f3296; - xor.b32 %r189, %r188, -2147483648; - mov.b32 %f1609, %r189; - selp.f32 %f3298, %f1609, %f3296, %p5; - @%p347 bra BB3_97; - bra.uni BB3_95; - -BB3_97: - add.f32 %f1612, %f202, %f202; - selp.f32 %f3298, %f1612, 0f00000000, %p55; - bra.uni BB3_98; - -BB3_95: - setp.geu.f32 %p159, %f202, 0f00000000; - @%p159 bra BB3_98; - - cvt.rzi.f32.f32 %f1611, %f895; - setp.neu.f32 %p160, %f1611, 0f40000000; - selp.f32 %f3298, 0f7FFFFFFF, %f3298, %p160; - -BB3_98: - abs.f32 %f3022, %f202; - add.f32 %f3021, %f3022, 0f40000000; - mov.b32 %r289, %f3021; - setp.lt.s32 %p348, %r289, 2139095040; - @%p348 bra BB3_103; - - abs.f32 %f3080, %f202; - setp.gtu.f32 %p163, %f3080, 0f7F800000; - @%p163 bra BB3_102; - bra.uni BB3_100; - -BB3_102: - add.f32 %f3298, %f202, 0f40000000; - bra.uni BB3_103; - -BB3_100: - abs.f32 %f3081, %f202; - setp.neu.f32 %p164, %f3081, 0f7F800000; - @%p164 bra BB3_103; - - selp.f32 %f3298, 0fFF800000, 0f7F800000, %p5; - -BB3_103: - setp.eq.f32 %p349, %f202, 0f3F800000; - mov.f32 %f3028, 0f00000000; - mov.f32 %f3027, 0f3DAAAABD; - mov.f32 %f3026, 0f3C4CAF63; - mov.f32 %f3025, 0f3B18F0FE; - mov.f32 %f3024, 0fB5BFBE8E; - mov.f32 %f3023, 0fBF317200; - mul.f32 %f1615, %f3298, 0fBF000000; - selp.f32 %f1616, 0fBF000000, %f1615, %p349; - mul.f32 %f1617, %f1616, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1618, %f1617; - fma.rn.f32 %f1620, %f1618, %f3023, %f1616; - fma.rn.f32 %f1622, %f1618, %f3024, %f1620; - mul.f32 %f1623, %f1622, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1624, %f1623; - add.f32 %f1625, %f1618, 0f00000000; - ex2.approx.f32 %f1626, %f1625; - mul.f32 %f1627, %f1624, %f1626; - setp.lt.f32 %p166, %f1616, 0fC2D20000; - selp.f32 %f1628, 0f00000000, %f1627, %p166; - setp.gt.f32 %p167, %f1616, 0f42D20000; - selp.f32 %f271, 0f7F800000, %f1628, %p167; - // inline asm - rcp.approx.ftz.f32 %f1613,%f1283; - // inline asm - mul.f32 %f1629, %f1613, %f225; - mul.f32 %f1630, %f1629, %f1629; - fma.rn.f32 %f1633, %f3025, %f1630, %f3026; - fma.rn.f32 %f1635, %f1633, %f1630, %f3027; - mul.rn.f32 %f1636, %f1635, %f1630; - mul.rn.f32 %f1637, %f1636, %f1629; - sub.f32 %f1638, %f223, %f1629; - neg.f32 %f1639, %f1629; - add.f32 %f1640, %f1638, %f1638; - fma.rn.f32 %f1641, %f1639, %f223, %f1640; - mul.rn.f32 %f1642, %f1613, %f1641; - add.f32 %f1643, %f1637, %f1629; - sub.f32 %f1644, %f1629, %f1643; - add.f32 %f1645, %f1637, %f1644; - add.f32 %f1646, %f1642, %f1645; - add.f32 %f1647, %f1643, %f1646; - sub.f32 %f1648, %f1643, %f1647; - add.f32 %f1649, %f1646, %f1648; - add.f32 %f1650, %f226, %f1647; - sub.f32 %f1651, %f226, %f1650; - add.f32 %f1652, %f1647, %f1651; - add.f32 %f1653, %f1649, %f1652; - add.f32 %f1654, %f227, %f1653; - add.f32 %f1655, %f1650, %f1654; - sub.f32 %f1656, %f1650, %f1655; - add.f32 %f1657, %f1654, %f1656; - mul.rn.f32 %f1659, %f895, %f1655; - neg.f32 %f1660, %f1659; - fma.rn.f32 %f1661, %f895, %f1655, %f1660; - fma.rn.f32 %f1662, %f895, %f1657, %f1661; - fma.rn.f32 %f1664, %f3028, %f1655, %f1662; - add.rn.f32 %f1665, %f1659, %f1664; - neg.f32 %f1666, %f1665; - add.rn.f32 %f1667, %f1659, %f1666; - add.rn.f32 %f1668, %f1667, %f1664; - mov.b32 %r190, %f1665; - setp.eq.s32 %p168, %r190, 1118925336; - add.s32 %r191, %r190, -1; - mov.b32 %f1669, %r191; - add.f32 %f1670, %f1668, 0f37000000; - selp.f32 %f1671, %f1669, %f1665, %p168; - selp.f32 %f272, %f1670, %f1668, %p168; - mul.f32 %f1672, %f1671, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1673, %f1672; - fma.rn.f32 %f1674, %f1673, %f3023, %f1671; - fma.rn.f32 %f1675, %f1673, %f3024, %f1674; - mul.f32 %f1676, %f1675, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1677, %f1676; - add.f32 %f1678, %f1673, 0f00000000; - ex2.approx.f32 %f1679, %f1678; - mul.f32 %f1680, %f1677, %f1679; - setp.lt.f32 %p169, %f1671, 0fC2D20000; - selp.f32 %f1681, 0f00000000, %f1680, %p169; - setp.gt.f32 %p170, %f1671, 0f42D20000; - selp.f32 %f3299, 0f7F800000, %f1681, %p170; - setp.eq.f32 %p171, %f3299, 0f7F800000; - @%p171 bra BB3_105; - - fma.rn.f32 %f3299, %f3299, %f272, %f3299; - -BB3_105: - setp.eq.f32 %p352, %f221, 0f00000000; - mov.b32 %r192, %f3299; - xor.b32 %r193, %r192, -2147483648; - mov.b32 %f1682, %r193; - selp.f32 %f3301, %f1682, %f3299, %p6; - @%p352 bra BB3_108; - bra.uni BB3_106; - -BB3_108: - add.f32 %f1685, %f221, %f221; - selp.f32 %f3301, %f1685, 0f00000000, %p55; - bra.uni BB3_109; - -BB3_106: - setp.geu.f32 %p173, %f221, 0f00000000; - @%p173 bra BB3_109; - - cvt.rzi.f32.f32 %f1684, %f895; - setp.neu.f32 %p174, %f1684, 0f40000000; - selp.f32 %f3301, 0f7FFFFFFF, %f3301, %p174; - -BB3_109: - abs.f32 %f3094, %f221; - add.f32 %f3093, %f3094, 0f40000000; - mov.b32 %r298, %f3093; - setp.lt.s32 %p353, %r298, 2139095040; - @%p353 bra BB3_114; - - abs.f32 %f3078, %f221; - setp.gtu.f32 %p177, %f3078, 0f7F800000; - @%p177 bra BB3_113; - bra.uni BB3_111; - -BB3_113: - add.f32 %f3301, %f221, 0f40000000; - bra.uni BB3_114; - -BB3_111: - abs.f32 %f3079, %f221; - setp.neu.f32 %p178, %f3079, 0f7F800000; - @%p178 bra BB3_114; - - selp.f32 %f3301, 0fFF800000, 0f7F800000, %p6; - -BB3_114: - setp.eq.f32 %p354, %f221, 0f3F800000; - cvt.rn.f32.s32 %f3034, %r322; - sub.f32 %f3033, %f3034, %f3320; - add.f32 %f3032, %f3033, 0f3F800000; - mov.f32 %f3302, 0f00000000; - mov.f32 %f3030, 0fB5BFBE8E; - mov.f32 %f3029, 0fBF317200; - mul.f32 %f1687, %f3301, 0fBF000000; - selp.f32 %f1688, 0fBF000000, %f1687, %p354; - mul.f32 %f1689, %f1688, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1690, %f1689; - fma.rn.f32 %f1692, %f1690, %f3029, %f1688; - fma.rn.f32 %f1694, %f1690, %f3030, %f1692; - mul.f32 %f1695, %f1694, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1696, %f1695; - add.f32 %f1697, %f1690, 0f00000000; - ex2.approx.f32 %f1698, %f1697; - mul.f32 %f1699, %f1696, %f1698; - setp.lt.f32 %p180, %f1688, 0fC2D20000; - selp.f32 %f1700, 0f00000000, %f1699, %p180; - setp.gt.f32 %p181, %f1688, 0f42D20000; - selp.f32 %f1701, 0f7F800000, %f1700, %p181; - mul.f32 %f1702, %f3033, %f1701; - mul.f32 %f1703, %f3032, %f271; - sub.f32 %f1704, %f1703, %f1702; - mul.f32 %f1705, %f100, %f1704; - mul.f32 %f283, %f165, %f1705; - mul.f32 %f1706, %f101, %f283; - mul.f32 %f1707, %f3032, %f3032; - mul.f32 %f1708, %f3032, %f1707; - mul.f32 %f1709, %f3033, %f3033; - mul.f32 %f1710, %f3033, %f1709; - mul.f32 %f1711, %f271, %f1708; - mul.f32 %f1712, %f1701, %f1710; - sub.f32 %f1713, %f1711, %f1712; - mul.f32 %f1714, %f105, %f1713; - mul.f32 %f1715, %f165, %f1714; - sub.f32 %f284, %f1706, %f1715; - mul.f32 %f285, %f165, %f179; - setp.leu.f32 %p182, %f180, 0f3C23D70A; - @%p182 bra BB3_116; - - div.rn.f32 %f1716, %f181, %f180; - add.f32 %f3302, %f1716, 0fBF800000; - -BB3_116: - mov.f32 %f3303, 0f00000000; - @%p182 bra BB3_118; - - mul.f32 %f1718, %f180, %f180; - div.rn.f32 %f3303, %f181, %f1718; - -BB3_118: - mov.f32 %f1719, 0f47C35000; - min.f32 %f1720, %f3302, %f1719; - fma.rn.f32 %f3271, %f1720, %f199, %f3271; - mul.f32 %f1721, %f1720, %f200; - mul.f32 %f1722, %f199, %f199; - min.f32 %f1723, %f3303, %f1719; - mul.f32 %f1724, %f1723, %f1722; - sub.f32 %f1725, %f1721, %f1724; - add.f32 %f3277, %f1725, %f3277; - fma.rn.f32 %f3270, %f1720, %f239, %f3270; - mul.f32 %f1726, %f1720, %f240; - mul.f32 %f1727, %f239, %f239; - mul.f32 %f1728, %f1723, %f1727; - sub.f32 %f1729, %f1726, %f1728; - add.f32 %f3276, %f1729, %f3276; - fma.rn.f32 %f3269, %f1720, %f285, %f3269; - mul.f32 %f1730, %f1720, 0f00000000; - mul.f32 %f1731, %f285, %f285; - mul.f32 %f1732, %f1723, %f1731; - sub.f32 %f1733, %f1730, %f1732; - add.f32 %f3275, %f1733, %f3275; - add.f32 %f3268, %f3268, %f1720; - sub.f32 %f1734, %f1730, %f1723; - add.f32 %f3274, %f1734, %f3274; - fma.rn.f32 %f3267, %f1720, %f258, %f3267; - mul.f32 %f1735, %f1720, %f259; - mul.f32 %f1736, %f258, %f258; - mul.f32 %f1737, %f1723, %f1736; - sub.f32 %f1738, %f1735, %f1737; - add.f32 %f3273, %f1738, %f3273; - fma.rn.f32 %f3266, %f1720, %f283, %f3266; - mul.f32 %f1739, %f1720, %f284; - mul.f32 %f1740, %f283, %f283; - mul.f32 %f1741, %f1723, %f1740; - sub.f32 %f1742, %f1739, %f1741; - add.f32 %f3272, %f1742, %f3272; - add.s32 %r322, %r322, 1; - setp.lt.s32 %p184, %r322, %r71; - @%p184 bra BB3_42; - - add.s32 %r321, %r321, 1; - setp.lt.s32 %p185, %r321, %r71; - @%p185 bra BB3_41; - -BB3_120: - div.rn.f32 %f1743, %f3271, %f3277; - mov.f32 %f1744, 0fBF800000; - max.f32 %f1745, %f1743, %f1744; - mov.f32 %f1746, 0f3F800000; - min.f32 %f1747, %f1745, %f1746; - sub.f32 %f3321, %f3321, %f1747; - div.rn.f32 %f1748, %f3270, %f3276; - max.f32 %f1749, %f1748, %f1744; - min.f32 %f1750, %f1749, %f1746; - sub.f32 %f3320, %f3320, %f1750; - neg.f32 %f1751, %f3319; - div.rn.f32 %f1752, %f3269, %f3275; - max.f32 %f1753, %f1752, %f1751; - min.f32 %f1754, %f1753, %f3319; - sub.f32 %f1755, %f3319, %f1754; - neg.f32 %f1756, %f3226; - div.rn.f32 %f1757, %f3268, %f3274; - max.f32 %f1758, %f1757, %f1756; - min.f32 %f1759, %f1758, %f3226; - sub.f32 %f1760, %f3226, %f1759; - neg.f32 %f1761, %f3317; - div.rn.f32 %f1762, %f3267, %f3273; - max.f32 %f1763, %f1762, %f1761; - min.f32 %f1764, %f1763, %f3317; - sub.f32 %f1765, %f3317, %f1764; - neg.f32 %f1766, %f3316; - div.rn.f32 %f1767, %f3266, %f3272; - max.f32 %f1768, %f1767, %f1766; - min.f32 %f1769, %f1768, %f3316; - sub.f32 %f1770, %f3316, %f1769; - max.f32 %f3319, %f1755, %f1746; - mov.f32 %f1771, 0f3C23D70A; - max.f32 %f3226, %f1760, %f1771; - max.f32 %f1773, %f1765, %f648; - min.f32 %f3317, %f1773, %f87; - max.f32 %f1774, %f1770, %f648; - min.f32 %f3316, %f1774, %f87; - add.s32 %r320, %r320, 1; - setp.lt.s32 %p186, %r320, %r72; - @%p186 bra BB3_39; - bra.uni BB3_121; - -BB3_37: - mov.f32 %f3317, %f3316; - -BB3_121: - mov.f32 %f3371, 0f00000000; - @%p15 bra BB3_206; - - div.rn.f32 %f1778, %f648, %f3317; - div.rn.f32 %f326, %f1778, %f3317; - div.rn.f32 %f1779, %f648, %f3316; - div.rn.f32 %f327, %f1779, %f3316; - div.rn.f32 %f1780, %f3319, 0fC0206C98; - div.rn.f32 %f328, %f1780, %f3317; - div.rn.f32 %f329, %f1780, %f3316; - div.rn.f32 %f330, %f328, %f3317; - div.rn.f32 %f331, %f329, %f3316; - mov.u32 %r194, 0; - mov.f32 %f3371, 0f00000000; - sqrt.rn.f32 %f1782, %f326; - sqrt.rn.f32 %f341, %f327; - mov.u32 %r323, %r194; - -BB3_123: - cvt.rn.f32.s32 %f1781, %r323; - sub.f32 %f333, %f1781, %f3321; - add.f32 %f334, %f333, 0f3F800000; - mul.f32 %f335, %f334, %f1782; - abs.f32 %f336, %f335; - mul.f32 %f337, %f335, %f335; - mul.f32 %f338, %f333, %f1782; - abs.f32 %f339, %f338; - add.f32 %f1783, %f1781, 0f3F800000; - sub.f32 %f1784, %f1783, %f3321; - div.rn.f32 %f342, %f1784, %f3317; - mov.f32 %f1785, 0f3F800000; - cvt.rzi.f32.f32 %f1786, %f1785; - add.f32 %f1787, %f1786, %f1786; - mov.f32 %f1788, 0f40000000; - sub.f32 %f1789, %f1788, %f1787; - abs.f32 %f343, %f1789; - setp.eq.f32 %p188, %f343, 0f3F800000; - abs.f32 %f344, %f342; - setp.lt.f32 %p189, %f344, 0f00800000; - mul.f32 %f1790, %f344, 0f4B800000; - selp.f32 %f1791, 0fC3170000, 0fC2FE0000, %p189; - selp.f32 %f1792, %f1790, %f344, %p189; - mov.b32 %r196, %f1792; - and.b32 %r197, %r196, 8388607; - or.b32 %r198, %r197, 1065353216; - mov.b32 %f1793, %r198; - shr.u32 %r199, %r196, 23; - cvt.rn.f32.u32 %f1794, %r199; - add.f32 %f1795, %f1791, %f1794; - setp.gt.f32 %p190, %f1793, 0f3FB504F3; - mul.f32 %f1796, %f1793, 0f3F000000; - add.f32 %f1797, %f1795, 0f3F800000; - selp.f32 %f1798, %f1796, %f1793, %p190; - selp.f32 %f1799, %f1797, %f1795, %p190; - add.f32 %f345, %f1798, 0fBF800000; - add.f32 %f346, %f1798, 0f3F800000; - add.f32 %f347, %f345, %f345; - mov.f32 %f1800, 0f3F317200; - mul.rn.f32 %f348, %f1799, %f1800; - mov.f32 %f1801, 0f35BFBE8E; - mul.rn.f32 %f349, %f1799, %f1801; - setp.lt.f32 %p191, %f342, 0f00000000; - and.pred %p7, %p191, %p188; - add.f32 %f1802, %f342, %f342; - selp.f32 %f350, %f1802, 0f00000000, %p188; - div.rn.f32 %f353, %f333, %f3317; - abs.f32 %f354, %f353; - setp.lt.f32 %p192, %f354, 0f00800000; - mul.f32 %f1804, %f354, 0f4B800000; - selp.f32 %f1805, 0fC3170000, 0fC2FE0000, %p192; - selp.f32 %f1806, %f1804, %f354, %p192; - mov.b32 %r200, %f1806; - and.b32 %r201, %r200, 8388607; - or.b32 %r202, %r201, 1065353216; - mov.b32 %f1807, %r202; - shr.u32 %r203, %r200, 23; - cvt.rn.f32.u32 %f1808, %r203; - add.f32 %f1809, %f1805, %f1808; - setp.gt.f32 %p193, %f1807, 0f3FB504F3; - mul.f32 %f1810, %f1807, 0f3F000000; - add.f32 %f1811, %f1809, 0f3F800000; - selp.f32 %f1812, %f1810, %f1807, %p193; - selp.f32 %f1813, %f1811, %f1809, %p193; - add.f32 %f355, %f1812, 0fBF800000; - add.f32 %f356, %f1812, 0f3F800000; - add.f32 %f357, %f355, %f355; - mul.rn.f32 %f358, %f1813, %f1800; - mul.rn.f32 %f359, %f1813, %f1801; - setp.lt.f32 %p194, %f353, 0f00000000; - and.pred %p8, %p194, %p188; - add.f32 %f1814, %f353, %f353; - selp.f32 %f360, %f1814, 0f00000000, %p188; - mov.b32 %r205, %f335; - and.b32 %r45, %r205, -2147483648; - ld.local.v4.f32 {%f3343, %f3342, %f3341, %f3340}, [%rd2]; - ld.local.v4.f32 {%f3339, %f3338, %f1822, %f3337}, [%rd2+16]; - ld.local.v4.f32 {%f3336, %f3335, %f3334, %f3333}, [%rd2+32]; - ld.local.v2.f32 {%f3332, %f3331}, [%rd2+56]; - ld.local.v2.f32 {%f3330, %f3329}, [%rd2+64]; - ld.local.f32 %f3328, [%rd2+84]; - ld.local.v2.f32 {%f3327, %f3326}, [%rd2+88]; - ld.local.v2.f32 {%f3325, %f3324}, [%rd2+112]; - ld.local.f32 %f3323, [%rd2+140]; - mov.u32 %r324, %r194; - -BB3_124: - setp.ltu.f32 %p195, %f336, 0f3F800000; - @%p195 bra BB3_126; - bra.uni BB3_125; - -BB3_126: - cvt.rn.f32.s32 %f3191, %r323; - sub.f32 %f3190, %f3191, %f3321; - add.f32 %f3189, %f3190, 0f3F800000; - mul.f32 %f3188, %f3189, %f1782; - mov.f32 %f1854, 0f3BA0C9F8; - mov.f32 %f1855, 0fBA1268FB; - fma.rn.f32 %f1856, %f1855, %f337, %f1854; - mov.f32 %f1857, 0fBCDABFD4; - fma.rn.f32 %f1858, %f1856, %f337, %f1857; - mov.f32 %f1859, 0f3DE70331; - fma.rn.f32 %f1860, %f1858, %f337, %f1859; - mov.f32 %f1861, 0fBEC09330; - fma.rn.f32 %f1862, %f1860, %f337, %f1861; - mov.f32 %f1863, 0f3F906EBA; - fma.rn.f32 %f1864, %f1862, %f337, %f1863; - mul.f32 %f3345, %f3188, %f1864; - bra.uni BB3_127; - -BB3_125: - mov.f32 %f3139, 0f3F800000; - setp.ltu.f32 %p196, %f336, 0f407AD445; - mov.f32 %f1836, 0f3A03BB71; - mov.f32 %f1837, 0fB7B730FB; - fma.rn.f32 %f1838, %f1837, %f336, %f1836; - mov.f32 %f1839, 0fBBACA3B3; - fma.rn.f32 %f1840, %f1838, %f336, %f1839; - mov.f32 %f1841, 0f3D0A7445; - fma.rn.f32 %f1842, %f1840, %f336, %f1841; - mov.f32 %f1843, 0fBE1B3B75; - fma.rn.f32 %f1844, %f1842, %f336, %f1843; - mov.f32 %f1845, 0fBF6B385A; - fma.rn.f32 %f1846, %f1844, %f336, %f1845; - mov.f32 %f1847, 0fBFD0316E; - fma.rn.f32 %f1848, %f1846, %f336, %f1847; - mov.f32 %f1849, 0fBA031CCE; - fma.rn.f32 %f1850, %f1848, %f336, %f1849; - ex2.approx.ftz.f32 %f1851, %f1850; - sub.f32 %f1853, %f3139, %f1851; - mov.b32 %r206, %f1853; - selp.b32 %r207, %r206, 1065353216, %p196; - or.b32 %r208, %r207, %r45; - mov.b32 %f3345, %r208; - -BB3_127: - setp.ltu.f32 %p197, %f339, 0f3F800000; - @%p197 bra BB3_129; - bra.uni BB3_128; - -BB3_129: - cvt.rn.f32.s32 %f3186, %r323; - sub.f32 %f3185, %f3186, %f3321; - mul.f32 %f3184, %f3185, %f1782; - mul.f32 %f3183, %f3184, %f3184; - mov.f32 %f1883, 0f3BA0C9F8; - mov.f32 %f1884, 0fBA1268FB; - fma.rn.f32 %f1885, %f1884, %f3183, %f1883; - mov.f32 %f1886, 0fBCDABFD4; - fma.rn.f32 %f1887, %f1885, %f3183, %f1886; - mov.f32 %f1888, 0f3DE70331; - fma.rn.f32 %f1889, %f1887, %f3183, %f1888; - mov.f32 %f1890, 0fBEC09330; - fma.rn.f32 %f1891, %f1889, %f3183, %f1890; - mov.f32 %f1892, 0f3F906EBA; - fma.rn.f32 %f1893, %f1891, %f3183, %f1892; - mul.f32 %f3346, %f3184, %f1893; - bra.uni BB3_130; - -BB3_128: - cvt.rn.f32.s32 %f3143, %r323; - sub.f32 %f3142, %f3143, %f3321; - mul.f32 %f3141, %f3142, %f1782; - mov.b32 %r303, %f3141; - and.b32 %r302, %r303, -2147483648; - mov.f32 %f3140, 0f3F800000; - setp.ltu.f32 %p198, %f339, 0f407AD445; - mov.f32 %f1865, 0f3A03BB71; - mov.f32 %f1866, 0fB7B730FB; - fma.rn.f32 %f1867, %f1866, %f339, %f1865; - mov.f32 %f1868, 0fBBACA3B3; - fma.rn.f32 %f1869, %f1867, %f339, %f1868; - mov.f32 %f1870, 0f3D0A7445; - fma.rn.f32 %f1871, %f1869, %f339, %f1870; - mov.f32 %f1872, 0fBE1B3B75; - fma.rn.f32 %f1873, %f1871, %f339, %f1872; - mov.f32 %f1874, 0fBF6B385A; - fma.rn.f32 %f1875, %f1873, %f339, %f1874; - mov.f32 %f1876, 0fBFD0316E; - fma.rn.f32 %f1877, %f1875, %f339, %f1876; - mov.f32 %f1878, 0fBA031CCE; - fma.rn.f32 %f1879, %f1877, %f339, %f1878; - ex2.approx.ftz.f32 %f1880, %f1879; - sub.f32 %f1882, %f3140, %f1880; - mov.b32 %r209, %f1882; - selp.b32 %r210, %r209, 1065353216, %p198; - or.b32 %r211, %r210, %r302; - mov.b32 %f3346, %r211; - -BB3_130: - sub.f32 %f1894, %f3345, %f3346; - mul.f32 %f412, %f1894, 0f3F000000; - cvt.rn.f32.s32 %f413, %r324; - sub.f32 %f414, %f413, %f3320; - add.f32 %f415, %f414, 0f3F800000; - mul.f32 %f416, %f415, %f341; + sub.f32 %f1416, %f1411, %f1415; + add.f32 %f1417, %f1414, %f1416; + mul.rn.f32 %f1419, %f1390, %f2922; + mul.rn.f32 %f1421, %f1390, %f2923; + add.f32 %f1422, %f1419, %f1415; + sub.f32 %f1423, %f1419, %f1422; + add.f32 %f1424, %f1415, %f1423; + add.f32 %f1425, %f1417, %f1424; + add.f32 %f1426, %f1421, %f1425; + add.f32 %f1427, %f1422, %f1426; + sub.f32 %f1428, %f1422, %f1427; + add.f32 %f1429, %f1426, %f1428; + mul.rn.f32 %f1430, %f622, %f1427; + neg.f32 %f1431, %f1430; + fma.rn.f32 %f1432, %f622, %f1427, %f1431; + fma.rn.f32 %f1433, %f622, %f1429, %f1432; + fma.rn.f32 %f1435, %f2926, %f1427, %f1433; + add.rn.f32 %f1436, %f1430, %f1435; + neg.f32 %f1437, %f1436; + add.rn.f32 %f1438, %f1430, %f1437; + add.rn.f32 %f1439, %f1438, %f1435; + mov.b32 %r430, %f1436; + setp.eq.s32 %p324, %r430, 1118925336; + add.s32 %r431, %r430, -1; + mov.b32 %f1440, %r431; + add.f32 %f1441, %f1439, 0f37000000; + selp.f32 %f208, %f1441, %f1439, %p324; + selp.f32 %f1442, %f1440, %f1436, %p324; + mul.rn.f32 %f1443, %f1442, %f2914; + cvt.rzi.f32.f32 %f1444, %f1443; + abs.f32 %f1445, %f1444; + setp.gt.f32 %p325, %f1445, 0f42FC0000; + mov.b32 %r432, %f1444; + and.b32 %r433, %r432, -2147483648; + or.b32 %r434, %r433, 1123811328; + mov.b32 %f1446, %r434; + selp.f32 %f1447, %f1446, %f1444, %p325; + fma.rn.f32 %f1449, %f1447, %f2924, %f1442; + fma.rn.f32 %f1451, %f1447, %f2925, %f1449; + mul.f32 %f1452, %f1451, 0f3FB8AA3B; + add.f32 %f1453, %f1447, 0f4B40007F; + mov.b32 %r435, %f1453; + shl.b32 %r436, %r435, 23; + mov.b32 %f1454, %r436; + ex2.approx.ftz.f32 %f1455, %f1452; + mul.f32 %f209, %f1455, %f1454; + setp.eq.f32 %p326, %f209, 0f7F800000; + mov.f32 %f3026, 0f7F800000; + @%p326 bra $L__BB3_182; + + fma.rn.f32 %f3026, %f209, %f208, %f209; + +$L__BB3_182: + setp.lt.f32 %p327, %f206, 0f00000000; + and.pred %p27, %p327, %p110; + setp.eq.f32 %p329, %f206, 0f00000000; + @%p329 bra $L__BB3_186; + bra.uni $L__BB3_183; + +$L__BB3_186: + add.f32 %f1460, %f206, %f206; + selp.f32 %f3028, %f1460, 0f00000000, %p110; + bra.uni $L__BB3_187; + +$L__BB3_183: + mov.b32 %r437, %f3026; + xor.b32 %r438, %r437, -2147483648; + mov.b32 %f1456, %r438; + selp.f32 %f3028, %f1456, %f3026, %p27; + setp.geu.f32 %p330, %f206, 0f00000000; + @%p330 bra $L__BB3_187; + + cvt.rzi.f32.f32 %f1458, %f622; + setp.eq.f32 %p331, %f1458, 0f40000000; + @%p331 bra $L__BB3_187; + + mov.f32 %f3028, 0f7FFFFFFF; + +$L__BB3_187: + abs.f32 %f2808, %f206; + add.f32 %f1461, %f2808, 0f40000000; + mov.b32 %r439, %f1461; + setp.lt.s32 %p333, %r439, 2139095040; + @%p333 bra $L__BB3_192; + + abs.f32 %f2931, %f206; + setp.gtu.f32 %p334, %f2931, 0f7F800000; + @%p334 bra $L__BB3_191; + bra.uni $L__BB3_189; + +$L__BB3_191: + add.f32 %f3028, %f206, 0f40000000; + bra.uni $L__BB3_192; + +$L__BB3_189: + abs.f32 %f2932, %f206; + setp.neu.f32 %p335, %f2932, 0f7F800000; + @%p335 bra $L__BB3_192; + + selp.f32 %f3028, 0fFF800000, 0f7F800000, %p27; + +$L__BB3_192: + mov.f32 %f2816, 0f32A57060; + mov.f32 %f2815, 0f4B400001; + mov.f32 %f2814, 0f437C0000; + mov.f32 %f2813, 0f3BBB989D; + mov.f32 %f2812, 0f3FB8AA3B; + mov.f32 %f2811, 0f3F000000; + cvt.rn.f32.s32 %f2810, %r853; + sub.f32 %f2809, %f2810, %f3047; + mul.f32 %f1462, %f3028, 0fBF000000; + setp.eq.f32 %p336, %f206, 0f3F800000; + selp.f32 %f1463, 0fBF000000, %f1462, %p336; + fma.rn.f32 %f1466, %f1463, %f2813, %f2811; + cvt.sat.f32.f32 %f1469, %f1466; + fma.rm.f32 %f1471, %f1469, %f2814, %f2815; + add.f32 %f1472, %f1471, 0fCB40007F; + neg.f32 %f1473, %f1472; + fma.rn.f32 %f1474, %f1463, %f2812, %f1473; + fma.rn.f32 %f1476, %f1463, %f2816, %f1474; + mov.b32 %r440, %f1471; + shl.b32 %r441, %r440, 23; + mov.b32 %f1477, %r441; + ex2.approx.ftz.f32 %f1478, %f1476; + mul.f32 %f218, %f1478, %f1477; + add.f32 %f1479, %f2809, 0f3F800000; + mul.f32 %f1480, %f1479, %f205; + mul.f32 %f1481, %f2809, %f218; + sub.f32 %f1482, %f1480, %f1481; + mul.f32 %f1483, %f64, %f1482; + mul.f32 %f219, %f120, %f1483; + not.pred %p337, %p13; + mov.f64 %fd601, %fd45; + @%p337 bra $L__BB3_194; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r442}, %fd45; + } + xor.b32 %r443, %r442, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r444, %temp}, %fd45; + } + mov.b64 %fd601, {%r444, %r443}; + +$L__BB3_194: + setp.eq.f32 %p726, %f3043, 0f00000000; + @%p726 bra $L__BB3_198; + bra.uni $L__BB3_195; + +$L__BB3_198: + mov.u32 %r445, 0; + mov.b64 %fd601, {%r445, %r74}; + bra.uni $L__BB3_199; + +$L__BB3_195: + setp.gt.s32 %p339, %r60, -1; + @%p339 bra $L__BB3_199; + + cvt.rzi.f64.f64 %fd404, %fd345; + setp.eq.f64 %p340, %fd404, 0d4014000000000000; + @%p340 bra $L__BB3_199; + + mov.f64 %fd601, 0dFFF8000000000000; + +$L__BB3_199: + cvt.f64.f32 %fd546, %f3043; + add.f64 %fd545, %fd546, 0d4014000000000000; + selp.f64 %fd602, %fd601, %fd545, %p160; + @%p23 bra $L__BB3_204; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r829}, %fd345; + } + and.b32 %r828, %r829, 2147483647; + setp.eq.s32 %p342, %r828, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r446, %temp}, %fd345; + } + setp.eq.s32 %p343, %r446, 0; + and.pred %p344, %p342, %p343; + @%p344 bra $L__BB3_203; + bra.uni $L__BB3_201; + +$L__BB3_203: + mov.u32 %r450, 0; + mov.b64 %fd602, {%r450, %r76}; + bra.uni $L__BB3_204; + +$L__BB3_201: + cvt.f64.f32 %fd547, %f3043; + and.b32 %r447, %r60, 2147483647; + setp.ne.s32 %p345, %r447, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r448, %temp}, %fd547; + } + setp.ne.s32 %p346, %r448, 0; + or.pred %p347, %p345, %p346; + mov.f64 %fd602, %fd601; + @%p347 bra $L__BB3_204; + + mov.u32 %r449, 0; + mov.b64 %fd602, {%r449, %r77}; + +$L__BB3_204: + cvt.f64.f32 %fd410, %f123; + { + .reg .b32 %temp; + mov.b64 {%temp, %r83}, %fd410; + } + abs.f64 %fd96, %fd410; + { // callseq 75, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd96; + .param .b64 param1; + st.param.f64 [param1+0], %fd339; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd604, [retval0+0]; + } // callseq 75 + setp.lt.s32 %p348, %r83, 0; + and.pred %p28, %p348, %p121; + not.pred %p350, %p28; + @%p350 bra $L__BB3_206; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r451}, %fd604; + } + xor.b32 %r452, %r451, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r453, %temp}, %fd604; + } + mov.b64 %fd604, {%r453, %r452}; + +$L__BB3_206: + setp.eq.f32 %p351, %f123, 0f00000000; + @%p351 bra $L__BB3_210; + bra.uni $L__BB3_207; + +$L__BB3_210: + mov.u32 %r454, 0; + selp.b32 %r455, %r83, 0, %p121; + or.b32 %r456, %r455, 2146435072; + selp.b32 %r457, %r456, %r455, %p123; + mov.b64 %fd604, {%r454, %r457}; + bra.uni $L__BB3_211; + +$L__BB3_207: + setp.gt.s32 %p352, %r83, -1; + @%p352 bra $L__BB3_211; + + cvt.rzi.f64.f64 %fd413, %fd339; + setp.eq.f64 %p353, %fd413, 0d4008000000000000; + @%p353 bra $L__BB3_211; + + mov.f64 %fd604, 0dFFF8000000000000; + +$L__BB3_211: + add.f64 %fd102, %fd410, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r458}, %fd102; + } + and.b32 %r459, %r458, 2146435072; + setp.ne.s32 %p356, %r459, 2146435072; + mov.f64 %fd605, %fd604; + @%p356 bra $L__BB3_217; + + setp.gtu.f64 %p357, %fd96, 0d7FF0000000000000; + mov.f64 %fd605, %fd102; + @%p357 bra $L__BB3_217; + + setp.eq.s32 %p358, %r57, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r460, %temp}, %fd339; + } + setp.eq.s32 %p359, %r460, 0; + and.pred %p360, %p358, %p359; + @%p360 bra $L__BB3_216; + bra.uni $L__BB3_214; + +$L__BB3_216: + mov.u32 %r467, 0; + setp.gt.f64 %p368, %fd96, 0d3FF0000000000000; + selp.b32 %r468, 2146435072, 0, %p368; + xor.b32 %r469, %r468, 2146435072; + selp.b32 %r470, %r469, %r468, %p123; + setp.eq.f32 %p369, %f123, 0fBF800000; + selp.b32 %r471, 1072693248, %r470, %p369; + mov.b64 %fd605, {%r467, %r471}; + bra.uni $L__BB3_217; + +$L__BB3_214: + { + .reg .b32 %temp; + mov.b64 {%r461, %temp}, %fd410; + } + and.b32 %r462, %r83, 2147483647; + setp.ne.s32 %p361, %r462, 2146435072; + setp.ne.s32 %p362, %r461, 0; + or.pred %p363, %p361, %p362; + mov.f64 %fd605, %fd604; + @%p363 bra $L__BB3_217; + + setp.ne.s32 %p364, %r57, 1071644672; + and.pred %p365, %p364, %p28; + mov.u32 %r464, 0; + or.b32 %r465, %r59, -2147483648; + selp.b32 %r466, %r465, %r59, %p365; + mov.b64 %fd605, {%r464, %r466}; + +$L__BB3_217: + setp.eq.f32 %p370, %f123, 0f3F800000; + selp.f64 %fd418, 0d3FF0000000000000, %fd605, %p370; + cvt.f64.f32 %fd419, %f205; + mul.f64 %fd106, %fd418, %fd419; + cvt.f64.f32 %fd107, %f128; + { + .reg .b32 %temp; + mov.b64 {%temp, %r84}, %fd107; + } + abs.f64 %fd108, %fd107; + { // callseq 76, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd108; + .param .b64 param1; + st.param.f64 [param1+0], %fd339; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd607, [retval0+0]; + } // callseq 76 + setp.lt.s32 %p371, %r84, 0; + and.pred %p29, %p371, %p121; + not.pred %p373, %p29; + @%p373 bra $L__BB3_219; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r472}, %fd607; + } + xor.b32 %r473, %r472, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r474, %temp}, %fd607; + } + mov.b64 %fd607, {%r474, %r473}; + +$L__BB3_219: + setp.eq.f32 %p374, %f128, 0f00000000; + @%p374 bra $L__BB3_223; + bra.uni $L__BB3_220; + +$L__BB3_223: + mov.u32 %r475, 0; + selp.b32 %r476, %r84, 0, %p121; + or.b32 %r477, %r476, 2146435072; + selp.b32 %r478, %r477, %r476, %p123; + mov.b64 %fd607, {%r475, %r478}; + bra.uni $L__BB3_224; + +$L__BB3_220: + setp.gt.s32 %p375, %r84, -1; + @%p375 bra $L__BB3_224; + + cvt.rzi.f64.f64 %fd422, %fd339; + setp.eq.f64 %p376, %fd422, 0d4008000000000000; + @%p376 bra $L__BB3_224; + + mov.f64 %fd607, 0dFFF8000000000000; + +$L__BB3_224: + add.f64 %fd114, %fd107, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r479}, %fd114; + } + and.b32 %r480, %r479, 2146435072; + setp.ne.s32 %p379, %r480, 2146435072; + mov.f64 %fd608, %fd607; + @%p379 bra $L__BB3_230; + + setp.gtu.f64 %p380, %fd108, 0d7FF0000000000000; + mov.f64 %fd608, %fd114; + @%p380 bra $L__BB3_230; + + setp.eq.s32 %p381, %r57, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r481, %temp}, %fd339; + } + setp.eq.s32 %p382, %r481, 0; + and.pred %p383, %p381, %p382; + @%p383 bra $L__BB3_229; + bra.uni $L__BB3_227; + +$L__BB3_229: + mov.u32 %r488, 0; + setp.gt.f64 %p391, %fd108, 0d3FF0000000000000; + selp.b32 %r489, 2146435072, 0, %p391; + xor.b32 %r490, %r489, 2146435072; + selp.b32 %r491, %r490, %r489, %p123; + setp.eq.f32 %p392, %f128, 0fBF800000; + selp.b32 %r492, 1072693248, %r491, %p392; + mov.b64 %fd608, {%r488, %r492}; + bra.uni $L__BB3_230; + +$L__BB3_227: + { + .reg .b32 %temp; + mov.b64 {%r482, %temp}, %fd107; + } + and.b32 %r483, %r84, 2147483647; + setp.ne.s32 %p384, %r483, 2146435072; + setp.ne.s32 %p385, %r482, 0; + or.pred %p386, %p384, %p385; + mov.f64 %fd608, %fd607; + @%p386 bra $L__BB3_230; + + setp.ne.s32 %p387, %r57, 1071644672; + and.pred %p388, %p387, %p29; + mov.u32 %r485, 0; + or.b32 %r486, %r59, -2147483648; + selp.b32 %r487, %r486, %r59, %p388; + mov.b64 %fd608, {%r485, %r487}; + +$L__BB3_230: + cvt.f64.f32 %fd548, %f120; + setp.eq.f32 %p727, %f3043, 0f3F800000; + mov.f32 %f3029, 0f00000000; + setp.eq.f32 %p393, %f128, 0f3F800000; + selp.f64 %fd425, 0d3FF0000000000000, %fd608, %p393; + cvt.f64.f32 %fd426, %f218; + mul.f64 %fd427, %fd425, %fd426; + sub.f64 %fd428, %fd106, %fd427; + selp.f64 %fd429, 0d3FF0000000000000, %fd602, %p727; + div.rn.f64 %fd430, %fd35, %fd429; + mul.f64 %fd431, %fd430, %fd428; + mul.f64 %fd433, %fd431, %fd548; + mul.f32 %f1485, %f65, %f219; + cvt.f64.f32 %fd434, %f1485; + sub.f64 %fd435, %fd434, %fd433; + cvt.rn.f32.f64 %f220, %fd435; + mul.f32 %f221, %f120, %f133; + setp.leu.f32 %p395, %f134, 0f3C23D70A; + @%p395 bra $L__BB3_232; + + div.rn.f32 %f1486, %f135, %f134; + add.f32 %f3029, %f1486, 0fBF800000; + +$L__BB3_232: + mov.f32 %f3030, 0f00000000; + @%p395 bra $L__BB3_247; + + and.b32 %r493, %r78, 2146435072; + setp.eq.s32 %p397, %r493, 1062207488; + cvt.f64.f32 %fd118, %f134; + { + .reg .b32 %temp; + mov.b64 {%temp, %r85}, %fd118; + } + abs.f64 %fd119, %fd118; + { // callseq 77, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd119; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd610, [retval0+0]; + } // callseq 77 + setp.lt.s32 %p398, %r85, 0; + and.pred %p30, %p398, %p397; + not.pred %p399, %p30; + @%p399 bra $L__BB3_235; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r494}, %fd610; + } + xor.b32 %r495, %r494, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r496, %temp}, %fd610; + } + mov.b64 %fd610, {%r496, %r495}; + +$L__BB3_235: + setp.eq.f32 %p400, %f134, 0f00000000; + @%p400 bra $L__BB3_239; + bra.uni $L__BB3_236; + +$L__BB3_239: + setp.lt.s32 %p403, %r78, 0; + mov.u32 %r497, 0; + selp.b32 %r499, %r85, 0, %p397; + or.b32 %r500, %r499, 2146435072; + selp.b32 %r501, %r500, %r499, %p403; + mov.b64 %fd610, {%r497, %r501}; + bra.uni $L__BB3_240; + +$L__BB3_236: + setp.gt.s32 %p401, %r85, -1; + @%p401 bra $L__BB3_240; + + cvt.rzi.f64.f64 %fd438, %fd350; + setp.eq.f64 %p402, %fd438, 0d4000000000000000; + @%p402 bra $L__BB3_240; + + mov.f64 %fd610, 0dFFF8000000000000; + +$L__BB3_240: + add.f64 %fd125, %fd118, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r502}, %fd125; + } + and.b32 %r503, %r502, 2146435072; + setp.ne.s32 %p405, %r503, 2146435072; + mov.f64 %fd611, %fd610; + @%p405 bra $L__BB3_246; + + setp.gtu.f64 %p406, %fd119, 0d7FF0000000000000; + mov.f64 %fd611, %fd125; + @%p406 bra $L__BB3_246; + + setp.eq.s32 %p407, %r79, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r504, %temp}, %fd350; + } + setp.eq.s32 %p408, %r504, 0; + and.pred %p409, %p407, %p408; + @%p409 bra $L__BB3_245; + bra.uni $L__BB3_243; + +$L__BB3_245: + setp.lt.s32 %p415, %r78, 0; + mov.u32 %r510, 0; + setp.gt.f64 %p416, %fd119, 0d3FF0000000000000; + selp.b32 %r511, 2146435072, 0, %p416; + xor.b32 %r512, %r511, 2146435072; + selp.b32 %r513, %r512, %r511, %p415; + setp.eq.f32 %p417, %f134, 0fBF800000; + selp.b32 %r514, 1072693248, %r513, %p417; + mov.b64 %fd611, {%r510, %r514}; + bra.uni $L__BB3_246; + +$L__BB3_243: + { + .reg .b32 %temp; + mov.b64 {%r505, %temp}, %fd118; + } + and.b32 %r506, %r85, 2147483647; + setp.ne.s32 %p410, %r506, 2146435072; + setp.ne.s32 %p411, %r505, 0; + or.pred %p412, %p410, %p411; + mov.f64 %fd611, %fd610; + @%p412 bra $L__BB3_246; + + setp.ne.s32 %p413, %r79, 1071644672; + and.pred %p414, %p413, %p30; + or.b32 %r507, %r80, -2147483648; + selp.b32 %r508, %r507, %r80, %p414; + mov.u32 %r509, 0; + mov.b64 %fd611, {%r509, %r508}; + +$L__BB3_246: + setp.eq.f32 %p418, %f134, 0f3F800000; + selp.f64 %fd441, 0d3FF0000000000000, %fd611, %p418; + cvt.f64.f32 %fd442, %f135; + div.rn.f64 %fd443, %fd442, %fd441; + cvt.rn.f32.f64 %f3030, %fd443; + +$L__BB3_247: + and.b32 %r515, %r78, 2146435072; + setp.eq.s32 %p419, %r515, 1062207488; + mov.f32 %f1488, 0f47C35000; + min.f32 %f1489, %f3030, %f1488; + cvt.f64.f32 %fd129, %f1489; + min.f32 %f226, %f3029, %f1488; + fma.rn.f32 %f2998, %f226, %f148, %f2998; + mul.f32 %f1490, %f226, %f149; + cvt.f64.f32 %fd130, %f1490; + cvt.f64.f32 %fd131, %f148; + { + .reg .b32 %temp; + mov.b64 {%temp, %r86}, %fd131; + } + abs.f64 %fd132, %fd131; + { // callseq 78, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd132; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd612, [retval0+0]; + } // callseq 78 + @%p419 bra $L__BB3_304; + bra.uni $L__BB3_248; + +$L__BB3_304: + setp.gt.s32 %p495, %r86, -1; + @%p495 bra $L__BB3_306; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r576}, %fd612; + } + xor.b32 %r577, %r576, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r578, %temp}, %fd612; + } + mov.b64 %fd612, {%r578, %r577}; + +$L__BB3_306: + setp.eq.f32 %p496, %f148, 0f00000000; + @%p496 bra $L__BB3_310; + bra.uni $L__BB3_307; + +$L__BB3_310: + setp.lt.s32 %p499, %r78, 0; + mov.u32 %r579, 0; + or.b32 %r580, %r86, 2146435072; + selp.b32 %r581, %r580, %r86, %p499; + mov.b64 %fd612, {%r579, %r581}; + bra.uni $L__BB3_311; + +$L__BB3_248: + setp.eq.f32 %p420, %f148, 0f00000000; + @%p420 bra $L__BB3_252; + bra.uni $L__BB3_249; + +$L__BB3_252: + mov.u32 %r516, 0; + mov.b64 %fd612, {%r516, %r81}; + bra.uni $L__BB3_253; + +$L__BB3_307: + @%p495 bra $L__BB3_311; + + cvt.rzi.f64.f64 %fd496, %fd350; + setp.eq.f64 %p498, %fd496, 0d4000000000000000; + @%p498 bra $L__BB3_311; + + mov.f64 %fd612, 0dFFF8000000000000; + +$L__BB3_311: + add.f64 %fd186, %fd131, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r582}, %fd186; + } + and.b32 %r583, %r582, 2146435072; + setp.ne.s32 %p500, %r583, 2146435072; + mov.f64 %fd624, %fd612; + @%p500 bra $L__BB3_317; + + setp.gtu.f64 %p501, %fd132, 0d7FF0000000000000; + mov.f64 %fd624, %fd186; + @%p501 bra $L__BB3_317; + + setp.eq.s32 %p502, %r79, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r584, %temp}, %fd350; + } + setp.eq.s32 %p503, %r584, 0; + and.pred %p504, %p502, %p503; + @%p504 bra $L__BB3_316; + bra.uni $L__BB3_314; + +$L__BB3_316: + setp.lt.s32 %p511, %r78, 0; + mov.u32 %r590, 0; + setp.gt.f64 %p512, %fd132, 0d3FF0000000000000; + selp.b32 %r591, 2146435072, 0, %p512; + xor.b32 %r592, %r591, 2146435072; + selp.b32 %r593, %r592, %r591, %p511; + setp.eq.f32 %p513, %f148, 0fBF800000; + selp.b32 %r594, 1072693248, %r593, %p513; + mov.b64 %fd624, {%r590, %r594}; + bra.uni $L__BB3_317; + +$L__BB3_249: + setp.gt.s32 %p421, %r86, -1; + @%p421 bra $L__BB3_253; + + cvt.rzi.f64.f64 %fd446, %fd350; + setp.eq.f64 %p422, %fd446, 0d4000000000000000; + @%p422 bra $L__BB3_253; + + mov.f64 %fd612, 0dFFF8000000000000; + +$L__BB3_253: + add.f64 %fd136, %fd131, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r517}, %fd136; + } + and.b32 %r518, %r517, 2146435072; + setp.ne.s32 %p423, %r518, 2146435072; + mov.f64 %fd613, %fd612; + @%p423 bra $L__BB3_259; + + setp.gtu.f64 %p424, %fd132, 0d7FF0000000000000; + mov.f64 %fd613, %fd136; + @%p424 bra $L__BB3_259; + + setp.eq.s32 %p425, %r79, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r519, %temp}, %fd350; + } + setp.eq.s32 %p426, %r519, 0; + and.pred %p427, %p425, %p426; + @%p427 bra $L__BB3_258; + bra.uni $L__BB3_256; + +$L__BB3_258: + setp.lt.s32 %p431, %r78, 0; + mov.u32 %r523, 0; + setp.gt.f64 %p432, %fd132, 0d3FF0000000000000; + selp.b32 %r524, 2146435072, 0, %p432; + xor.b32 %r525, %r524, 2146435072; + selp.b32 %r526, %r525, %r524, %p431; + setp.eq.f32 %p433, %f148, 0fBF800000; + selp.b32 %r527, 1072693248, %r526, %p433; + mov.b64 %fd613, {%r523, %r527}; + bra.uni $L__BB3_259; + +$L__BB3_314: + { + .reg .b32 %temp; + mov.b64 {%r585, %temp}, %fd131; + } + and.b32 %r586, %r86, 2147483647; + setp.ne.s32 %p505, %r586, 2146435072; + setp.ne.s32 %p506, %r585, 0; + or.pred %p507, %p505, %p506; + mov.f64 %fd624, %fd612; + @%p507 bra $L__BB3_317; + + setp.lt.s32 %p508, %r86, 0; + mov.u32 %r587, 0; + setp.ne.s32 %p509, %r79, 1071644672; + and.pred %p510, %p509, %p508; + or.b32 %r588, %r80, -2147483648; + selp.b32 %r589, %r588, %r80, %p510; + mov.b64 %fd624, {%r587, %r589}; + +$L__BB3_317: + setp.eq.f32 %p514, %f148, 0f3F800000; + selp.f64 %fd499, 0d3FF0000000000000, %fd624, %p514; + mul.f64 %fd500, %fd499, %fd129; + sub.f64 %fd501, %fd130, %fd500; + cvt.f64.f32 %fd502, %f3004; + add.f64 %fd642, %fd501, %fd502; + cvt.f64.f32 %fd191, %f177; + { + .reg .b32 %temp; + mov.b64 {%temp, %r91}, %fd191; + } + abs.f64 %fd192, %fd191; + { // callseq 83, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd192; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd626, [retval0+0]; + } // callseq 83 + setp.gt.s32 %p515, %r91, -1; + @%p515 bra $L__BB3_319; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r595}, %fd626; + } + xor.b32 %r596, %r595, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r597, %temp}, %fd626; + } + mov.b64 %fd626, {%r597, %r596}; + +$L__BB3_319: + setp.eq.f32 %p516, %f177, 0f00000000; + @%p516 bra $L__BB3_323; + bra.uni $L__BB3_320; + +$L__BB3_323: + setp.lt.s32 %p519, %r78, 0; + mov.u32 %r598, 0; + or.b32 %r599, %r91, 2146435072; + selp.b32 %r600, %r599, %r91, %p519; + mov.b64 %fd626, {%r598, %r600}; + bra.uni $L__BB3_324; + +$L__BB3_320: + @%p515 bra $L__BB3_324; + + cvt.rzi.f64.f64 %fd505, %fd350; + setp.eq.f64 %p518, %fd505, 0d4000000000000000; + @%p518 bra $L__BB3_324; + + mov.f64 %fd626, 0dFFF8000000000000; + +$L__BB3_324: + add.f64 %fd198, %fd191, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r601}, %fd198; + } + and.b32 %r602, %r601, 2146435072; + setp.ne.s32 %p520, %r602, 2146435072; + mov.f64 %fd627, %fd626; + @%p520 bra $L__BB3_330; + + setp.gtu.f64 %p521, %fd192, 0d7FF0000000000000; + mov.f64 %fd627, %fd198; + @%p521 bra $L__BB3_330; + + setp.eq.s32 %p522, %r79, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r603, %temp}, %fd350; + } + setp.eq.s32 %p523, %r603, 0; + and.pred %p524, %p522, %p523; + @%p524 bra $L__BB3_329; + bra.uni $L__BB3_327; + +$L__BB3_329: + setp.lt.s32 %p531, %r78, 0; + mov.u32 %r609, 0; + setp.gt.f64 %p532, %fd192, 0d3FF0000000000000; + selp.b32 %r610, 2146435072, 0, %p532; + xor.b32 %r611, %r610, 2146435072; + selp.b32 %r612, %r611, %r610, %p531; + setp.eq.f32 %p533, %f177, 0fBF800000; + selp.b32 %r613, 1072693248, %r612, %p533; + mov.b64 %fd627, {%r609, %r613}; + bra.uni $L__BB3_330; + +$L__BB3_256: + { + .reg .b32 %temp; + mov.b64 {%r520, %temp}, %fd131; + } + and.b32 %r521, %r86, 2147483647; + setp.ne.s32 %p428, %r521, 2146435072; + setp.ne.s32 %p429, %r520, 0; + or.pred %p430, %p428, %p429; + mov.f64 %fd613, %fd612; + @%p430 bra $L__BB3_259; + + mov.u32 %r522, 0; + mov.b64 %fd613, {%r522, %r80}; + +$L__BB3_259: + setp.eq.f32 %p434, %f148, 0f3F800000; + selp.f64 %fd449, 0d3FF0000000000000, %fd613, %p434; + mul.f64 %fd450, %fd449, %fd129; + sub.f64 %fd451, %fd130, %fd450; + cvt.f64.f32 %fd452, %f3004; + add.f64 %fd642, %fd451, %fd452; + cvt.f64.f32 %fd141, %f177; + { + .reg .b32 %temp; + mov.b64 {%temp, %r87}, %fd141; + } + abs.f64 %fd142, %fd141; + setp.eq.f32 %p435, %f177, 0f00000000; + @%p435 bra $L__BB3_263; + bra.uni $L__BB3_260; + +$L__BB3_263: + mov.u32 %r528, 0; + mov.b64 %fd614, {%r528, %r81}; + bra.uni $L__BB3_264; + +$L__BB3_260: + { // callseq 79, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd142; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd614, [retval0+0]; + } // callseq 79 + setp.gt.s32 %p436, %r87, -1; + @%p436 bra $L__BB3_264; + + cvt.rzi.f64.f64 %fd455, %fd350; + setp.eq.f64 %p437, %fd455, 0d4000000000000000; + @%p437 bra $L__BB3_264; + + mov.f64 %fd614, 0dFFF8000000000000; + +$L__BB3_264: + add.f64 %fd146, %fd141, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r529}, %fd146; + } + and.b32 %r530, %r529, 2146435072; + setp.ne.s32 %p438, %r530, 2146435072; + mov.f64 %fd615, %fd614; + @%p438 bra $L__BB3_270; + + setp.gtu.f64 %p439, %fd142, 0d7FF0000000000000; + mov.f64 %fd615, %fd146; + @%p439 bra $L__BB3_270; + + setp.eq.s32 %p440, %r79, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r531, %temp}, %fd350; + } + setp.eq.s32 %p441, %r531, 0; + and.pred %p442, %p440, %p441; + @%p442 bra $L__BB3_269; + bra.uni $L__BB3_267; + +$L__BB3_269: + setp.lt.s32 %p446, %r78, 0; + mov.u32 %r535, 0; + setp.gt.f64 %p447, %fd142, 0d3FF0000000000000; + selp.b32 %r536, 2146435072, 0, %p447; + xor.b32 %r537, %r536, 2146435072; + selp.b32 %r538, %r537, %r536, %p446; + setp.eq.f32 %p448, %f177, 0fBF800000; + selp.b32 %r539, 1072693248, %r538, %p448; + mov.b64 %fd615, {%r535, %r539}; + bra.uni $L__BB3_270; + +$L__BB3_327: + { + .reg .b32 %temp; + mov.b64 {%r604, %temp}, %fd191; + } + and.b32 %r605, %r91, 2147483647; + setp.ne.s32 %p525, %r605, 2146435072; + setp.ne.s32 %p526, %r604, 0; + or.pred %p527, %p525, %p526; + mov.f64 %fd627, %fd626; + @%p527 bra $L__BB3_330; + + setp.lt.s32 %p528, %r91, 0; + mov.u32 %r606, 0; + setp.ne.s32 %p529, %r79, 1071644672; + and.pred %p530, %p529, %p528; + or.b32 %r607, %r80, -2147483648; + selp.b32 %r608, %r607, %r80, %p530; + mov.b64 %fd627, {%r606, %r608}; + +$L__BB3_330: + setp.eq.f32 %p534, %f177, 0f3F800000; + selp.f64 %fd508, 0d3FF0000000000000, %fd627, %p534; + mul.f64 %fd509, %fd508, %fd129; + mul.f32 %f1495, %f226, %f178; + cvt.f64.f32 %fd510, %f1495; + sub.f64 %fd511, %fd510, %fd509; + cvt.f64.f32 %fd512, %f3003; + add.f64 %fd641, %fd511, %fd512; + cvt.f64.f32 %fd203, %f221; + { + .reg .b32 %temp; + mov.b64 {%temp, %r92}, %fd203; + } + abs.f64 %fd204, %fd203; + { // callseq 84, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd204; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd629, [retval0+0]; + } // callseq 84 + setp.gt.s32 %p535, %r92, -1; + @%p535 bra $L__BB3_332; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r614}, %fd629; + } + xor.b32 %r615, %r614, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r616, %temp}, %fd629; + } + mov.b64 %fd629, {%r616, %r615}; + +$L__BB3_332: + setp.eq.f32 %p536, %f221, 0f00000000; + @%p536 bra $L__BB3_336; + bra.uni $L__BB3_333; + +$L__BB3_336: + setp.lt.s32 %p539, %r78, 0; + mov.u32 %r617, 0; + or.b32 %r618, %r92, 2146435072; + selp.b32 %r619, %r618, %r92, %p539; + mov.b64 %fd629, {%r617, %r619}; + bra.uni $L__BB3_337; + +$L__BB3_333: + @%p535 bra $L__BB3_337; + + cvt.rzi.f64.f64 %fd515, %fd350; + setp.eq.f64 %p538, %fd515, 0d4000000000000000; + @%p538 bra $L__BB3_337; + + mov.f64 %fd629, 0dFFF8000000000000; + +$L__BB3_337: + add.f64 %fd210, %fd203, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r620}, %fd210; + } + and.b32 %r621, %r620, 2146435072; + setp.ne.s32 %p540, %r621, 2146435072; + mov.f64 %fd630, %fd629; + @%p540 bra $L__BB3_343; + + setp.gtu.f64 %p541, %fd204, 0d7FF0000000000000; + mov.f64 %fd630, %fd210; + @%p541 bra $L__BB3_343; + + setp.eq.s32 %p542, %r79, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r622, %temp}, %fd350; + } + setp.eq.s32 %p543, %r622, 0; + and.pred %p544, %p542, %p543; + @%p544 bra $L__BB3_342; + bra.uni $L__BB3_340; + +$L__BB3_342: + setp.lt.s32 %p551, %r78, 0; + mov.u32 %r628, 0; + setp.gt.f64 %p552, %fd204, 0d3FF0000000000000; + selp.b32 %r629, 2146435072, 0, %p552; + xor.b32 %r630, %r629, 2146435072; + selp.b32 %r631, %r630, %r629, %p551; + setp.eq.f32 %p553, %f221, 0fBF800000; + selp.b32 %r632, 1072693248, %r631, %p553; + mov.b64 %fd630, {%r628, %r632}; + bra.uni $L__BB3_343; + +$L__BB3_267: + { + .reg .b32 %temp; + mov.b64 {%r532, %temp}, %fd141; + } + and.b32 %r533, %r87, 2147483647; + setp.ne.s32 %p443, %r533, 2146435072; + setp.ne.s32 %p444, %r532, 0; + or.pred %p445, %p443, %p444; + mov.f64 %fd615, %fd614; + @%p445 bra $L__BB3_270; + + mov.u32 %r534, 0; + mov.b64 %fd615, {%r534, %r80}; + +$L__BB3_270: + setp.eq.f32 %p449, %f177, 0f3F800000; + selp.f64 %fd458, 0d3FF0000000000000, %fd615, %p449; + mul.f64 %fd459, %fd458, %fd129; + mul.f32 %f1491, %f226, %f178; + cvt.f64.f32 %fd460, %f1491; + sub.f64 %fd461, %fd460, %fd459; + cvt.f64.f32 %fd462, %f3003; + add.f64 %fd641, %fd461, %fd462; + cvt.f64.f32 %fd151, %f221; + { + .reg .b32 %temp; + mov.b64 {%temp, %r88}, %fd151; + } + abs.f64 %fd152, %fd151; + setp.eq.f32 %p450, %f221, 0f00000000; + @%p450 bra $L__BB3_274; + bra.uni $L__BB3_271; + +$L__BB3_274: + mov.u32 %r540, 0; + mov.b64 %fd616, {%r540, %r81}; + bra.uni $L__BB3_275; + +$L__BB3_271: + { // callseq 80, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd152; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd616, [retval0+0]; + } // callseq 80 + setp.gt.s32 %p451, %r88, -1; + @%p451 bra $L__BB3_275; + + cvt.rzi.f64.f64 %fd465, %fd350; + setp.eq.f64 %p452, %fd465, 0d4000000000000000; + @%p452 bra $L__BB3_275; + + mov.f64 %fd616, 0dFFF8000000000000; + +$L__BB3_275: + add.f64 %fd156, %fd151, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r541}, %fd156; + } + and.b32 %r542, %r541, 2146435072; + setp.ne.s32 %p453, %r542, 2146435072; + mov.f64 %fd617, %fd616; + @%p453 bra $L__BB3_281; + + setp.gtu.f64 %p454, %fd152, 0d7FF0000000000000; + mov.f64 %fd617, %fd156; + @%p454 bra $L__BB3_281; + + setp.eq.s32 %p455, %r79, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r543, %temp}, %fd350; + } + setp.eq.s32 %p456, %r543, 0; + and.pred %p457, %p455, %p456; + @%p457 bra $L__BB3_280; + bra.uni $L__BB3_278; + +$L__BB3_280: + setp.lt.s32 %p461, %r78, 0; + mov.u32 %r547, 0; + setp.gt.f64 %p462, %fd152, 0d3FF0000000000000; + selp.b32 %r548, 2146435072, 0, %p462; + xor.b32 %r549, %r548, 2146435072; + selp.b32 %r550, %r549, %r548, %p461; + setp.eq.f32 %p463, %f221, 0fBF800000; + selp.b32 %r551, 1072693248, %r550, %p463; + mov.b64 %fd617, {%r547, %r551}; + bra.uni $L__BB3_281; + +$L__BB3_340: + { + .reg .b32 %temp; + mov.b64 {%r623, %temp}, %fd203; + } + and.b32 %r624, %r92, 2147483647; + setp.ne.s32 %p545, %r624, 2146435072; + setp.ne.s32 %p546, %r623, 0; + or.pred %p547, %p545, %p546; + mov.f64 %fd630, %fd629; + @%p547 bra $L__BB3_343; + + setp.lt.s32 %p548, %r92, 0; + mov.u32 %r625, 0; + setp.ne.s32 %p549, %r79, 1071644672; + and.pred %p550, %p549, %p548; + or.b32 %r626, %r80, -2147483648; + selp.b32 %r627, %r626, %r80, %p550; + mov.b64 %fd630, {%r625, %r627}; + +$L__BB3_343: + mul.f32 %f1496, %f226, 0f00000000; + cvt.f64.f32 %fd518, %f1496; + setp.eq.f32 %p554, %f221, 0f3F800000; + selp.f64 %fd519, 0d3FF0000000000000, %fd630, %p554; + mul.f64 %fd520, %fd519, %fd129; + sub.f64 %fd521, %fd518, %fd520; + cvt.f64.f32 %fd522, %f3002; + add.f64 %fd640, %fd521, %fd522; + cvt.f64.f32 %fd523, %f3001; + sub.f64 %fd524, %fd518, %fd129; + add.f64 %fd639, %fd524, %fd523; + cvt.f64.f32 %fd216, %f191; + { + .reg .b32 %temp; + mov.b64 {%temp, %r93}, %fd216; + } + abs.f64 %fd217, %fd216; + { // callseq 85, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd217; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd632, [retval0+0]; + } // callseq 85 + setp.gt.s32 %p555, %r93, -1; + @%p555 bra $L__BB3_345; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r633}, %fd632; + } + xor.b32 %r634, %r633, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r635, %temp}, %fd632; + } + mov.b64 %fd632, {%r635, %r634}; + +$L__BB3_345: + setp.eq.f32 %p556, %f191, 0f00000000; + @%p556 bra $L__BB3_349; + bra.uni $L__BB3_346; + +$L__BB3_349: + setp.lt.s32 %p559, %r78, 0; + mov.u32 %r636, 0; + or.b32 %r637, %r93, 2146435072; + selp.b32 %r638, %r637, %r93, %p559; + mov.b64 %fd632, {%r636, %r638}; + bra.uni $L__BB3_350; + +$L__BB3_346: + @%p555 bra $L__BB3_350; + + cvt.rzi.f64.f64 %fd527, %fd350; + setp.eq.f64 %p558, %fd527, 0d4000000000000000; + @%p558 bra $L__BB3_350; + + mov.f64 %fd632, 0dFFF8000000000000; + +$L__BB3_350: + add.f64 %fd223, %fd216, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r639}, %fd223; + } + and.b32 %r640, %r639, 2146435072; + setp.ne.s32 %p560, %r640, 2146435072; + mov.f64 %fd633, %fd632; + @%p560 bra $L__BB3_356; + + setp.gtu.f64 %p561, %fd217, 0d7FF0000000000000; + mov.f64 %fd633, %fd223; + @%p561 bra $L__BB3_356; + + setp.eq.s32 %p562, %r79, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r641, %temp}, %fd350; + } + setp.eq.s32 %p563, %r641, 0; + and.pred %p564, %p562, %p563; + @%p564 bra $L__BB3_355; + bra.uni $L__BB3_353; + +$L__BB3_355: + setp.lt.s32 %p571, %r78, 0; + mov.u32 %r647, 0; + setp.gt.f64 %p572, %fd217, 0d3FF0000000000000; + selp.b32 %r648, 2146435072, 0, %p572; + xor.b32 %r649, %r648, 2146435072; + selp.b32 %r650, %r649, %r648, %p571; + setp.eq.f32 %p573, %f191, 0fBF800000; + selp.b32 %r651, 1072693248, %r650, %p573; + mov.b64 %fd633, {%r647, %r651}; + bra.uni $L__BB3_356; + +$L__BB3_278: + { + .reg .b32 %temp; + mov.b64 {%r544, %temp}, %fd151; + } + and.b32 %r545, %r88, 2147483647; + setp.ne.s32 %p458, %r545, 2146435072; + setp.ne.s32 %p459, %r544, 0; + or.pred %p460, %p458, %p459; + mov.f64 %fd617, %fd616; + @%p460 bra $L__BB3_281; + + mov.u32 %r546, 0; + mov.b64 %fd617, {%r546, %r80}; + +$L__BB3_281: + mul.f32 %f1492, %f226, 0f00000000; + cvt.f64.f32 %fd468, %f1492; + setp.eq.f32 %p464, %f221, 0f3F800000; + selp.f64 %fd469, 0d3FF0000000000000, %fd617, %p464; + mul.f64 %fd470, %fd469, %fd129; + sub.f64 %fd471, %fd468, %fd470; + cvt.f64.f32 %fd472, %f3002; + add.f64 %fd640, %fd471, %fd472; + cvt.f64.f32 %fd473, %f3001; + sub.f64 %fd474, %fd468, %fd129; + add.f64 %fd639, %fd474, %fd473; + cvt.f64.f32 %fd162, %f191; + { + .reg .b32 %temp; + mov.b64 {%temp, %r89}, %fd162; + } + abs.f64 %fd163, %fd162; + setp.eq.f32 %p465, %f191, 0f00000000; + @%p465 bra $L__BB3_285; + bra.uni $L__BB3_282; + +$L__BB3_285: + mov.u32 %r552, 0; + mov.b64 %fd618, {%r552, %r81}; + bra.uni $L__BB3_286; + +$L__BB3_282: + { // callseq 81, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd163; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd618, [retval0+0]; + } // callseq 81 + setp.gt.s32 %p466, %r89, -1; + @%p466 bra $L__BB3_286; + + cvt.rzi.f64.f64 %fd477, %fd350; + setp.eq.f64 %p467, %fd477, 0d4000000000000000; + @%p467 bra $L__BB3_286; + + mov.f64 %fd618, 0dFFF8000000000000; + +$L__BB3_286: + add.f64 %fd167, %fd162, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r553}, %fd167; + } + and.b32 %r554, %r553, 2146435072; + setp.ne.s32 %p468, %r554, 2146435072; + mov.f64 %fd619, %fd618; + @%p468 bra $L__BB3_292; + + setp.gtu.f64 %p469, %fd163, 0d7FF0000000000000; + mov.f64 %fd619, %fd167; + @%p469 bra $L__BB3_292; + + setp.eq.s32 %p470, %r79, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r555, %temp}, %fd350; + } + setp.eq.s32 %p471, %r555, 0; + and.pred %p472, %p470, %p471; + @%p472 bra $L__BB3_291; + bra.uni $L__BB3_289; + +$L__BB3_291: + setp.lt.s32 %p476, %r78, 0; + mov.u32 %r559, 0; + setp.gt.f64 %p477, %fd163, 0d3FF0000000000000; + selp.b32 %r560, 2146435072, 0, %p477; + xor.b32 %r561, %r560, 2146435072; + selp.b32 %r562, %r561, %r560, %p476; + setp.eq.f32 %p478, %f191, 0fBF800000; + selp.b32 %r563, 1072693248, %r562, %p478; + mov.b64 %fd619, {%r559, %r563}; + bra.uni $L__BB3_292; + +$L__BB3_353: + { + .reg .b32 %temp; + mov.b64 {%r642, %temp}, %fd216; + } + and.b32 %r643, %r93, 2147483647; + setp.ne.s32 %p565, %r643, 2146435072; + setp.ne.s32 %p566, %r642, 0; + or.pred %p567, %p565, %p566; + mov.f64 %fd633, %fd632; + @%p567 bra $L__BB3_356; + + setp.lt.s32 %p568, %r93, 0; + mov.u32 %r644, 0; + setp.ne.s32 %p569, %r79, 1071644672; + and.pred %p570, %p569, %p568; + or.b32 %r645, %r80, -2147483648; + selp.b32 %r646, %r645, %r80, %p570; + mov.b64 %fd633, {%r644, %r646}; + +$L__BB3_356: + setp.eq.f32 %p574, %f191, 0f3F800000; + selp.f64 %fd530, 0d3FF0000000000000, %fd633, %p574; + mul.f64 %fd531, %fd530, %fd129; + mul.f32 %f1497, %f226, %f192; + cvt.f64.f32 %fd532, %f1497; + sub.f64 %fd533, %fd532, %fd531; + cvt.f64.f32 %fd534, %f3000; + add.f64 %fd638, %fd533, %fd534; + cvt.f64.f32 %fd228, %f219; + { + .reg .b32 %temp; + mov.b64 {%temp, %r94}, %fd228; + } + abs.f64 %fd229, %fd228; + { // callseq 86, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd229; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd635, [retval0+0]; + } // callseq 86 + setp.gt.s32 %p575, %r94, -1; + @%p575 bra $L__BB3_358; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r652}, %fd635; + } + xor.b32 %r653, %r652, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r654, %temp}, %fd635; + } + mov.b64 %fd635, {%r654, %r653}; + +$L__BB3_358: + setp.eq.f32 %p576, %f219, 0f00000000; + @%p576 bra $L__BB3_362; + bra.uni $L__BB3_359; + +$L__BB3_362: + setp.lt.s32 %p579, %r78, 0; + mov.u32 %r655, 0; + or.b32 %r656, %r94, 2146435072; + selp.b32 %r657, %r656, %r94, %p579; + mov.b64 %fd635, {%r655, %r657}; + bra.uni $L__BB3_363; + +$L__BB3_359: + @%p575 bra $L__BB3_363; + + cvt.rzi.f64.f64 %fd537, %fd350; + setp.eq.f64 %p578, %fd537, 0d4000000000000000; + @%p578 bra $L__BB3_363; + + mov.f64 %fd635, 0dFFF8000000000000; + +$L__BB3_363: + add.f64 %fd235, %fd228, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r658}, %fd235; + } + and.b32 %r659, %r658, 2146435072; + setp.ne.s32 %p580, %r659, 2146435072; + mov.f64 %fd636, %fd635; + @%p580 bra $L__BB3_369; + + setp.gtu.f64 %p581, %fd229, 0d7FF0000000000000; + mov.f64 %fd636, %fd235; + @%p581 bra $L__BB3_369; + + setp.eq.s32 %p582, %r79, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r660, %temp}, %fd350; + } + setp.eq.s32 %p583, %r660, 0; + and.pred %p584, %p582, %p583; + @%p584 bra $L__BB3_368; + bra.uni $L__BB3_366; + +$L__BB3_368: + setp.lt.s32 %p591, %r78, 0; + mov.u32 %r666, 0; + setp.gt.f64 %p592, %fd229, 0d3FF0000000000000; + selp.b32 %r667, 2146435072, 0, %p592; + xor.b32 %r668, %r667, 2146435072; + selp.b32 %r669, %r668, %r667, %p591; + setp.eq.f32 %p593, %f219, 0fBF800000; + selp.b32 %r670, 1072693248, %r669, %p593; + mov.b64 %fd636, {%r666, %r670}; + bra.uni $L__BB3_369; + +$L__BB3_289: + { + .reg .b32 %temp; + mov.b64 {%r556, %temp}, %fd162; + } + and.b32 %r557, %r89, 2147483647; + setp.ne.s32 %p473, %r557, 2146435072; + setp.ne.s32 %p474, %r556, 0; + or.pred %p475, %p473, %p474; + mov.f64 %fd619, %fd618; + @%p475 bra $L__BB3_292; + + mov.u32 %r558, 0; + mov.b64 %fd619, {%r558, %r80}; + +$L__BB3_292: + setp.eq.f32 %p479, %f191, 0f3F800000; + selp.f64 %fd480, 0d3FF0000000000000, %fd619, %p479; + mul.f64 %fd481, %fd480, %fd129; + mul.f32 %f1493, %f226, %f192; + cvt.f64.f32 %fd482, %f1493; + sub.f64 %fd483, %fd482, %fd481; + cvt.f64.f32 %fd484, %f3000; + add.f64 %fd638, %fd483, %fd484; + cvt.f64.f32 %fd172, %f219; + { + .reg .b32 %temp; + mov.b64 {%temp, %r90}, %fd172; + } + abs.f64 %fd173, %fd172; + setp.eq.f32 %p480, %f219, 0f00000000; + @%p480 bra $L__BB3_296; + bra.uni $L__BB3_293; + +$L__BB3_296: + mov.u32 %r564, 0; + mov.b64 %fd620, {%r564, %r81}; + bra.uni $L__BB3_297; + +$L__BB3_293: + { // callseq 82, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd173; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd620, [retval0+0]; + } // callseq 82 + setp.gt.s32 %p481, %r90, -1; + @%p481 bra $L__BB3_297; + + cvt.rzi.f64.f64 %fd487, %fd350; + setp.eq.f64 %p482, %fd487, 0d4000000000000000; + @%p482 bra $L__BB3_297; + + mov.f64 %fd620, 0dFFF8000000000000; + +$L__BB3_297: + add.f64 %fd177, %fd172, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r565}, %fd177; + } + and.b32 %r566, %r565, 2146435072; + setp.ne.s32 %p483, %r566, 2146435072; + mov.f64 %fd621, %fd620; + @%p483 bra $L__BB3_303; + + setp.gtu.f64 %p484, %fd173, 0d7FF0000000000000; + mov.f64 %fd621, %fd177; + @%p484 bra $L__BB3_303; + + setp.eq.s32 %p485, %r79, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r567, %temp}, %fd350; + } + setp.eq.s32 %p486, %r567, 0; + and.pred %p487, %p485, %p486; + @%p487 bra $L__BB3_302; + bra.uni $L__BB3_300; + +$L__BB3_302: + setp.lt.s32 %p491, %r78, 0; + mov.u32 %r571, 0; + setp.gt.f64 %p492, %fd173, 0d3FF0000000000000; + selp.b32 %r572, 2146435072, 0, %p492; + xor.b32 %r573, %r572, 2146435072; + selp.b32 %r574, %r573, %r572, %p491; + setp.eq.f32 %p493, %f219, 0fBF800000; + selp.b32 %r575, 1072693248, %r574, %p493; + mov.b64 %fd621, {%r571, %r575}; + bra.uni $L__BB3_303; + +$L__BB3_366: + { + .reg .b32 %temp; + mov.b64 {%r661, %temp}, %fd228; + } + and.b32 %r662, %r94, 2147483647; + setp.ne.s32 %p585, %r662, 2146435072; + setp.ne.s32 %p586, %r661, 0; + or.pred %p587, %p585, %p586; + mov.f64 %fd636, %fd635; + @%p587 bra $L__BB3_369; + + setp.lt.s32 %p588, %r94, 0; + mov.u32 %r663, 0; + setp.ne.s32 %p589, %r79, 1071644672; + and.pred %p590, %p589, %p588; + or.b32 %r664, %r80, -2147483648; + selp.b32 %r665, %r664, %r80, %p590; + mov.b64 %fd636, {%r663, %r665}; + +$L__BB3_369: + setp.eq.f32 %p594, %f219, 0f3F800000; + selp.f64 %fd540, 0d3FF0000000000000, %fd636, %p594; + mul.f64 %fd541, %fd540, %fd129; + mul.f32 %f1498, %f226, %f220; + cvt.f64.f32 %fd542, %f1498; + sub.f64 %fd543, %fd542, %fd541; + cvt.f64.f32 %fd544, %f2999; + add.f64 %fd637, %fd543, %fd544; + bra.uni $L__BB3_370; + +$L__BB3_300: + { + .reg .b32 %temp; + mov.b64 {%r568, %temp}, %fd172; + } + and.b32 %r569, %r90, 2147483647; + setp.ne.s32 %p488, %r569, 2146435072; + setp.ne.s32 %p489, %r568, 0; + or.pred %p490, %p488, %p489; + mov.f64 %fd621, %fd620; + @%p490 bra $L__BB3_303; + + mov.u32 %r570, 0; + mov.b64 %fd621, {%r570, %r80}; + +$L__BB3_303: + setp.eq.f32 %p494, %f219, 0f3F800000; + selp.f64 %fd490, 0d3FF0000000000000, %fd621, %p494; + mul.f64 %fd491, %fd490, %fd129; + mul.f32 %f1494, %f226, %f220; + cvt.f64.f32 %fd492, %f1494; + sub.f64 %fd493, %fd492, %fd491; + cvt.f64.f32 %fd494, %f2999; + add.f64 %fd637, %fd493, %fd494; + +$L__BB3_370: + cvt.rn.f32.f64 %f3004, %fd642; + cvt.rn.f32.f64 %f3003, %fd641; + cvt.rn.f32.f64 %f3002, %fd640; + cvt.rn.f32.f64 %f3001, %fd639; + cvt.rn.f32.f64 %f3000, %fd638; + cvt.rn.f32.f64 %f2999, %fd637; + fma.rn.f32 %f2997, %f226, %f177, %f2997; + fma.rn.f32 %f2996, %f226, %f221, %f2996; + add.f32 %f2995, %f2995, %f226; + fma.rn.f32 %f2994, %f226, %f191, %f2994; + fma.rn.f32 %f2993, %f226, %f219, %f2993; + add.s32 %r853, %r853, 1; + setp.lt.s32 %p595, %r853, %r108; + @%p595 bra $L__BB3_56; + + add.s32 %r852, %r852, 1; + setp.lt.s32 %p596, %r852, %r108; + @%p596 bra $L__BB3_55; + +$L__BB3_372: + ld.param.u32 %r830, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_3]; + div.rn.f32 %f1499, %f2998, %f3004; + mov.f32 %f1500, 0fBF800000; + max.f32 %f1501, %f1499, %f1500; + mov.f32 %f1502, 0f3F800000; + min.f32 %f1503, %f1501, %f1502; + sub.f32 %f3048, %f3048, %f1503; + div.rn.f32 %f1504, %f2997, %f3003; + max.f32 %f1505, %f1504, %f1500; + min.f32 %f1506, %f1505, %f1502; + sub.f32 %f3047, %f3047, %f1506; + neg.f32 %f1507, %f3046; + div.rn.f32 %f1508, %f2996, %f3002; + max.f32 %f1509, %f1508, %f1507; + min.f32 %f1510, %f1509, %f3046; + sub.f32 %f1511, %f3046, %f1510; + neg.f32 %f1512, %f3045; + div.rn.f32 %f1513, %f2995, %f3001; + max.f32 %f1514, %f1513, %f1512; + min.f32 %f1515, %f1514, %f3045; + sub.f32 %f1516, %f3045, %f1515; + neg.f32 %f1517, %f3044; + div.rn.f32 %f1518, %f2994, %f3000; + max.f32 %f1519, %f1518, %f1517; + min.f32 %f1520, %f1519, %f3044; + sub.f32 %f1521, %f3044, %f1520; + neg.f32 %f1522, %f3043; + div.rn.f32 %f1523, %f2993, %f2999; + max.f32 %f1524, %f1523, %f1522; + min.f32 %f1525, %f1524, %f3043; + sub.f32 %f1526, %f3043, %f1525; + max.f32 %f3046, %f1511, %f1502; + mov.f32 %f1527, 0f3C23D70A; + max.f32 %f3045, %f1516, %f1527; + mov.f32 %f1528, 0f3F000000; + max.f32 %f1529, %f1521, %f1528; + min.f32 %f3044, %f1529, %f51; + max.f32 %f1530, %f1526, %f1528; + min.f32 %f3043, %f1530, %f51; + add.s32 %r851, %r851, 1; + setp.lt.s32 %p597, %r851, %r830; + @%p597 bra $L__BB3_53; + +$L__BB3_373: + mov.f32 %f1552, 0f00000000; + mov.f32 %f3071, %f1552; + mov.f32 %f3072, %f1552; + mov.f32 %f3073, %f1552; + mov.f32 %f3076, %f1552; + mov.f32 %f3080, %f1552; + mov.f32 %f3085, %f1552; + mov.f32 %f3074, %f1552; + mov.f32 %f3075, %f1552; + mov.f32 %f3077, %f1552; + mov.f32 %f3081, %f1552; + mov.f32 %f3086, %f1552; + mov.f32 %f3078, %f1552; + mov.f32 %f3079, %f1552; + mov.f32 %f3082, %f1552; + mov.f32 %f3087, %f1552; + mov.f32 %f3083, %f1552; + mov.f32 %f3084, %f1552; + mov.f32 %f3088, %f1552; + mov.f32 %f3089, %f1552; + mov.f32 %f3090, %f1552; + mov.f32 %f3091, %f1552; + mov.f32 %f3119, %f1552; + @%p44 bra $L__BB3_462; + + mov.f32 %f1575, 0f3F000000; + div.rn.f32 %f1576, %f1575, %f3044; + div.rn.f32 %f1577, %f1576, %f3044; + div.rn.f32 %f1578, %f1575, %f3043; + div.rn.f32 %f1579, %f1578, %f3043; + div.rn.f32 %f1580, %f3046, 0fC0206C98; + div.rn.f32 %f263, %f1580, %f3044; + div.rn.f32 %f264, %f1580, %f3043; + div.rn.f32 %f265, %f263, %f3044; + div.rn.f32 %f266, %f264, %f3043; + sqrt.rn.f32 %f267, %f1577; + sqrt.rn.f32 %f268, %f1579; + mov.f32 %f1581, 0f3F800000; + cvt.rzi.f32.f32 %f1582, %f1581; + add.f32 %f1583, %f1582, %f1582; + mov.f32 %f1584, 0f40000000; + sub.f32 %f1585, %f1584, %f1583; + abs.f32 %f269, %f1585; + mov.u32 %r671, 0; + setp.eq.f32 %p606, %f269, 0f3F800000; + mov.u32 %r854, %r671; + +$L__BB3_375: + cvt.rn.f32.s32 %f1586, %r854; + sub.f32 %f292, %f1586, %f3048; + add.f32 %f1587, %f292, 0f3F000000; + mul.f32 %f1588, %f1587, %f267; + abs.f32 %f293, %f1588; + setp.ge.f32 %p599, %f293, 0f3F8060FE; + mul.f32 %f1589, %f1588, %f1588; + selp.f32 %f1590, %f293, %f1589, %p599; + selp.f32 %f1591, 0f3789CA3C, 0f38B1E96A, %p599; + selp.f32 %f1592, 0fB9F560B9, 0fBA574D20, %p599; + fma.rn.f32 %f1593, %f1591, %f1590, %f1592; + selp.f32 %f1594, 0f3BAC840B, 0f3BAAD5EA, %p599; + fma.rn.f32 %f1595, %f1593, %f1590, %f1594; + selp.f32 %f1596, 0fBD0C8162, 0fBCDC1BE7, %p599; + fma.rn.f32 %f1597, %f1595, %f1590, %f1596; + selp.f32 %f1598, 0f3E1CF906, 0f3DE718AF, %p599; + fma.rn.f32 %f1599, %f1597, %f1590, %f1598; + selp.f32 %f1600, 0f3F6A937E, 0fBEC093AC, %p599; + fma.rn.f32 %f1601, %f1599, %f1590, %f1600; + selp.f32 %f1602, 0f3F20D842, 0f3E0375D3, %p599; + fma.rn.f32 %f1603, %f1601, %f1590, %f1602; + neg.f32 %f1604, %f293; + selp.f32 %f1605, %f1604, %f1588, %p599; + fma.rn.f32 %f294, %f1603, %f1605, %f1605; + mov.b32 %r673, %f1588; + and.b32 %r99, %r673, -2147483648; + add.f32 %f1606, %f292, 0fBF000000; + mul.f32 %f1607, %f1606, %f267; + abs.f32 %f295, %f1607; + setp.ge.f32 %p600, %f295, 0f3F8060FE; + mul.f32 %f1608, %f1607, %f1607; + selp.f32 %f1609, %f295, %f1608, %p600; + selp.f32 %f1610, 0f3789CA3C, 0f38B1E96A, %p600; + selp.f32 %f1611, 0fB9F560B9, 0fBA574D20, %p600; + fma.rn.f32 %f1612, %f1610, %f1609, %f1611; + selp.f32 %f1613, 0f3BAC840B, 0f3BAAD5EA, %p600; + fma.rn.f32 %f1614, %f1612, %f1609, %f1613; + selp.f32 %f1615, 0fBD0C8162, 0fBCDC1BE7, %p600; + fma.rn.f32 %f1616, %f1614, %f1609, %f1615; + selp.f32 %f1617, 0f3E1CF906, 0f3DE718AF, %p600; + fma.rn.f32 %f1618, %f1616, %f1609, %f1617; + selp.f32 %f1619, 0f3F6A937E, 0fBEC093AC, %p600; + fma.rn.f32 %f1620, %f1618, %f1609, %f1619; + selp.f32 %f1621, 0f3F20D842, 0f3E0375D3, %p600; + fma.rn.f32 %f1622, %f1620, %f1609, %f1621; + neg.f32 %f1623, %f295; + selp.f32 %f1624, %f1623, %f1607, %p600; + fma.rn.f32 %f296, %f1622, %f1624, %f1624; + mov.b32 %r674, %f1607; + and.b32 %r100, %r674, -2147483648; + add.f32 %f1625, %f1586, 0f3F000000; + sub.f32 %f1626, %f1625, %f3048; + div.rn.f32 %f297, %f1626, %f3044; + abs.f32 %f298, %f297; + setp.lt.f32 %p601, %f298, 0f00800000; + mul.f32 %f1627, %f298, 0f4B800000; + selp.f32 %f1628, %f1627, %f298, %p601; + selp.f32 %f1629, 0fC3170000, 0fC2FE0000, %p601; + mov.b32 %r675, %f1628; + and.b32 %r676, %r675, 8388607; + or.b32 %r677, %r676, 1065353216; + mov.b32 %f1630, %r677; + shr.u32 %r678, %r675, 23; + cvt.rn.f32.u32 %f1631, %r678; + add.f32 %f1632, %f1629, %f1631; + setp.gt.f32 %p602, %f1630, 0f3FB504F3; + mul.f32 %f1633, %f1630, 0f3F000000; + add.f32 %f1634, %f1632, 0f3F800000; + selp.f32 %f1635, %f1634, %f1632, %p602; + selp.f32 %f1636, %f1633, %f1630, %p602; + add.f32 %f1637, %f1636, 0fBF800000; + add.f32 %f1638, %f1636, 0f3F800000; + rcp.approx.ftz.f32 %f1639, %f1638; + add.f32 %f1640, %f1637, %f1637; + mul.f32 %f1642, %f1640, %f1639; + mul.f32 %f1643, %f1642, %f1642; + mov.f32 %f1644, 0f3C4CAF63; + mov.f32 %f1645, 0f3B18F0FE; + fma.rn.f32 %f1646, %f1645, %f1643, %f1644; + mov.f32 %f1647, 0f3DAAAABD; + fma.rn.f32 %f1648, %f1646, %f1643, %f1647; + mul.rn.f32 %f1649, %f1648, %f1643; + mul.rn.f32 %f1650, %f1649, %f1642; + sub.f32 %f1651, %f1637, %f1642; + add.f32 %f1652, %f1651, %f1651; + neg.f32 %f1653, %f1642; + fma.rn.f32 %f1654, %f1653, %f1637, %f1652; + mul.rn.f32 %f1655, %f1639, %f1654; + add.f32 %f1656, %f1650, %f1642; + sub.f32 %f1657, %f1642, %f1656; + add.f32 %f1658, %f1650, %f1657; + add.f32 %f1659, %f1655, %f1658; + add.f32 %f1660, %f1656, %f1659; + sub.f32 %f1661, %f1656, %f1660; + add.f32 %f1662, %f1659, %f1661; + mov.f32 %f1663, 0f3F317200; + mul.rn.f32 %f1664, %f1635, %f1663; + mov.f32 %f1665, 0f35BFBE8E; + mul.rn.f32 %f1666, %f1635, %f1665; + add.f32 %f1667, %f1664, %f1660; + sub.f32 %f1668, %f1664, %f1667; + add.f32 %f1669, %f1660, %f1668; + add.f32 %f1670, %f1662, %f1669; + add.f32 %f1671, %f1666, %f1670; + add.f32 %f1672, %f1667, %f1671; + sub.f32 %f1673, %f1667, %f1672; + add.f32 %f1674, %f1671, %f1673; + mul.rn.f32 %f1675, %f1584, %f1672; + neg.f32 %f1676, %f1675; + fma.rn.f32 %f1677, %f1584, %f1672, %f1676; + fma.rn.f32 %f1678, %f1584, %f1674, %f1677; + fma.rn.f32 %f1680, %f1552, %f1672, %f1678; + add.rn.f32 %f1681, %f1675, %f1680; + neg.f32 %f1682, %f1681; + add.rn.f32 %f1683, %f1675, %f1682; + add.rn.f32 %f1684, %f1683, %f1680; + mov.b32 %r679, %f1681; + setp.eq.s32 %p603, %r679, 1118925336; + add.s32 %r680, %r679, -1; + mov.b32 %f1685, %r680; + add.f32 %f1686, %f1684, 0f37000000; + selp.f32 %f299, %f1686, %f1684, %p603; + selp.f32 %f1687, %f1685, %f1681, %p603; + mov.f32 %f1688, 0f3FB8AA3B; + mul.rn.f32 %f1689, %f1687, %f1688; + cvt.rzi.f32.f32 %f1690, %f1689; + abs.f32 %f1691, %f1690; + setp.gt.f32 %p604, %f1691, 0f42FC0000; + mov.b32 %r681, %f1690; + and.b32 %r682, %r681, -2147483648; + or.b32 %r683, %r682, 1123811328; + mov.b32 %f1692, %r683; + selp.f32 %f1693, %f1692, %f1690, %p604; + mov.f32 %f1694, 0fBF317218; + fma.rn.f32 %f1695, %f1693, %f1694, %f1687; + mov.f32 %f1696, 0f3102E308; + fma.rn.f32 %f1697, %f1693, %f1696, %f1695; + mul.f32 %f1698, %f1697, 0f3FB8AA3B; + add.f32 %f1699, %f1693, 0f4B40007F; + mov.b32 %r684, %f1699; + shl.b32 %r685, %r684, 23; + mov.b32 %f1700, %r685; + ex2.approx.ftz.f32 %f1701, %f1698; + mul.f32 %f300, %f1701, %f1700; + setp.lt.f32 %p605, %f297, 0f00000000; + and.pred %p31, %p605, %p606; + add.f32 %f1702, %f297, %f297; + selp.f32 %f301, %f1702, 0f00000000, %p606; + add.f32 %f1703, %f298, 0f40000000; + mov.b32 %r101, %f1703; + div.rn.f32 %f302, %f1606, %f3044; + abs.f32 %f303, %f302; + setp.lt.f32 %p607, %f303, 0f00800000; + mul.f32 %f1704, %f303, 0f4B800000; + selp.f32 %f1705, %f1704, %f303, %p607; + selp.f32 %f1706, 0fC3170000, 0fC2FE0000, %p607; + mov.b32 %r686, %f1705; + and.b32 %r687, %r686, 8388607; + or.b32 %r688, %r687, 1065353216; + mov.b32 %f1707, %r688; + shr.u32 %r689, %r686, 23; + cvt.rn.f32.u32 %f1708, %r689; + add.f32 %f1709, %f1706, %f1708; + setp.gt.f32 %p608, %f1707, 0f3FB504F3; + mul.f32 %f1710, %f1707, 0f3F000000; + add.f32 %f1711, %f1709, 0f3F800000; + selp.f32 %f1712, %f1711, %f1709, %p608; + selp.f32 %f1713, %f1710, %f1707, %p608; + add.f32 %f1714, %f1713, 0fBF800000; + add.f32 %f1715, %f1713, 0f3F800000; + rcp.approx.ftz.f32 %f1716, %f1715; + add.f32 %f1717, %f1714, %f1714; + mul.f32 %f1718, %f1717, %f1716; + mul.f32 %f1719, %f1718, %f1718; + fma.rn.f32 %f1720, %f1645, %f1719, %f1644; + fma.rn.f32 %f1721, %f1720, %f1719, %f1647; + mul.rn.f32 %f1722, %f1721, %f1719; + mul.rn.f32 %f1723, %f1722, %f1718; + sub.f32 %f1724, %f1714, %f1718; + add.f32 %f1725, %f1724, %f1724; + neg.f32 %f1726, %f1718; + fma.rn.f32 %f1727, %f1726, %f1714, %f1725; + mul.rn.f32 %f1728, %f1716, %f1727; + add.f32 %f1729, %f1723, %f1718; + sub.f32 %f1730, %f1718, %f1729; + add.f32 %f1731, %f1723, %f1730; + add.f32 %f1732, %f1728, %f1731; + add.f32 %f1733, %f1729, %f1732; + sub.f32 %f1734, %f1729, %f1733; + add.f32 %f1735, %f1732, %f1734; + mul.rn.f32 %f1736, %f1712, %f1663; + mul.rn.f32 %f1737, %f1712, %f1665; + add.f32 %f1738, %f1736, %f1733; + sub.f32 %f1739, %f1736, %f1738; + add.f32 %f1740, %f1733, %f1739; + add.f32 %f1741, %f1735, %f1740; + add.f32 %f1742, %f1737, %f1741; + add.f32 %f1743, %f1738, %f1742; + sub.f32 %f1744, %f1738, %f1743; + add.f32 %f1745, %f1742, %f1744; + mul.rn.f32 %f1746, %f1584, %f1743; + neg.f32 %f1747, %f1746; + fma.rn.f32 %f1748, %f1584, %f1743, %f1747; + fma.rn.f32 %f1749, %f1584, %f1745, %f1748; + fma.rn.f32 %f1750, %f1552, %f1743, %f1749; + add.rn.f32 %f1751, %f1746, %f1750; + neg.f32 %f1752, %f1751; + add.rn.f32 %f1753, %f1746, %f1752; + add.rn.f32 %f1754, %f1753, %f1750; + mov.b32 %r690, %f1751; + setp.eq.s32 %p609, %r690, 1118925336; + add.s32 %r691, %r690, -1; + mov.b32 %f1755, %r691; + add.f32 %f1756, %f1754, 0f37000000; + selp.f32 %f304, %f1756, %f1754, %p609; + selp.f32 %f1757, %f1755, %f1751, %p609; + mul.rn.f32 %f1758, %f1757, %f1688; + cvt.rzi.f32.f32 %f1759, %f1758; + abs.f32 %f1760, %f1759; + setp.gt.f32 %p610, %f1760, 0f42FC0000; + mov.b32 %r692, %f1759; + and.b32 %r693, %r692, -2147483648; + or.b32 %r694, %r693, 1123811328; + mov.b32 %f1761, %r694; + selp.f32 %f1762, %f1761, %f1759, %p610; + fma.rn.f32 %f1763, %f1762, %f1694, %f1757; + fma.rn.f32 %f1764, %f1762, %f1696, %f1763; + mul.f32 %f1765, %f1764, 0f3FB8AA3B; + add.f32 %f1766, %f1762, 0f4B40007F; + mov.b32 %r695, %f1766; + shl.b32 %r696, %r695, 23; + mov.b32 %f1767, %r696; + ex2.approx.ftz.f32 %f1768, %f1765; + mul.f32 %f305, %f1768, %f1767; + add.f32 %f306, %f297, 0f40000000; + setp.lt.f32 %p611, %f302, 0f00000000; + and.pred %p32, %p611, %p606; + selp.f32 %f307, 0fFF800000, 0f7F800000, %p31; + add.f32 %f1769, %f302, %f302; + selp.f32 %f308, %f1769, 0f00000000, %p606; + add.f32 %f1770, %f303, 0f40000000; + mov.b32 %r102, %f1770; + add.f32 %f309, %f302, 0f40000000; + selp.f32 %f310, 0fFF800000, 0f7F800000, %p32; + add.f32 %f1771, %f1586, 0f3F800000; + sub.f32 %f1772, %f1771, %f3048; + div.rn.f32 %f311, %f1772, %f3044; + abs.f32 %f312, %f311; + setp.lt.f32 %p612, %f312, 0f00800000; + mul.f32 %f1773, %f312, 0f4B800000; + selp.f32 %f1774, %f1773, %f312, %p612; + selp.f32 %f1775, 0fC3170000, 0fC2FE0000, %p612; + mov.b32 %r697, %f1774; + and.b32 %r698, %r697, 8388607; + or.b32 %r699, %r698, 1065353216; + mov.b32 %f1776, %r699; + shr.u32 %r700, %r697, 23; + cvt.rn.f32.u32 %f1777, %r700; + add.f32 %f1778, %f1775, %f1777; + setp.gt.f32 %p613, %f1776, 0f3FB504F3; + mul.f32 %f1779, %f1776, 0f3F000000; + add.f32 %f1780, %f1778, 0f3F800000; + selp.f32 %f1781, %f1780, %f1778, %p613; + selp.f32 %f1782, %f1779, %f1776, %p613; + add.f32 %f1783, %f1782, 0fBF800000; + add.f32 %f1784, %f1782, 0f3F800000; + rcp.approx.ftz.f32 %f1785, %f1784; + add.f32 %f1786, %f1783, %f1783; + mul.f32 %f1787, %f1786, %f1785; + mul.f32 %f1788, %f1787, %f1787; + fma.rn.f32 %f1789, %f1645, %f1788, %f1644; + fma.rn.f32 %f1790, %f1789, %f1788, %f1647; + mul.rn.f32 %f1791, %f1790, %f1788; + mul.rn.f32 %f1792, %f1791, %f1787; + sub.f32 %f1793, %f1783, %f1787; + add.f32 %f1794, %f1793, %f1793; + neg.f32 %f1795, %f1787; + fma.rn.f32 %f1796, %f1795, %f1783, %f1794; + mul.rn.f32 %f1797, %f1785, %f1796; + add.f32 %f1798, %f1792, %f1787; + sub.f32 %f1799, %f1787, %f1798; + add.f32 %f1800, %f1792, %f1799; + add.f32 %f1801, %f1797, %f1800; + add.f32 %f1802, %f1798, %f1801; + sub.f32 %f1803, %f1798, %f1802; + add.f32 %f1804, %f1801, %f1803; + mul.rn.f32 %f1805, %f1781, %f1663; + mul.rn.f32 %f1806, %f1781, %f1665; + add.f32 %f1807, %f1805, %f1802; + sub.f32 %f1808, %f1805, %f1807; + add.f32 %f1809, %f1802, %f1808; + add.f32 %f1810, %f1804, %f1809; + add.f32 %f1811, %f1806, %f1810; + add.f32 %f1812, %f1807, %f1811; + sub.f32 %f1813, %f1807, %f1812; + add.f32 %f1814, %f1811, %f1813; + mul.rn.f32 %f1815, %f1584, %f1812; + neg.f32 %f1816, %f1815; + fma.rn.f32 %f1817, %f1584, %f1812, %f1816; + fma.rn.f32 %f1818, %f1584, %f1814, %f1817; + fma.rn.f32 %f1819, %f1552, %f1812, %f1818; + add.rn.f32 %f1820, %f1815, %f1819; + neg.f32 %f1821, %f1820; + add.rn.f32 %f1822, %f1815, %f1821; + add.rn.f32 %f1823, %f1822, %f1819; + mov.b32 %r701, %f1820; + setp.eq.s32 %p614, %r701, 1118925336; + add.s32 %r702, %r701, -1; + mov.b32 %f1824, %r702; + add.f32 %f1825, %f1823, 0f37000000; + selp.f32 %f313, %f1825, %f1823, %p614; + selp.f32 %f1826, %f1824, %f1820, %p614; + mul.rn.f32 %f1827, %f1826, %f1688; + cvt.rzi.f32.f32 %f1828, %f1827; + abs.f32 %f1829, %f1828; + setp.gt.f32 %p615, %f1829, 0f42FC0000; + mov.b32 %r703, %f1828; + and.b32 %r704, %r703, -2147483648; + or.b32 %r705, %r704, 1123811328; + mov.b32 %f1830, %r705; + selp.f32 %f1831, %f1830, %f1828, %p615; + fma.rn.f32 %f1832, %f1831, %f1694, %f1826; + fma.rn.f32 %f1833, %f1831, %f1696, %f1832; + mul.f32 %f1834, %f1833, 0f3FB8AA3B; + add.f32 %f1835, %f1831, 0f4B40007F; + mov.b32 %r706, %f1835; + shl.b32 %r707, %r706, 23; + mov.b32 %f1836, %r707; + ex2.approx.ftz.f32 %f1837, %f1834; + mul.f32 %f314, %f1837, %f1836; + setp.lt.f32 %p616, %f311, 0f00000000; + and.pred %p33, %p616, %p606; + add.f32 %f1838, %f311, %f311; + selp.f32 %f315, %f1838, 0f00000000, %p606; + add.f32 %f1839, %f312, 0f40000000; + mov.b32 %r103, %f1839; + div.rn.f32 %f316, %f292, %f3044; + abs.f32 %f317, %f316; + setp.lt.f32 %p617, %f317, 0f00800000; + mul.f32 %f1840, %f317, 0f4B800000; + selp.f32 %f1841, %f1840, %f317, %p617; + selp.f32 %f1842, 0fC3170000, 0fC2FE0000, %p617; + mov.b32 %r708, %f1841; + and.b32 %r709, %r708, 8388607; + or.b32 %r710, %r709, 1065353216; + mov.b32 %f1843, %r710; + shr.u32 %r711, %r708, 23; + cvt.rn.f32.u32 %f1844, %r711; + add.f32 %f1845, %f1842, %f1844; + setp.gt.f32 %p618, %f1843, 0f3FB504F3; + mul.f32 %f1846, %f1843, 0f3F000000; + add.f32 %f1847, %f1845, 0f3F800000; + selp.f32 %f1848, %f1847, %f1845, %p618; + selp.f32 %f1849, %f1846, %f1843, %p618; + add.f32 %f1850, %f1849, 0fBF800000; + add.f32 %f1851, %f1849, 0f3F800000; + rcp.approx.ftz.f32 %f1852, %f1851; + add.f32 %f1853, %f1850, %f1850; + mul.f32 %f1854, %f1853, %f1852; + mul.f32 %f1855, %f1854, %f1854; + fma.rn.f32 %f1856, %f1645, %f1855, %f1644; + fma.rn.f32 %f1857, %f1856, %f1855, %f1647; + mul.rn.f32 %f1858, %f1857, %f1855; + mul.rn.f32 %f1859, %f1858, %f1854; + sub.f32 %f1860, %f1850, %f1854; + add.f32 %f1861, %f1860, %f1860; + neg.f32 %f1862, %f1854; + fma.rn.f32 %f1863, %f1862, %f1850, %f1861; + mul.rn.f32 %f1864, %f1852, %f1863; + add.f32 %f1865, %f1859, %f1854; + sub.f32 %f1866, %f1854, %f1865; + add.f32 %f1867, %f1859, %f1866; + add.f32 %f1868, %f1864, %f1867; + add.f32 %f1869, %f1865, %f1868; + sub.f32 %f1870, %f1865, %f1869; + add.f32 %f1871, %f1868, %f1870; + mul.rn.f32 %f1872, %f1848, %f1663; + mul.rn.f32 %f1873, %f1848, %f1665; + add.f32 %f1874, %f1872, %f1869; + sub.f32 %f1875, %f1872, %f1874; + add.f32 %f1876, %f1869, %f1875; + add.f32 %f1877, %f1871, %f1876; + add.f32 %f1878, %f1873, %f1877; + add.f32 %f1879, %f1874, %f1878; + sub.f32 %f1880, %f1874, %f1879; + add.f32 %f1881, %f1878, %f1880; + mul.rn.f32 %f1882, %f1584, %f1879; + neg.f32 %f1883, %f1882; + fma.rn.f32 %f1884, %f1584, %f1879, %f1883; + fma.rn.f32 %f1885, %f1584, %f1881, %f1884; + fma.rn.f32 %f1886, %f1552, %f1879, %f1885; + add.rn.f32 %f1887, %f1882, %f1886; + neg.f32 %f1888, %f1887; + add.rn.f32 %f1889, %f1882, %f1888; + add.rn.f32 %f1890, %f1889, %f1886; + mov.b32 %r712, %f1887; + setp.eq.s32 %p619, %r712, 1118925336; + add.s32 %r713, %r712, -1; + mov.b32 %f1891, %r713; + add.f32 %f1892, %f1890, 0f37000000; + selp.f32 %f318, %f1892, %f1890, %p619; + selp.f32 %f1893, %f1891, %f1887, %p619; + mul.rn.f32 %f1894, %f1893, %f1688; + cvt.rzi.f32.f32 %f1895, %f1894; + abs.f32 %f1896, %f1895; + setp.gt.f32 %p620, %f1896, 0f42FC0000; + mov.b32 %r714, %f1895; + and.b32 %r715, %r714, -2147483648; + or.b32 %r716, %r715, 1123811328; + mov.b32 %f1897, %r716; + selp.f32 %f1898, %f1897, %f1895, %p620; + fma.rn.f32 %f1899, %f1898, %f1694, %f1893; + fma.rn.f32 %f1900, %f1898, %f1696, %f1899; + mul.f32 %f1901, %f1900, 0f3FB8AA3B; + add.f32 %f1902, %f1898, 0f4B40007F; + mov.b32 %r717, %f1902; + shl.b32 %r718, %r717, 23; + mov.b32 %f1903, %r718; + ex2.approx.ftz.f32 %f1904, %f1901; + mul.f32 %f319, %f1904, %f1903; + add.f32 %f320, %f311, 0f40000000; + setp.lt.f32 %p621, %f316, 0f00000000; + and.pred %p34, %p621, %p606; + selp.f32 %f321, 0fFF800000, 0f7F800000, %p33; + add.f32 %f1905, %f316, %f316; + selp.f32 %f322, %f1905, 0f00000000, %p606; + add.f32 %f1906, %f317, 0f40000000; + mov.b32 %r104, %f1906; + add.f32 %f323, %f292, 0f3F800000; + add.f32 %f324, %f316, 0f40000000; + selp.f32 %f325, 0fFF800000, 0f7F800000, %p34; + setp.geu.f32 %p35, %f297, 0f00000000; + setp.geu.f32 %p36, %f302, 0f00000000; + setp.geu.f32 %p37, %f311, 0f00000000; + setp.geu.f32 %p38, %f316, 0f00000000; + mov.u32 %r855, %r671; + +$L__BB3_376: + setp.ltu.f32 %p622, %f293, 0f3F8060FE; + mov.f32 %f3093, %f294; + @%p622 bra $L__BB3_378; + + ex2.approx.ftz.f32 %f1907, %f294; + sub.f32 %f1909, %f1581, %f1907; + mov.b32 %r719, %f1909; + or.b32 %r720, %r99, %r719; + mov.b32 %f3093, %r720; + +$L__BB3_378: + setp.ltu.f32 %p623, %f295, 0f3F8060FE; + mov.f32 %f3094, %f296; + @%p623 bra $L__BB3_380; + + ex2.approx.ftz.f32 %f1910, %f296; + sub.f32 %f1912, %f1581, %f1910; + mov.b32 %r721, %f1912; + or.b32 %r722, %r100, %r721; + mov.b32 %f3094, %r722; + +$L__BB3_380: + sub.f32 %f1913, %f3093, %f3094; + mul.f32 %f352, %f1913, 0f3F000000; + cvt.rn.f32.s32 %f353, %r855; + sub.f32 %f354, %f353, %f3047; + add.f32 %f1914, %f354, 0f3F000000; + mul.f32 %f355, %f1914, %f268; + abs.f32 %f1915, %f355; + setp.ltu.f32 %p624, %f1915, 0f3F8060FE; + setp.ge.f32 %p625, %f1915, 0f3F8060FE; + mul.f32 %f1916, %f355, %f355; + selp.f32 %f1917, %f1915, %f1916, %p625; + selp.f32 %f1918, 0f3789CA3C, 0f38B1E96A, %p625; + selp.f32 %f1919, 0fB9F560B9, 0fBA574D20, %p625; + fma.rn.f32 %f1920, %f1918, %f1917, %f1919; + selp.f32 %f1921, 0f3BAC840B, 0f3BAAD5EA, %p625; + fma.rn.f32 %f1922, %f1920, %f1917, %f1921; + selp.f32 %f1923, 0fBD0C8162, 0fBCDC1BE7, %p625; + fma.rn.f32 %f1924, %f1922, %f1917, %f1923; + selp.f32 %f1925, 0f3E1CF906, 0f3DE718AF, %p625; + fma.rn.f32 %f1926, %f1924, %f1917, %f1925; + selp.f32 %f1927, 0f3F6A937E, 0fBEC093AC, %p625; + fma.rn.f32 %f1928, %f1926, %f1917, %f1927; + selp.f32 %f1929, 0f3F20D842, 0f3E0375D3, %p625; + fma.rn.f32 %f1930, %f1928, %f1917, %f1929; + neg.f32 %f1931, %f1915; + selp.f32 %f1932, %f1931, %f355, %p625; + fma.rn.f32 %f3095, %f1930, %f1932, %f1932; + @%p624 bra $L__BB3_382; + + ex2.approx.ftz.f32 %f1933, %f3095; + sub.f32 %f1935, %f1581, %f1933; + mov.b32 %r723, %f1935; + mov.b32 %r724, %f355; + and.b32 %r725, %r724, -2147483648; + or.b32 %r726, %r725, %r723; + mov.b32 %f3095, %r726; + +$L__BB3_382: + add.f32 %f359, %f354, 0fBF000000; + mul.f32 %f360, %f359, %f268; + abs.f32 %f1936, %f360; + setp.ltu.f32 %p626, %f1936, 0f3F8060FE; + setp.ge.f32 %p627, %f1936, 0f3F8060FE; + mul.f32 %f1937, %f360, %f360; + selp.f32 %f1938, %f1936, %f1937, %p627; + selp.f32 %f1939, 0f3789CA3C, 0f38B1E96A, %p627; + selp.f32 %f1940, 0fB9F560B9, 0fBA574D20, %p627; + fma.rn.f32 %f1941, %f1939, %f1938, %f1940; + selp.f32 %f1942, 0f3BAC840B, 0f3BAAD5EA, %p627; + fma.rn.f32 %f1943, %f1941, %f1938, %f1942; + selp.f32 %f1944, 0fBD0C8162, 0fBCDC1BE7, %p627; + fma.rn.f32 %f1945, %f1943, %f1938, %f1944; + selp.f32 %f1946, 0f3E1CF906, 0f3DE718AF, %p627; + fma.rn.f32 %f1947, %f1945, %f1938, %f1946; + selp.f32 %f1948, 0f3F6A937E, 0fBEC093AC, %p627; + fma.rn.f32 %f1949, %f1947, %f1938, %f1948; + selp.f32 %f1950, 0f3F20D842, 0f3E0375D3, %p627; + fma.rn.f32 %f1951, %f1949, %f1938, %f1950; + neg.f32 %f1952, %f1936; + selp.f32 %f1953, %f1952, %f360, %p627; + fma.rn.f32 %f3096, %f1951, %f1953, %f1953; + @%p626 bra $L__BB3_384; + + ex2.approx.ftz.f32 %f1954, %f3096; + sub.f32 %f1956, %f1581, %f1954; + mov.b32 %r727, %f1956; + mov.b32 %r728, %f360; + and.b32 %r729, %r728, -2147483648; + or.b32 %r730, %r729, %r727; + mov.b32 %f3096, %r730; + +$L__BB3_384: + sub.f32 %f1958, %f3095, %f3096; + mul.f32 %f364, %f1958, 0f3F000000; + mul.f32 %f1959, %f352, %f3046; + fma.rn.f32 %f365, %f364, %f1959, %f3045; + mad.lo.s32 %r731, %r855, %r108, %r854; + add.s32 %r732, %r731, %r2; + mul.wide.s32 %rd28, %r732, 4; + add.s64 %rd29, %rd1, %rd28; + ld.global.f32 %f366, [%rd29]; + setp.eq.f32 %p628, %f300, 0f7F800000; + mov.f32 %f3097, 0f7F800000; + @%p628 bra $L__BB3_386; + + fma.rn.f32 %f3097, %f300, %f299, %f300; + +$L__BB3_386: + mov.b32 %r733, %f3097; + xor.b32 %r734, %r733, -2147483648; + mov.b32 %f1960, %r734; + selp.f32 %f369, %f1960, %f3097, %p31; + setp.eq.f32 %p629, %f297, 0f00000000; + selp.f32 %f3098, %f301, %f369, %p629; + @%p35 bra $L__BB3_389; + + cvt.rzi.f32.f32 %f1962, %f1584; + setp.eq.f32 %p630, %f1962, 0f40000000; + mov.f32 %f3098, %f369; + @%p630 bra $L__BB3_389; + + mov.f32 %f3098, 0f7FFFFFFF; + +$L__BB3_389: + setp.eq.f32 %p631, %f305, 0f7F800000; + mov.f32 %f3099, 0f7F800000; + @%p631 bra $L__BB3_391; + + fma.rn.f32 %f3099, %f305, %f304, %f305; + +$L__BB3_391: + mov.b32 %r735, %f3099; + xor.b32 %r736, %r735, -2147483648; + mov.b32 %f1965, %r736; + selp.f32 %f374, %f1965, %f3099, %p32; + setp.eq.f32 %p632, %f302, 0f00000000; + selp.f32 %f3100, %f308, %f374, %p632; + @%p36 bra $L__BB3_394; + + cvt.rzi.f32.f32 %f1967, %f1584; + setp.eq.f32 %p633, %f1967, 0f40000000; + mov.f32 %f3100, %f374; + @%p633 bra $L__BB3_394; + + mov.f32 %f3100, 0f7FFFFFFF; + +$L__BB3_394: + setp.gtu.f32 %p634, %f298, 0f7F800000; + mov.f32 %f3101, 0f7F800000; + selp.f32 %f1970, %f306, %f3098, %p634; + setp.neu.f32 %p635, %f298, 0f7F800000; + selp.f32 %f1971, %f1970, %f307, %p635; + setp.gt.s32 %p636, %r101, 2139095039; + selp.f32 %f1972, %f1971, %f3098, %p636; + mul.f32 %f1973, %f1972, 0fBF000000; + setp.eq.f32 %p637, %f297, 0f3F800000; + selp.f32 %f1974, 0fBF000000, %f1973, %p637; + mov.f32 %f1976, 0f3BBB989D; + fma.rn.f32 %f1977, %f1974, %f1976, %f1575; + mov.f32 %f1979, 0f437C0000; + cvt.sat.f32.f32 %f1980, %f1977; + mov.f32 %f1981, 0f4B400001; + fma.rm.f32 %f1982, %f1980, %f1979, %f1981; + setp.gtu.f32 %p638, %f303, 0f7F800000; + selp.f32 %f1983, %f309, %f3100, %p638; + setp.neu.f32 %p639, %f303, 0f7F800000; + selp.f32 %f1984, %f1983, %f310, %p639; + setp.gt.s32 %p640, %r102, 2139095039; + selp.f32 %f1985, %f1984, %f3100, %p640; + mul.f32 %f1986, %f1985, 0fBF000000; + setp.eq.f32 %p641, %f302, 0f3F800000; + selp.f32 %f1987, 0fBF000000, %f1986, %p641; + fma.rn.f32 %f1988, %f1987, %f1976, %f1575; + cvt.sat.f32.f32 %f1989, %f1988; + fma.rm.f32 %f1990, %f1989, %f1979, %f1981; + add.f32 %f1991, %f1990, 0fCB40007F; + neg.f32 %f1992, %f1991; + fma.rn.f32 %f1993, %f1987, %f1688, %f1992; + mov.f32 %f1994, 0f32A57060; + fma.rn.f32 %f1995, %f1987, %f1994, %f1993; + mov.b32 %r737, %f1990; + shl.b32 %r738, %r737, 23; + mov.b32 %f1996, %r738; + ex2.approx.ftz.f32 %f1997, %f1995; + mul.f32 %f1998, %f1997, %f1996; + mov.b32 %r739, %f1982; + shl.b32 %r740, %r739, 23; + mov.b32 %f1999, %r740; + add.f32 %f2000, %f1982, 0fCB40007F; + neg.f32 %f2001, %f2000; + fma.rn.f32 %f2002, %f1974, %f1688, %f2001; + fma.rn.f32 %f2003, %f1974, %f1994, %f2002; + ex2.approx.ftz.f32 %f2004, %f2003; + mul.f32 %f2005, %f2004, %f1999; + sub.f32 %f2006, %f2005, %f1998; + mul.f32 %f2007, %f263, %f2006; + mul.f32 %f377, %f364, %f2007; + add.f32 %f2008, %f353, 0f3F000000; + sub.f32 %f2009, %f2008, %f3047; + div.rn.f32 %f378, %f2009, %f3043; + abs.f32 %f379, %f378; + setp.lt.f32 %p642, %f379, 0f00800000; + mul.f32 %f2010, %f379, 0f4B800000; + selp.f32 %f2011, %f2010, %f379, %p642; + selp.f32 %f2012, 0fC3170000, 0fC2FE0000, %p642; + mov.b32 %r741, %f2011; + and.b32 %r742, %r741, 8388607; + or.b32 %r743, %r742, 1065353216; + mov.b32 %f2013, %r743; + shr.u32 %r744, %r741, 23; + cvt.rn.f32.u32 %f2014, %r744; + add.f32 %f2015, %f2012, %f2014; + setp.gt.f32 %p643, %f2013, 0f3FB504F3; + mul.f32 %f2016, %f2013, 0f3F000000; + add.f32 %f2017, %f2015, 0f3F800000; + selp.f32 %f2018, %f2017, %f2015, %p643; + selp.f32 %f2019, %f2016, %f2013, %p643; + add.f32 %f2020, %f2019, 0fBF800000; + add.f32 %f2021, %f2019, 0f3F800000; + rcp.approx.ftz.f32 %f2022, %f2021; + add.f32 %f2023, %f2020, %f2020; + mul.f32 %f2025, %f2023, %f2022; + mul.f32 %f2026, %f2025, %f2025; + fma.rn.f32 %f2029, %f1645, %f2026, %f1644; + fma.rn.f32 %f2031, %f2029, %f2026, %f1647; + mul.rn.f32 %f2032, %f2031, %f2026; + mul.rn.f32 %f2033, %f2032, %f2025; + sub.f32 %f2034, %f2020, %f2025; + add.f32 %f2035, %f2034, %f2034; + neg.f32 %f2036, %f2025; + fma.rn.f32 %f2037, %f2036, %f2020, %f2035; + mul.rn.f32 %f2038, %f2022, %f2037; + add.f32 %f2039, %f2033, %f2025; + sub.f32 %f2040, %f2025, %f2039; + add.f32 %f2041, %f2033, %f2040; + add.f32 %f2042, %f2038, %f2041; + add.f32 %f2043, %f2039, %f2042; + sub.f32 %f2044, %f2039, %f2043; + add.f32 %f2045, %f2042, %f2044; + mul.rn.f32 %f2047, %f2018, %f1663; + mul.rn.f32 %f2049, %f2018, %f1665; + add.f32 %f2050, %f2047, %f2043; + sub.f32 %f2051, %f2047, %f2050; + add.f32 %f2052, %f2043, %f2051; + add.f32 %f2053, %f2045, %f2052; + add.f32 %f2054, %f2049, %f2053; + add.f32 %f2055, %f2050, %f2054; + sub.f32 %f2056, %f2050, %f2055; + add.f32 %f2057, %f2054, %f2056; + mul.rn.f32 %f2058, %f1584, %f2055; + neg.f32 %f2059, %f2058; + fma.rn.f32 %f2060, %f1584, %f2055, %f2059; + fma.rn.f32 %f2061, %f1584, %f2057, %f2060; + mov.f32 %f2062, 0f00000000; + fma.rn.f32 %f2063, %f2062, %f2055, %f2061; + add.rn.f32 %f2064, %f2058, %f2063; + neg.f32 %f2065, %f2064; + add.rn.f32 %f2066, %f2058, %f2065; + add.rn.f32 %f2067, %f2066, %f2063; + mov.b32 %r745, %f2064; + setp.eq.s32 %p644, %r745, 1118925336; + add.s32 %r746, %r745, -1; + mov.b32 %f2068, %r746; + add.f32 %f2069, %f2067, 0f37000000; + selp.f32 %f380, %f2069, %f2067, %p644; + selp.f32 %f2070, %f2068, %f2064, %p644; + mul.rn.f32 %f2071, %f2070, %f1688; + cvt.rzi.f32.f32 %f2072, %f2071; + abs.f32 %f2073, %f2072; + setp.gt.f32 %p645, %f2073, 0f42FC0000; + mov.b32 %r747, %f2072; + and.b32 %r748, %r747, -2147483648; + or.b32 %r749, %r748, 1123811328; + mov.b32 %f2074, %r749; + selp.f32 %f2075, %f2074, %f2072, %p645; + fma.rn.f32 %f2077, %f2075, %f1694, %f2070; + fma.rn.f32 %f2079, %f2075, %f1696, %f2077; + mul.f32 %f2080, %f2079, 0f3FB8AA3B; + add.f32 %f2081, %f2075, 0f4B40007F; + mov.b32 %r750, %f2081; + shl.b32 %r751, %r750, 23; + mov.b32 %f2082, %r751; + ex2.approx.ftz.f32 %f2083, %f2080; + mul.f32 %f381, %f2083, %f2082; + setp.eq.f32 %p646, %f381, 0f7F800000; + @%p646 bra $L__BB3_396; + + fma.rn.f32 %f3101, %f381, %f380, %f381; + +$L__BB3_396: + setp.lt.f32 %p647, %f378, 0f00000000; + and.pred %p39, %p647, %p606; + setp.eq.f32 %p649, %f378, 0f00000000; + @%p649 bra $L__BB3_400; + bra.uni $L__BB3_397; + +$L__BB3_400: + add.f32 %f2088, %f378, %f378; + selp.f32 %f3103, %f2088, 0f00000000, %p606; + bra.uni $L__BB3_401; + +$L__BB3_397: + mov.b32 %r752, %f3101; + xor.b32 %r753, %r752, -2147483648; + mov.b32 %f2084, %r753; + selp.f32 %f3103, %f2084, %f3101, %p39; + setp.geu.f32 %p650, %f378, 0f00000000; + @%p650 bra $L__BB3_401; + + cvt.rzi.f32.f32 %f2086, %f1584; + setp.eq.f32 %p651, %f2086, 0f40000000; + @%p651 bra $L__BB3_401; + + mov.f32 %f3103, 0f7FFFFFFF; + +$L__BB3_401: + add.f32 %f2089, %f379, 0f40000000; + mov.b32 %r754, %f2089; + setp.lt.s32 %p653, %r754, 2139095040; + @%p653 bra $L__BB3_406; + + setp.gtu.f32 %p654, %f379, 0f7F800000; + @%p654 bra $L__BB3_405; + bra.uni $L__BB3_403; + +$L__BB3_405: + add.f32 %f3103, %f378, 0f40000000; + bra.uni $L__BB3_406; + +$L__BB3_403: + setp.neu.f32 %p655, %f379, 0f7F800000; + @%p655 bra $L__BB3_406; + + selp.f32 %f3103, 0fFF800000, 0f7F800000, %p39; + +$L__BB3_406: + mul.f32 %f2091, %f3103, 0fBF000000; + setp.eq.f32 %p656, %f378, 0f3F800000; + selp.f32 %f2092, 0fBF000000, %f2091, %p656; + fma.rn.f32 %f2095, %f2092, %f1976, %f1575; + cvt.sat.f32.f32 %f2098, %f2095; + fma.rm.f32 %f2100, %f2098, %f1979, %f1981; + add.f32 %f2101, %f2100, 0fCB40007F; + neg.f32 %f2102, %f2101; + fma.rn.f32 %f2103, %f2092, %f1688, %f2102; + fma.rn.f32 %f2105, %f2092, %f1994, %f2103; + mov.b32 %r755, %f2100; + shl.b32 %r756, %r755, 23; + mov.b32 %f2106, %r756; + ex2.approx.ftz.f32 %f2107, %f2105; + mul.f32 %f390, %f2107, %f2106; + div.rn.f32 %f391, %f359, %f3043; + abs.f32 %f392, %f391; + setp.lt.f32 %p657, %f392, 0f00800000; + mul.f32 %f2108, %f392, 0f4B800000; + selp.f32 %f2109, %f2108, %f392, %p657; + selp.f32 %f2110, 0fC3170000, 0fC2FE0000, %p657; + mov.b32 %r757, %f2109; + and.b32 %r758, %r757, 8388607; + or.b32 %r759, %r758, 1065353216; + mov.b32 %f2111, %r759; + shr.u32 %r760, %r757, 23; + cvt.rn.f32.u32 %f2112, %r760; + add.f32 %f2113, %f2110, %f2112; + setp.gt.f32 %p658, %f2111, 0f3FB504F3; + mul.f32 %f2114, %f2111, 0f3F000000; + add.f32 %f2115, %f2113, 0f3F800000; + selp.f32 %f2116, %f2115, %f2113, %p658; + selp.f32 %f2117, %f2114, %f2111, %p658; + add.f32 %f2118, %f2117, 0fBF800000; + add.f32 %f2119, %f2117, 0f3F800000; + rcp.approx.ftz.f32 %f2120, %f2119; + add.f32 %f2121, %f2118, %f2118; + mul.f32 %f2123, %f2121, %f2120; + mul.f32 %f2124, %f2123, %f2123; + fma.rn.f32 %f2127, %f1645, %f2124, %f1644; + fma.rn.f32 %f2129, %f2127, %f2124, %f1647; + mul.rn.f32 %f2130, %f2129, %f2124; + mul.rn.f32 %f2131, %f2130, %f2123; + sub.f32 %f2132, %f2118, %f2123; + add.f32 %f2133, %f2132, %f2132; + neg.f32 %f2134, %f2123; + fma.rn.f32 %f2135, %f2134, %f2118, %f2133; + mul.rn.f32 %f2136, %f2120, %f2135; + add.f32 %f2137, %f2131, %f2123; + sub.f32 %f2138, %f2123, %f2137; + add.f32 %f2139, %f2131, %f2138; + add.f32 %f2140, %f2136, %f2139; + add.f32 %f2141, %f2137, %f2140; + sub.f32 %f2142, %f2137, %f2141; + add.f32 %f2143, %f2140, %f2142; + mul.rn.f32 %f2145, %f2116, %f1663; + mul.rn.f32 %f2147, %f2116, %f1665; + add.f32 %f2148, %f2145, %f2141; + sub.f32 %f2149, %f2145, %f2148; + add.f32 %f2150, %f2141, %f2149; + add.f32 %f2151, %f2143, %f2150; + add.f32 %f2152, %f2147, %f2151; + add.f32 %f2153, %f2148, %f2152; + sub.f32 %f2154, %f2148, %f2153; + add.f32 %f2155, %f2152, %f2154; + mul.rn.f32 %f2156, %f1584, %f2153; + neg.f32 %f2157, %f2156; + fma.rn.f32 %f2158, %f1584, %f2153, %f2157; + fma.rn.f32 %f2159, %f1584, %f2155, %f2158; + fma.rn.f32 %f2161, %f2062, %f2153, %f2159; + add.rn.f32 %f2162, %f2156, %f2161; + neg.f32 %f2163, %f2162; + add.rn.f32 %f2164, %f2156, %f2163; + add.rn.f32 %f2165, %f2164, %f2161; + mov.b32 %r761, %f2162; + setp.eq.s32 %p659, %r761, 1118925336; + add.s32 %r762, %r761, -1; + mov.b32 %f2166, %r762; + add.f32 %f2167, %f2165, 0f37000000; + selp.f32 %f393, %f2167, %f2165, %p659; + selp.f32 %f2168, %f2166, %f2162, %p659; + mul.rn.f32 %f2169, %f2168, %f1688; + cvt.rzi.f32.f32 %f2170, %f2169; + abs.f32 %f2171, %f2170; + setp.gt.f32 %p660, %f2171, 0f42FC0000; + mov.b32 %r763, %f2170; + and.b32 %r764, %r763, -2147483648; + or.b32 %r765, %r764, 1123811328; + mov.b32 %f2172, %r765; + selp.f32 %f2173, %f2172, %f2170, %p660; + fma.rn.f32 %f2175, %f2173, %f1694, %f2168; + fma.rn.f32 %f2177, %f2173, %f1696, %f2175; + mul.f32 %f2178, %f2177, 0f3FB8AA3B; + add.f32 %f2179, %f2173, 0f4B40007F; + mov.b32 %r766, %f2179; + shl.b32 %r767, %r766, 23; + mov.b32 %f2180, %r767; + ex2.approx.ftz.f32 %f2181, %f2178; + mul.f32 %f394, %f2181, %f2180; + setp.eq.f32 %p661, %f394, 0f7F800000; + mov.f32 %f3104, 0f7F800000; + @%p661 bra $L__BB3_408; + + fma.rn.f32 %f3104, %f394, %f393, %f394; + +$L__BB3_408: + setp.lt.f32 %p662, %f391, 0f00000000; + and.pred %p40, %p662, %p606; + setp.eq.f32 %p664, %f391, 0f00000000; + @%p664 bra $L__BB3_412; + bra.uni $L__BB3_409; + +$L__BB3_412: + add.f32 %f2186, %f391, %f391; + selp.f32 %f3106, %f2186, 0f00000000, %p606; + bra.uni $L__BB3_413; + +$L__BB3_409: + mov.b32 %r768, %f3104; + xor.b32 %r769, %r768, -2147483648; + mov.b32 %f2182, %r769; + selp.f32 %f3106, %f2182, %f3104, %p40; + setp.geu.f32 %p665, %f391, 0f00000000; + @%p665 bra $L__BB3_413; + + cvt.rzi.f32.f32 %f2184, %f1584; + setp.eq.f32 %p666, %f2184, 0f40000000; + @%p666 bra $L__BB3_413; + + mov.f32 %f3106, 0f7FFFFFFF; + +$L__BB3_413: + add.f32 %f2187, %f392, 0f40000000; + mov.b32 %r770, %f2187; + setp.lt.s32 %p668, %r770, 2139095040; + @%p668 bra $L__BB3_418; + + setp.gtu.f32 %p669, %f392, 0f7F800000; + @%p669 bra $L__BB3_417; + bra.uni $L__BB3_415; + +$L__BB3_417: + add.f32 %f3106, %f391, 0f40000000; + bra.uni $L__BB3_418; + +$L__BB3_415: + setp.neu.f32 %p670, %f392, 0f7F800000; + @%p670 bra $L__BB3_418; + + selp.f32 %f3106, 0fFF800000, 0f7F800000, %p40; + +$L__BB3_418: + mul.f32 %f2189, %f3106, 0fBF000000; + setp.eq.f32 %p671, %f391, 0f3F800000; + selp.f32 %f2190, 0fBF000000, %f2189, %p671; + fma.rn.f32 %f2193, %f2190, %f1976, %f1575; + cvt.sat.f32.f32 %f2196, %f2193; + fma.rm.f32 %f2198, %f2196, %f1979, %f1981; + add.f32 %f2199, %f2198, 0fCB40007F; + neg.f32 %f2200, %f2199; + fma.rn.f32 %f2201, %f2190, %f1688, %f2200; + fma.rn.f32 %f2203, %f2190, %f1994, %f2201; + mov.b32 %r771, %f2198; + shl.b32 %r772, %r771, 23; + mov.b32 %f2204, %r772; + ex2.approx.ftz.f32 %f2205, %f2203; + mul.f32 %f2206, %f2205, %f2204; + sub.f32 %f403, %f390, %f2206; + setp.eq.f32 %p672, %f314, 0f7F800000; + mov.f32 %f3107, 0f7F800000; + @%p672 bra $L__BB3_420; + + fma.rn.f32 %f3107, %f314, %f313, %f314; + +$L__BB3_420: + mov.b32 %r773, %f3107; + xor.b32 %r774, %r773, -2147483648; + mov.b32 %f2207, %r774; + selp.f32 %f406, %f2207, %f3107, %p33; + setp.eq.f32 %p673, %f311, 0f00000000; + selp.f32 %f3108, %f315, %f406, %p673; + @%p37 bra $L__BB3_423; + + cvt.rzi.f32.f32 %f2209, %f1584; + setp.eq.f32 %p674, %f2209, 0f40000000; + mov.f32 %f3108, %f406; + @%p674 bra $L__BB3_423; + + mov.f32 %f3108, 0f7FFFFFFF; + +$L__BB3_423: + setp.eq.f32 %p675, %f319, 0f7F800000; + mov.f32 %f3109, 0f7F800000; + @%p675 bra $L__BB3_425; + + fma.rn.f32 %f3109, %f319, %f318, %f319; + +$L__BB3_425: + mov.b32 %r775, %f3109; + xor.b32 %r776, %r775, -2147483648; + mov.b32 %f2212, %r776; + selp.f32 %f411, %f2212, %f3109, %p34; + setp.eq.f32 %p676, %f316, 0f00000000; + selp.f32 %f3110, %f322, %f411, %p676; + @%p38 bra $L__BB3_428; + + cvt.rzi.f32.f32 %f2214, %f1584; + setp.eq.f32 %p677, %f2214, 0f40000000; + mov.f32 %f3110, %f411; + @%p677 bra $L__BB3_428; + + mov.f32 %f3110, 0f7FFFFFFF; + +$L__BB3_428: + mul.f32 %f2217, %f264, %f403; + mul.f32 %f414, %f352, %f2217; + setp.gtu.f32 %p678, %f312, 0f7F800000; + mov.f32 %f3111, 0f7F800000; + selp.f32 %f2218, %f320, %f3108, %p678; + setp.neu.f32 %p679, %f312, 0f7F800000; + selp.f32 %f2219, %f2218, %f321, %p679; + setp.gt.s32 %p680, %r103, 2139095039; + selp.f32 %f2220, %f2219, %f3108, %p680; + mul.f32 %f2221, %f2220, 0fBF000000; + setp.eq.f32 %p681, %f311, 0f3F800000; + selp.f32 %f2222, 0fBF000000, %f2221, %p681; + fma.rn.f32 %f2225, %f2222, %f1976, %f1575; + cvt.sat.f32.f32 %f2228, %f2225; + fma.rm.f32 %f2230, %f2228, %f1979, %f1981; + setp.gtu.f32 %p682, %f317, 0f7F800000; + selp.f32 %f2231, %f324, %f3110, %p682; + setp.neu.f32 %p683, %f317, 0f7F800000; + selp.f32 %f2232, %f2231, %f325, %p683; + setp.gt.s32 %p684, %r104, 2139095039; + selp.f32 %f2233, %f2232, %f3110, %p684; + mul.f32 %f2234, %f2233, 0fBF000000; + setp.eq.f32 %p685, %f316, 0f3F800000; + selp.f32 %f2235, 0fBF000000, %f2234, %p685; + fma.rn.f32 %f2236, %f2235, %f1976, %f1575; + cvt.sat.f32.f32 %f2237, %f2236; + fma.rm.f32 %f2238, %f2237, %f1979, %f1981; + add.f32 %f2239, %f2238, 0fCB40007F; + neg.f32 %f2240, %f2239; + fma.rn.f32 %f2241, %f2235, %f1688, %f2240; + fma.rn.f32 %f2243, %f2235, %f1994, %f2241; + mov.b32 %r777, %f2238; + shl.b32 %r778, %r777, 23; + mov.b32 %f2244, %r778; + ex2.approx.ftz.f32 %f2245, %f2243; + mul.f32 %f2246, %f2245, %f2244; + mul.f32 %f2247, %f292, %f2246; + mov.b32 %r779, %f2230; + shl.b32 %r780, %r779, 23; + mov.b32 %f2248, %r780; + add.f32 %f2249, %f2230, 0fCB40007F; + neg.f32 %f2250, %f2249; + fma.rn.f32 %f2251, %f2222, %f1688, %f2250; + fma.rn.f32 %f2252, %f2222, %f1994, %f2251; + ex2.approx.ftz.f32 %f2253, %f2252; + mul.f32 %f2254, %f2253, %f2248; + mul.f32 %f2255, %f323, %f2254; + sub.f32 %f2256, %f2255, %f2247; + mul.f32 %f2257, %f265, %f2256; + mul.f32 %f415, %f364, %f2257; + add.f32 %f2258, %f353, 0f3F800000; + sub.f32 %f2259, %f2258, %f3047; + div.rn.f32 %f416, %f2259, %f3043; abs.f32 %f417, %f416; - setp.ltu.f32 %p199, %f417, 0f3F800000; - @%p199 bra BB3_132; - bra.uni BB3_131; - -BB3_132: - mul.f32 %f1913, %f416, %f416; - mov.f32 %f1914, 0f3BA0C9F8; - mov.f32 %f1915, 0fBA1268FB; - fma.rn.f32 %f1916, %f1915, %f1913, %f1914; - mov.f32 %f1917, 0fBCDABFD4; - fma.rn.f32 %f1918, %f1916, %f1913, %f1917; - mov.f32 %f1919, 0f3DE70331; - fma.rn.f32 %f1920, %f1918, %f1913, %f1919; - mov.f32 %f1921, 0fBEC09330; - fma.rn.f32 %f1922, %f1920, %f1913, %f1921; - mov.f32 %f1923, 0f3F906EBA; - fma.rn.f32 %f1924, %f1922, %f1913, %f1923; - mul.f32 %f3347, %f416, %f1924; - bra.uni BB3_133; - -BB3_131: - mov.f32 %f3144, 0f3F800000; - mov.f32 %f1895, 0f3A03BB71; - mov.f32 %f1896, 0fB7B730FB; - fma.rn.f32 %f1897, %f1896, %f417, %f1895; - mov.f32 %f1898, 0fBBACA3B3; - fma.rn.f32 %f1899, %f1897, %f417, %f1898; - mov.f32 %f1900, 0f3D0A7445; - fma.rn.f32 %f1901, %f1899, %f417, %f1900; - mov.f32 %f1902, 0fBE1B3B75; - fma.rn.f32 %f1903, %f1901, %f417, %f1902; - mov.f32 %f1904, 0fBF6B385A; - fma.rn.f32 %f1905, %f1903, %f417, %f1904; - mov.f32 %f1906, 0fBFD0316E; - fma.rn.f32 %f1907, %f1905, %f417, %f1906; - mov.f32 %f1908, 0fBA031CCE; - fma.rn.f32 %f1909, %f1907, %f417, %f1908; - ex2.approx.ftz.f32 %f1910, %f1909; - sub.f32 %f1912, %f3144, %f1910; - mov.b32 %r212, %f1912; - setp.ltu.f32 %p200, %f417, 0f407AD445; - selp.b32 %r213, %r212, 1065353216, %p200; - mov.b32 %r214, %f416; - and.b32 %r215, %r214, -2147483648; - or.b32 %r216, %r213, %r215; - mov.b32 %f3347, %r216; - -BB3_133: - cvt.rn.f32.s32 %f3146, %r324; - sub.f32 %f3145, %f3146, %f3320; - mul.f32 %f421, %f3145, %f341; - abs.f32 %f422, %f421; - setp.ltu.f32 %p201, %f422, 0f3F800000; - @%p201 bra BB3_135; - bra.uni BB3_134; - -BB3_135: - mul.f32 %f1943, %f421, %f421; - mov.f32 %f1944, 0f3BA0C9F8; - mov.f32 %f1945, 0fBA1268FB; - fma.rn.f32 %f1946, %f1945, %f1943, %f1944; - mov.f32 %f1947, 0fBCDABFD4; - fma.rn.f32 %f1948, %f1946, %f1943, %f1947; - mov.f32 %f1949, 0f3DE70331; - fma.rn.f32 %f1950, %f1948, %f1943, %f1949; - mov.f32 %f1951, 0fBEC09330; - fma.rn.f32 %f1952, %f1950, %f1943, %f1951; - mov.f32 %f1953, 0f3F906EBA; - fma.rn.f32 %f1954, %f1952, %f1943, %f1953; - mul.f32 %f3348, %f421, %f1954; - bra.uni BB3_136; - -BB3_134: - mov.f32 %f3147, 0f3F800000; - mov.f32 %f1925, 0f3A03BB71; - mov.f32 %f1926, 0fB7B730FB; - fma.rn.f32 %f1927, %f1926, %f422, %f1925; - mov.f32 %f1928, 0fBBACA3B3; - fma.rn.f32 %f1929, %f1927, %f422, %f1928; - mov.f32 %f1930, 0f3D0A7445; - fma.rn.f32 %f1931, %f1929, %f422, %f1930; - mov.f32 %f1932, 0fBE1B3B75; - fma.rn.f32 %f1933, %f1931, %f422, %f1932; - mov.f32 %f1934, 0fBF6B385A; - fma.rn.f32 %f1935, %f1933, %f422, %f1934; - mov.f32 %f1936, 0fBFD0316E; - fma.rn.f32 %f1937, %f1935, %f422, %f1936; - mov.f32 %f1938, 0fBA031CCE; - fma.rn.f32 %f1939, %f1937, %f422, %f1938; - ex2.approx.ftz.f32 %f1940, %f1939; - sub.f32 %f1942, %f3147, %f1940; - mov.b32 %r217, %f1942; - setp.ltu.f32 %p202, %f422, 0f407AD445; - selp.b32 %r218, %r217, 1065353216, %p202; - mov.b32 %r219, %f421; - and.b32 %r220, %r219, -2147483648; - or.b32 %r221, %r218, %r220; - mov.b32 %f3348, %r221; - -BB3_136: - sub.f32 %f1957, %f3347, %f3348; - mul.f32 %f426, %f1957, 0f3F000000; - mul.f32 %f1958, %f412, %f3319; - fma.rn.f32 %f427, %f426, %f1958, %f3226; - mad.lo.s32 %r222, %r324, %r71, %r323; - add.s32 %r223, %r222, %r4; - mul.wide.s32 %rd100, %r223, 4; - add.s64 %rd101, %rd1, %rd100; - ld.global.f32 %f428, [%rd101]; - // inline asm - rcp.approx.ftz.f32 %f1955,%f346; - // inline asm - mul.f32 %f1959, %f1955, %f347; - mul.f32 %f1960, %f1959, %f1959; - mov.f32 %f1961, 0f3C4CAF63; - mov.f32 %f1962, 0f3B18F0FE; - fma.rn.f32 %f1963, %f1962, %f1960, %f1961; - mov.f32 %f1964, 0f3DAAAABD; - fma.rn.f32 %f1965, %f1963, %f1960, %f1964; - mul.rn.f32 %f1966, %f1965, %f1960; - mul.rn.f32 %f1967, %f1966, %f1959; - sub.f32 %f1968, %f345, %f1959; - neg.f32 %f1969, %f1959; - add.f32 %f1970, %f1968, %f1968; - fma.rn.f32 %f1971, %f1969, %f345, %f1970; - mul.rn.f32 %f1972, %f1955, %f1971; - add.f32 %f1973, %f1967, %f1959; - sub.f32 %f1974, %f1959, %f1973; - add.f32 %f1975, %f1967, %f1974; - add.f32 %f1976, %f1972, %f1975; - add.f32 %f1977, %f1973, %f1976; - sub.f32 %f1978, %f1973, %f1977; - add.f32 %f1979, %f1976, %f1978; - add.f32 %f1980, %f348, %f1977; - sub.f32 %f1981, %f348, %f1980; - add.f32 %f1982, %f1977, %f1981; - add.f32 %f1983, %f1979, %f1982; - add.f32 %f1984, %f349, %f1983; - add.f32 %f1985, %f1980, %f1984; - sub.f32 %f1986, %f1980, %f1985; - add.f32 %f1987, %f1984, %f1986; - mul.rn.f32 %f1989, %f1788, %f1985; - neg.f32 %f1990, %f1989; - fma.rn.f32 %f1991, %f1788, %f1985, %f1990; - fma.rn.f32 %f1992, %f1788, %f1987, %f1991; - mov.f32 %f1993, 0f00000000; - fma.rn.f32 %f1994, %f1993, %f1985, %f1992; - add.rn.f32 %f1995, %f1989, %f1994; - neg.f32 %f1996, %f1995; - add.rn.f32 %f1997, %f1989, %f1996; - add.rn.f32 %f1998, %f1997, %f1994; - mov.b32 %r224, %f1995; - setp.eq.s32 %p203, %r224, 1118925336; - add.s32 %r225, %r224, -1; - mov.b32 %f1999, %r225; - add.f32 %f2000, %f1998, 0f37000000; - selp.f32 %f2001, %f1999, %f1995, %p203; - selp.f32 %f429, %f2000, %f1998, %p203; - mul.f32 %f2002, %f2001, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2003, %f2002; - mov.f32 %f2004, 0fBF317200; - fma.rn.f32 %f2005, %f2003, %f2004, %f2001; - mov.f32 %f2006, 0fB5BFBE8E; - fma.rn.f32 %f2007, %f2003, %f2006, %f2005; - mul.f32 %f2008, %f2007, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2009, %f2008; - add.f32 %f2010, %f2003, 0f00000000; - ex2.approx.f32 %f2011, %f2010; - mul.f32 %f2012, %f2009, %f2011; - setp.lt.f32 %p204, %f2001, 0fC2D20000; - selp.f32 %f2013, 0f00000000, %f2012, %p204; - setp.gt.f32 %p205, %f2001, 0f42D20000; - selp.f32 %f3349, 0f7F800000, %f2013, %p205; - setp.eq.f32 %p206, %f3349, 0f7F800000; - @%p206 bra BB3_138; - - fma.rn.f32 %f3349, %f3349, %f429, %f3349; - -BB3_138: - setp.geu.f32 %p370, %f342, 0f00000000; - mov.b32 %r226, %f3349; - xor.b32 %r227, %r226, -2147483648; - mov.b32 %f2014, %r227; - selp.f32 %f433, %f2014, %f3349, %p7; - setp.eq.f32 %p207, %f342, 0f00000000; - selp.f32 %f3350, %f350, %f433, %p207; - @%p370 bra BB3_140; - - cvt.rzi.f32.f32 %f2016, %f1788; - setp.neu.f32 %p208, %f2016, 0f40000000; - selp.f32 %f3350, 0f7FFFFFFF, %f433, %p208; - -BB3_140: - abs.f32 %f3157, %f342; - add.f32 %f3156, %f3157, 0f40000000; - mov.b32 %r304, %f3156; - mov.f32 %f3155, 0f00000000; - mov.f32 %f3154, 0f3DAAAABD; - mov.f32 %f3153, 0f3C4CAF63; - mov.f32 %f3152, 0f3B18F0FE; - mov.f32 %f3151, 0fB5BFBE8E; - mov.f32 %f3150, 0fBF317200; - selp.f32 %f3149, 0fFF800000, 0f7F800000, %p7; - add.f32 %f3148, %f342, 0f40000000; - setp.gtu.f32 %p209, %f3157, 0f7F800000; - selp.f32 %f2019, %f3148, %f3350, %p209; - setp.neu.f32 %p210, %f3157, 0f7F800000; - selp.f32 %f2020, %f2019, %f3149, %p210; - setp.gt.s32 %p211, %r304, 2139095039; - selp.f32 %f2021, %f2020, %f3350, %p211; - mul.f32 %f2022, %f2021, 0fBF000000; - setp.eq.f32 %p212, %f342, 0f3F800000; - selp.f32 %f2023, 0fBF000000, %f2022, %p212; - mul.f32 %f2024, %f2023, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2025, %f2024; - fma.rn.f32 %f2027, %f2025, %f3150, %f2023; - fma.rn.f32 %f2029, %f2025, %f3151, %f2027; - mul.f32 %f2030, %f2029, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2031, %f2030; - add.f32 %f2032, %f2025, 0f00000000; - ex2.approx.f32 %f2033, %f2032; - mul.f32 %f2034, %f2031, %f2033; - setp.lt.f32 %p213, %f2023, 0fC2D20000; - selp.f32 %f2035, 0f00000000, %f2034, %p213; - setp.gt.f32 %p214, %f2023, 0f42D20000; - selp.f32 %f437, 0f7F800000, %f2035, %p214; - // inline asm - rcp.approx.ftz.f32 %f2017,%f356; - // inline asm - mul.f32 %f2036, %f2017, %f357; - mul.f32 %f2037, %f2036, %f2036; - fma.rn.f32 %f2040, %f3152, %f2037, %f3153; - fma.rn.f32 %f2042, %f2040, %f2037, %f3154; - mul.rn.f32 %f2043, %f2042, %f2037; - mul.rn.f32 %f2044, %f2043, %f2036; - sub.f32 %f2045, %f355, %f2036; - neg.f32 %f2046, %f2036; - add.f32 %f2047, %f2045, %f2045; - fma.rn.f32 %f2048, %f2046, %f355, %f2047; - mul.rn.f32 %f2049, %f2017, %f2048; - add.f32 %f2050, %f2044, %f2036; - sub.f32 %f2051, %f2036, %f2050; - add.f32 %f2052, %f2044, %f2051; - add.f32 %f2053, %f2049, %f2052; - add.f32 %f2054, %f2050, %f2053; - sub.f32 %f2055, %f2050, %f2054; - add.f32 %f2056, %f2053, %f2055; - add.f32 %f2057, %f358, %f2054; - sub.f32 %f2058, %f358, %f2057; - add.f32 %f2059, %f2054, %f2058; - add.f32 %f2060, %f2056, %f2059; - add.f32 %f2061, %f359, %f2060; - add.f32 %f2062, %f2057, %f2061; - sub.f32 %f2063, %f2057, %f2062; - add.f32 %f2064, %f2061, %f2063; - mul.rn.f32 %f2066, %f1788, %f2062; - neg.f32 %f2067, %f2066; - fma.rn.f32 %f2068, %f1788, %f2062, %f2067; - fma.rn.f32 %f2069, %f1788, %f2064, %f2068; - fma.rn.f32 %f2071, %f3155, %f2062, %f2069; - add.rn.f32 %f2072, %f2066, %f2071; - neg.f32 %f2073, %f2072; - add.rn.f32 %f2074, %f2066, %f2073; - add.rn.f32 %f2075, %f2074, %f2071; - mov.b32 %r228, %f2072; - setp.eq.s32 %p215, %r228, 1118925336; - add.s32 %r229, %r228, -1; - mov.b32 %f2076, %r229; - add.f32 %f2077, %f2075, 0f37000000; - selp.f32 %f2078, %f2076, %f2072, %p215; - selp.f32 %f438, %f2077, %f2075, %p215; - mul.f32 %f2079, %f2078, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2080, %f2079; - fma.rn.f32 %f2081, %f2080, %f3150, %f2078; - fma.rn.f32 %f2082, %f2080, %f3151, %f2081; - mul.f32 %f2083, %f2082, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2084, %f2083; - add.f32 %f2085, %f2080, 0f00000000; - ex2.approx.f32 %f2086, %f2085; - mul.f32 %f2087, %f2084, %f2086; - setp.lt.f32 %p216, %f2078, 0fC2D20000; - selp.f32 %f2088, 0f00000000, %f2087, %p216; - setp.gt.f32 %p217, %f2078, 0f42D20000; - selp.f32 %f3351, 0f7F800000, %f2088, %p217; - setp.eq.f32 %p218, %f3351, 0f7F800000; - @%p218 bra BB3_142; - - fma.rn.f32 %f3351, %f3351, %f438, %f3351; - -BB3_142: - setp.geu.f32 %p371, %f353, 0f00000000; - mov.b32 %r230, %f3351; - xor.b32 %r231, %r230, -2147483648; - mov.b32 %f2089, %r231; - selp.f32 %f442, %f2089, %f3351, %p8; - setp.eq.f32 %p219, %f353, 0f00000000; - selp.f32 %f3352, %f360, %f442, %p219; - @%p371 bra BB3_144; - - cvt.rzi.f32.f32 %f2091, %f1788; - setp.neu.f32 %p220, %f2091, 0f40000000; - selp.f32 %f3352, 0f7FFFFFFF, %f442, %p220; - -BB3_144: - abs.f32 %f3169, %f353; - add.f32 %f3168, %f3169, 0f40000000; - mov.b32 %r305, %f3168; - mov.f32 %f3167, 0f35BFBE8E; - mov.f32 %f3166, 0f3F317200; - selp.f32 %f3165, 0fFF800000, 0f7F800000, %p8; - add.f32 %f3164, %f353, 0f40000000; - mov.f32 %f3163, 0f00000000; - mov.f32 %f3162, 0f3DAAAABD; - mov.f32 %f3161, 0f3C4CAF63; - mov.f32 %f3160, 0f3B18F0FE; - mov.f32 %f3159, 0fB5BFBE8E; - mov.f32 %f3158, 0fBF317200; - setp.gtu.f32 %p221, %f3169, 0f7F800000; - selp.f32 %f2094, %f3164, %f3352, %p221; - setp.neu.f32 %p222, %f3169, 0f7F800000; - selp.f32 %f2095, %f2094, %f3165, %p222; - setp.gt.s32 %p223, %r305, 2139095039; - selp.f32 %f2096, %f2095, %f3352, %p223; - mul.f32 %f2097, %f2096, 0fBF000000; - setp.eq.f32 %p224, %f353, 0f3F800000; - selp.f32 %f2098, 0fBF000000, %f2097, %p224; - mul.f32 %f2099, %f2098, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2100, %f2099; - fma.rn.f32 %f2102, %f2100, %f3158, %f2098; - fma.rn.f32 %f2104, %f2100, %f3159, %f2102; - mul.f32 %f2105, %f2104, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2106, %f2105; - add.f32 %f2107, %f2100, 0f00000000; - ex2.approx.f32 %f2108, %f2107; - mul.f32 %f2109, %f2106, %f2108; - setp.lt.f32 %p225, %f2098, 0fC2D20000; - selp.f32 %f2110, 0f00000000, %f2109, %p225; - setp.gt.f32 %p226, %f2098, 0f42D20000; - selp.f32 %f2111, 0f7F800000, %f2110, %p226; - sub.f32 %f2112, %f437, %f2111; - mul.f32 %f2113, %f328, %f2112; - mul.f32 %f446, %f426, %f2113; - add.f32 %f2114, %f413, 0f3F800000; - sub.f32 %f2115, %f2114, %f3320; - div.rn.f32 %f447, %f2115, %f3316; - abs.f32 %f448, %f447; - setp.lt.f32 %p227, %f448, 0f00800000; - mul.f32 %f2116, %f448, 0f4B800000; - selp.f32 %f2117, 0fC3170000, 0fC2FE0000, %p227; - selp.f32 %f2118, %f2116, %f448, %p227; - mov.b32 %r232, %f2118; - and.b32 %r233, %r232, 8388607; - or.b32 %r234, %r233, 1065353216; - mov.b32 %f2119, %r234; - shr.u32 %r235, %r232, 23; - cvt.rn.f32.u32 %f2120, %r235; - add.f32 %f2121, %f2117, %f2120; - setp.gt.f32 %p228, %f2119, 0f3FB504F3; - mul.f32 %f2122, %f2119, 0f3F000000; - add.f32 %f2123, %f2121, 0f3F800000; - selp.f32 %f2124, %f2122, %f2119, %p228; - selp.f32 %f2125, %f2123, %f2121, %p228; - add.f32 %f449, %f2124, 0fBF800000; - add.f32 %f2093, %f2124, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f2092,%f2093; - // inline asm - add.f32 %f451, %f449, %f449; - mul.f32 %f2126, %f2092, %f451; - mul.f32 %f2127, %f2126, %f2126; - fma.rn.f32 %f2130, %f3160, %f2127, %f3161; - fma.rn.f32 %f2132, %f2130, %f2127, %f3162; - mul.rn.f32 %f2133, %f2132, %f2127; - mul.rn.f32 %f2134, %f2133, %f2126; - sub.f32 %f2135, %f449, %f2126; - neg.f32 %f2136, %f2126; - add.f32 %f2137, %f2135, %f2135; - fma.rn.f32 %f2138, %f2136, %f449, %f2137; - mul.rn.f32 %f2139, %f2092, %f2138; - add.f32 %f2140, %f2134, %f2126; - sub.f32 %f2141, %f2126, %f2140; - add.f32 %f2142, %f2134, %f2141; - add.f32 %f2143, %f2139, %f2142; - add.f32 %f2144, %f2140, %f2143; - sub.f32 %f2145, %f2140, %f2144; - add.f32 %f2146, %f2143, %f2145; - mul.rn.f32 %f452, %f2125, %f3166; - mul.rn.f32 %f453, %f2125, %f3167; - add.f32 %f2149, %f452, %f2144; - sub.f32 %f2150, %f452, %f2149; - add.f32 %f2151, %f2144, %f2150; - add.f32 %f2152, %f2146, %f2151; - add.f32 %f2153, %f453, %f2152; - add.f32 %f2154, %f2149, %f2153; - sub.f32 %f2155, %f2149, %f2154; - add.f32 %f2156, %f2153, %f2155; - mul.rn.f32 %f2158, %f1788, %f2154; - neg.f32 %f2159, %f2158; - fma.rn.f32 %f2160, %f1788, %f2154, %f2159; - fma.rn.f32 %f2161, %f1788, %f2156, %f2160; - fma.rn.f32 %f2163, %f3163, %f2154, %f2161; - add.rn.f32 %f2164, %f2158, %f2163; - neg.f32 %f2165, %f2164; - add.rn.f32 %f2166, %f2158, %f2165; - add.rn.f32 %f2167, %f2166, %f2163; - mov.b32 %r236, %f2164; - setp.eq.s32 %p229, %r236, 1118925336; - add.s32 %r237, %r236, -1; - mov.b32 %f2168, %r237; - add.f32 %f2169, %f2167, 0f37000000; - selp.f32 %f2170, %f2168, %f2164, %p229; - selp.f32 %f454, %f2169, %f2167, %p229; - mul.f32 %f2171, %f2170, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2172, %f2171; - fma.rn.f32 %f2173, %f2172, %f3158, %f2170; - fma.rn.f32 %f2174, %f2172, %f3159, %f2173; - mul.f32 %f2175, %f2174, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2176, %f2175; - add.f32 %f2177, %f2172, 0f00000000; - ex2.approx.f32 %f2178, %f2177; - mul.f32 %f2179, %f2176, %f2178; - setp.lt.f32 %p230, %f2170, 0fC2D20000; - selp.f32 %f2180, 0f00000000, %f2179, %p230; - setp.gt.f32 %p231, %f2170, 0f42D20000; - selp.f32 %f3353, 0f7F800000, %f2180, %p231; - setp.eq.f32 %p232, %f3353, 0f7F800000; - @%p232 bra BB3_146; - - fma.rn.f32 %f3353, %f3353, %f454, %f3353; - -BB3_146: - setp.lt.f32 %p233, %f447, 0f00000000; - and.pred %p11, %p233, %p188; - mov.b32 %r238, %f3353; - xor.b32 %r239, %r238, -2147483648; - mov.b32 %f2181, %r239; - selp.f32 %f3355, %f2181, %f3353, %p11; - setp.eq.f32 %p235, %f447, 0f00000000; - @%p235 bra BB3_149; - bra.uni BB3_147; - -BB3_149: - add.f32 %f2184, %f447, %f447; - selp.f32 %f3355, %f2184, 0f00000000, %p188; - bra.uni BB3_150; - -BB3_147: - setp.geu.f32 %p236, %f447, 0f00000000; - @%p236 bra BB3_150; - - cvt.rzi.f32.f32 %f2183, %f1788; - setp.neu.f32 %p237, %f2183, 0f40000000; - selp.f32 %f3355, 0f7FFFFFFF, %f3355, %p237; - -BB3_150: - abs.f32 %f3170, %f447; - add.f32 %f2185, %f3170, 0f40000000; - mov.b32 %r47, %f2185; - setp.lt.s32 %p239, %r47, 2139095040; - @%p239 bra BB3_155; - - abs.f32 %f3181, %f447; - setp.gtu.f32 %p240, %f3181, 0f7F800000; - @%p240 bra BB3_154; - bra.uni BB3_152; - -BB3_154: - add.f32 %f3355, %f447, 0f40000000; - bra.uni BB3_155; - -BB3_152: - abs.f32 %f3182, %f447; - setp.neu.f32 %p241, %f3182, 0f7F800000; - @%p241 bra BB3_155; - - selp.f32 %f3355, 0fFF800000, 0f7F800000, %p11; - -BB3_155: - mov.f32 %f3180, 0f35BFBE8E; - mov.f32 %f3179, 0f3F317200; - mov.f32 %f3178, 0f00000000; - mov.f32 %f3177, 0f3DAAAABD; - mov.f32 %f3176, 0f3C4CAF63; - mov.f32 %f3175, 0f3B18F0FE; - mov.f32 %f3174, 0fB5BFBE8E; - mov.f32 %f3173, 0fBF317200; - cvt.rn.f32.s32 %f3172, %r324; - sub.f32 %f3171, %f3172, %f3320; - mul.f32 %f2188, %f3355, 0fBF000000; - setp.eq.f32 %p242, %f447, 0f3F800000; - selp.f32 %f2189, 0fBF000000, %f2188, %p242; - mul.f32 %f2190, %f2189, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2191, %f2190; - fma.rn.f32 %f2193, %f2191, %f3173, %f2189; - fma.rn.f32 %f2195, %f2191, %f3174, %f2193; - mul.f32 %f2196, %f2195, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2197, %f2196; - add.f32 %f2198, %f2191, 0f00000000; - ex2.approx.f32 %f2199, %f2198; - mul.f32 %f2200, %f2197, %f2199; - setp.lt.f32 %p243, %f2189, 0fC2D20000; - selp.f32 %f2201, 0f00000000, %f2200, %p243; - setp.gt.f32 %p244, %f2189, 0f42D20000; - selp.f32 %f465, 0f7F800000, %f2201, %p244; - div.rn.f32 %f466, %f3171, %f3316; - abs.f32 %f467, %f466; - setp.lt.f32 %p245, %f467, 0f00800000; - mul.f32 %f2202, %f467, 0f4B800000; - selp.f32 %f2203, 0fC3170000, 0fC2FE0000, %p245; - selp.f32 %f2204, %f2202, %f467, %p245; - mov.b32 %r240, %f2204; - and.b32 %r241, %r240, 8388607; - or.b32 %r242, %r241, 1065353216; - mov.b32 %f2205, %r242; - shr.u32 %r243, %r240, 23; - cvt.rn.f32.u32 %f2206, %r243; - add.f32 %f2207, %f2203, %f2206; - setp.gt.f32 %p246, %f2205, 0f3FB504F3; - mul.f32 %f2208, %f2205, 0f3F000000; - add.f32 %f2209, %f2207, 0f3F800000; - selp.f32 %f2210, %f2208, %f2205, %p246; - selp.f32 %f2211, %f2209, %f2207, %p246; - add.f32 %f468, %f2210, 0fBF800000; - add.f32 %f2187, %f2210, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f2186,%f2187; - // inline asm - add.f32 %f470, %f468, %f468; - mul.f32 %f2212, %f2186, %f470; - mul.f32 %f2213, %f2212, %f2212; - fma.rn.f32 %f2216, %f3175, %f2213, %f3176; - fma.rn.f32 %f2218, %f2216, %f2213, %f3177; - mul.rn.f32 %f2219, %f2218, %f2213; - mul.rn.f32 %f2220, %f2219, %f2212; - sub.f32 %f2221, %f468, %f2212; - neg.f32 %f2222, %f2212; - add.f32 %f2223, %f2221, %f2221; - fma.rn.f32 %f2224, %f2222, %f468, %f2223; - mul.rn.f32 %f2225, %f2186, %f2224; - add.f32 %f2226, %f2220, %f2212; - sub.f32 %f2227, %f2212, %f2226; - add.f32 %f2228, %f2220, %f2227; - add.f32 %f2229, %f2225, %f2228; - add.f32 %f2230, %f2226, %f2229; - sub.f32 %f2231, %f2226, %f2230; - add.f32 %f2232, %f2229, %f2231; - mul.rn.f32 %f471, %f2211, %f3179; - mul.rn.f32 %f472, %f2211, %f3180; - add.f32 %f2235, %f471, %f2230; - sub.f32 %f2236, %f471, %f2235; - add.f32 %f2237, %f2230, %f2236; - add.f32 %f2238, %f2232, %f2237; - add.f32 %f2239, %f472, %f2238; - add.f32 %f2240, %f2235, %f2239; - sub.f32 %f2241, %f2235, %f2240; - add.f32 %f2242, %f2239, %f2241; - mul.rn.f32 %f2244, %f1788, %f2240; - neg.f32 %f2245, %f2244; - fma.rn.f32 %f2246, %f1788, %f2240, %f2245; - fma.rn.f32 %f2247, %f1788, %f2242, %f2246; - fma.rn.f32 %f2249, %f3178, %f2240, %f2247; - add.rn.f32 %f2250, %f2244, %f2249; - neg.f32 %f2251, %f2250; - add.rn.f32 %f2252, %f2244, %f2251; - add.rn.f32 %f2253, %f2252, %f2249; - mov.b32 %r244, %f2250; - setp.eq.s32 %p247, %r244, 1118925336; - add.s32 %r245, %r244, -1; - mov.b32 %f2254, %r245; - add.f32 %f2255, %f2253, 0f37000000; - selp.f32 %f2256, %f2254, %f2250, %p247; - selp.f32 %f473, %f2255, %f2253, %p247; - mul.f32 %f2257, %f2256, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2258, %f2257; - fma.rn.f32 %f2259, %f2258, %f3173, %f2256; - fma.rn.f32 %f2260, %f2258, %f3174, %f2259; - mul.f32 %f2261, %f2260, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2262, %f2261; - add.f32 %f2263, %f2258, 0f00000000; - ex2.approx.f32 %f2264, %f2263; - mul.f32 %f2265, %f2262, %f2264; - setp.lt.f32 %p248, %f2256, 0fC2D20000; - selp.f32 %f2266, 0f00000000, %f2265, %p248; - setp.gt.f32 %p249, %f2256, 0f42D20000; - selp.f32 %f3356, 0f7F800000, %f2266, %p249; - setp.eq.f32 %p250, %f3356, 0f7F800000; - @%p250 bra BB3_157; - - fma.rn.f32 %f3356, %f3356, %f473, %f3356; - -BB3_157: - setp.lt.f32 %p251, %f466, 0f00000000; - and.pred %p12, %p251, %p188; - mov.b32 %r246, %f3356; - xor.b32 %r247, %r246, -2147483648; - mov.b32 %f2267, %r247; - selp.f32 %f3358, %f2267, %f3356, %p12; - setp.eq.f32 %p253, %f466, 0f00000000; - @%p253 bra BB3_160; - bra.uni BB3_158; - -BB3_160: - add.f32 %f2270, %f466, %f466; - selp.f32 %f3358, %f2270, 0f00000000, %p188; - bra.uni BB3_161; - -BB3_158: - setp.geu.f32 %p254, %f466, 0f00000000; - @%p254 bra BB3_161; - - cvt.rzi.f32.f32 %f2269, %f1788; - setp.neu.f32 %p255, %f2269, 0f40000000; - selp.f32 %f3358, 0f7FFFFFFF, %f3358, %p255; - -BB3_161: - abs.f32 %f3095, %f466; - add.f32 %f2271, %f3095, 0f40000000; - mov.b32 %r48, %f2271; - setp.lt.s32 %p257, %r48, 2139095040; - @%p257 bra BB3_166; - - abs.f32 %f3196, %f466; - setp.gtu.f32 %p258, %f3196, 0f7F800000; - @%p258 bra BB3_165; - bra.uni BB3_163; - -BB3_165: - add.f32 %f3358, %f466, 0f40000000; - bra.uni BB3_166; - -BB3_163: - abs.f32 %f3197, %f466; - setp.neu.f32 %p259, %f3197, 0f7F800000; - @%p259 bra BB3_166; - - selp.f32 %f3358, 0fFF800000, 0f7F800000, %p12; - -BB3_166: - mov.f32 %f3101, 0f00000000; - mov.f32 %f3100, 0f3DAAAABD; - mov.f32 %f3099, 0f3C4CAF63; - mov.f32 %f3098, 0f3B18F0FE; - mov.f32 %f3097, 0fB5BFBE8E; - mov.f32 %f3096, 0fBF317200; - mul.f32 %f2274, %f3358, 0fBF000000; - setp.eq.f32 %p260, %f466, 0f3F800000; - selp.f32 %f2275, 0fBF000000, %f2274, %p260; - mul.f32 %f2276, %f2275, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2277, %f2276; - fma.rn.f32 %f2279, %f2277, %f3096, %f2275; - fma.rn.f32 %f2281, %f2277, %f3097, %f2279; - mul.f32 %f2282, %f2281, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2283, %f2282; - add.f32 %f2284, %f2277, 0f00000000; - ex2.approx.f32 %f2285, %f2284; - mul.f32 %f2286, %f2283, %f2285; - setp.lt.f32 %p261, %f2275, 0fC2D20000; - selp.f32 %f2287, 0f00000000, %f2286, %p261; - setp.gt.f32 %p262, %f2275, 0f42D20000; - selp.f32 %f2288, 0f7F800000, %f2287, %p262; - sub.f32 %f2289, %f465, %f2288; - mul.f32 %f2290, %f329, %f2289; - mul.f32 %f484, %f412, %f2290; - // inline asm - rcp.approx.ftz.f32 %f2272,%f346; - // inline asm - mul.f32 %f2291, %f2272, %f347; - mul.f32 %f2292, %f2291, %f2291; - fma.rn.f32 %f2295, %f3098, %f2292, %f3099; - fma.rn.f32 %f2297, %f2295, %f2292, %f3100; - mul.rn.f32 %f2298, %f2297, %f2292; - mul.rn.f32 %f2299, %f2298, %f2291; - sub.f32 %f2300, %f345, %f2291; - neg.f32 %f2301, %f2291; - add.f32 %f2302, %f2300, %f2300; - fma.rn.f32 %f2303, %f2301, %f345, %f2302; - mul.rn.f32 %f2304, %f2272, %f2303; - add.f32 %f2305, %f2299, %f2291; - sub.f32 %f2306, %f2291, %f2305; - add.f32 %f2307, %f2299, %f2306; - add.f32 %f2308, %f2304, %f2307; - add.f32 %f2309, %f2305, %f2308; - sub.f32 %f2310, %f2305, %f2309; - add.f32 %f2311, %f2308, %f2310; - add.f32 %f2312, %f348, %f2309; - sub.f32 %f2313, %f348, %f2312; - add.f32 %f2314, %f2309, %f2313; - add.f32 %f2315, %f2311, %f2314; - add.f32 %f2316, %f349, %f2315; - add.f32 %f2317, %f2312, %f2316; - sub.f32 %f2318, %f2312, %f2317; - add.f32 %f2319, %f2316, %f2318; - mul.rn.f32 %f2321, %f1788, %f2317; - neg.f32 %f2322, %f2321; - fma.rn.f32 %f2323, %f1788, %f2317, %f2322; - fma.rn.f32 %f2324, %f1788, %f2319, %f2323; - fma.rn.f32 %f2326, %f3101, %f2317, %f2324; - add.rn.f32 %f2327, %f2321, %f2326; - neg.f32 %f2328, %f2327; - add.rn.f32 %f2329, %f2321, %f2328; - add.rn.f32 %f2330, %f2329, %f2326; - mov.b32 %r248, %f2327; - setp.eq.s32 %p263, %r248, 1118925336; - add.s32 %r249, %r248, -1; - mov.b32 %f2331, %r249; - add.f32 %f2332, %f2330, 0f37000000; - selp.f32 %f2333, %f2331, %f2327, %p263; - selp.f32 %f485, %f2332, %f2330, %p263; - mul.f32 %f2334, %f2333, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2335, %f2334; - fma.rn.f32 %f2336, %f2335, %f3096, %f2333; - fma.rn.f32 %f2337, %f2335, %f3097, %f2336; - mul.f32 %f2338, %f2337, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2339, %f2338; - add.f32 %f2340, %f2335, 0f00000000; - ex2.approx.f32 %f2341, %f2340; - mul.f32 %f2342, %f2339, %f2341; - setp.lt.f32 %p264, %f2333, 0fC2D20000; - selp.f32 %f2343, 0f00000000, %f2342, %p264; - setp.gt.f32 %p265, %f2333, 0f42D20000; - selp.f32 %f3359, 0f7F800000, %f2343, %p265; - setp.eq.f32 %p266, %f3359, 0f7F800000; - @%p266 bra BB3_168; - - fma.rn.f32 %f3359, %f3359, %f485, %f3359; - -BB3_168: - setp.eq.f32 %p356, %f342, 0f00000000; - setp.geu.f32 %p355, %f342, 0f00000000; - mov.b32 %r250, %f3359; - xor.b32 %r251, %r250, -2147483648; - mov.b32 %f2344, %r251; - selp.f32 %f489, %f2344, %f3359, %p7; - selp.f32 %f3360, %f350, %f489, %p356; - @%p355 bra BB3_170; - - cvt.rzi.f32.f32 %f2346, %f1788; - setp.neu.f32 %p268, %f2346, 0f40000000; - selp.f32 %f3360, 0f7FFFFFFF, %f489, %p268; - -BB3_170: - abs.f32 %f3111, %f342; - setp.eq.f32 %p360, %f342, 0f3F800000; - add.f32 %f3110, %f3111, 0f40000000; - mov.b32 %r299, %f3110; - setp.gt.s32 %p359, %r299, 2139095039; - setp.neu.f32 %p358, %f3111, 0f7F800000; - setp.gtu.f32 %p357, %f3111, 0f7F800000; - mov.f32 %f3109, 0f00000000; - mov.f32 %f3108, 0f3DAAAABD; - mov.f32 %f3107, 0f3C4CAF63; - mov.f32 %f3106, 0f3B18F0FE; - mov.f32 %f3105, 0fB5BFBE8E; - mov.f32 %f3104, 0fBF317200; - selp.f32 %f3103, 0fFF800000, 0f7F800000, %p7; - add.f32 %f3102, %f342, 0f40000000; - selp.f32 %f2349, %f3102, %f3360, %p357; - selp.f32 %f2350, %f2349, %f3103, %p358; - selp.f32 %f2351, %f2350, %f3360, %p359; - mul.f32 %f2352, %f2351, 0fBF000000; - selp.f32 %f2353, 0fBF000000, %f2352, %p360; - mul.f32 %f2354, %f2353, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2355, %f2354; - fma.rn.f32 %f2357, %f2355, %f3104, %f2353; - fma.rn.f32 %f2359, %f2355, %f3105, %f2357; - mul.f32 %f2360, %f2359, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2361, %f2360; - add.f32 %f2362, %f2355, 0f00000000; - ex2.approx.f32 %f2363, %f2362; - mul.f32 %f2364, %f2361, %f2363; - setp.lt.f32 %p273, %f2353, 0fC2D20000; - selp.f32 %f2365, 0f00000000, %f2364, %p273; - setp.gt.f32 %p274, %f2353, 0f42D20000; - selp.f32 %f493, 0f7F800000, %f2365, %p274; - // inline asm - rcp.approx.ftz.f32 %f2347,%f356; - // inline asm - mul.f32 %f2366, %f2347, %f357; - mul.f32 %f2367, %f2366, %f2366; - fma.rn.f32 %f2370, %f3106, %f2367, %f3107; - fma.rn.f32 %f2372, %f2370, %f2367, %f3108; - mul.rn.f32 %f2373, %f2372, %f2367; - mul.rn.f32 %f2374, %f2373, %f2366; - sub.f32 %f2375, %f355, %f2366; - neg.f32 %f2376, %f2366; - add.f32 %f2377, %f2375, %f2375; - fma.rn.f32 %f2378, %f2376, %f355, %f2377; - mul.rn.f32 %f2379, %f2347, %f2378; - add.f32 %f2380, %f2374, %f2366; - sub.f32 %f2381, %f2366, %f2380; - add.f32 %f2382, %f2374, %f2381; - add.f32 %f2383, %f2379, %f2382; - add.f32 %f2384, %f2380, %f2383; - sub.f32 %f2385, %f2380, %f2384; - add.f32 %f2386, %f2383, %f2385; - add.f32 %f2387, %f358, %f2384; - sub.f32 %f2388, %f358, %f2387; - add.f32 %f2389, %f2384, %f2388; + setp.lt.f32 %p686, %f417, 0f00800000; + mul.f32 %f2260, %f417, 0f4B800000; + selp.f32 %f2261, %f2260, %f417, %p686; + selp.f32 %f2262, 0fC3170000, 0fC2FE0000, %p686; + mov.b32 %r781, %f2261; + and.b32 %r782, %r781, 8388607; + or.b32 %r783, %r782, 1065353216; + mov.b32 %f2263, %r783; + shr.u32 %r784, %r781, 23; + cvt.rn.f32.u32 %f2264, %r784; + add.f32 %f2265, %f2262, %f2264; + setp.gt.f32 %p687, %f2263, 0f3FB504F3; + mul.f32 %f2266, %f2263, 0f3F000000; + add.f32 %f2267, %f2265, 0f3F800000; + selp.f32 %f2268, %f2267, %f2265, %p687; + selp.f32 %f2269, %f2266, %f2263, %p687; + add.f32 %f2270, %f2269, 0fBF800000; + add.f32 %f2271, %f2269, 0f3F800000; + rcp.approx.ftz.f32 %f2272, %f2271; + add.f32 %f2273, %f2270, %f2270; + mul.f32 %f2275, %f2273, %f2272; + mul.f32 %f2276, %f2275, %f2275; + fma.rn.f32 %f2279, %f1645, %f2276, %f1644; + fma.rn.f32 %f2281, %f2279, %f2276, %f1647; + mul.rn.f32 %f2282, %f2281, %f2276; + mul.rn.f32 %f2283, %f2282, %f2275; + sub.f32 %f2284, %f2270, %f2275; + add.f32 %f2285, %f2284, %f2284; + neg.f32 %f2286, %f2275; + fma.rn.f32 %f2287, %f2286, %f2270, %f2285; + mul.rn.f32 %f2288, %f2272, %f2287; + add.f32 %f2289, %f2283, %f2275; + sub.f32 %f2290, %f2275, %f2289; + add.f32 %f2291, %f2283, %f2290; + add.f32 %f2292, %f2288, %f2291; + add.f32 %f2293, %f2289, %f2292; + sub.f32 %f2294, %f2289, %f2293; + add.f32 %f2295, %f2292, %f2294; + mul.rn.f32 %f2297, %f2268, %f1663; + mul.rn.f32 %f2299, %f2268, %f1665; + add.f32 %f2300, %f2297, %f2293; + sub.f32 %f2301, %f2297, %f2300; + add.f32 %f2302, %f2293, %f2301; + add.f32 %f2303, %f2295, %f2302; + add.f32 %f2304, %f2299, %f2303; + add.f32 %f2305, %f2300, %f2304; + sub.f32 %f2306, %f2300, %f2305; + add.f32 %f2307, %f2304, %f2306; + mul.rn.f32 %f2308, %f1584, %f2305; + neg.f32 %f2309, %f2308; + fma.rn.f32 %f2310, %f1584, %f2305, %f2309; + fma.rn.f32 %f2311, %f1584, %f2307, %f2310; + fma.rn.f32 %f2313, %f2062, %f2305, %f2311; + add.rn.f32 %f2314, %f2308, %f2313; + neg.f32 %f2315, %f2314; + add.rn.f32 %f2316, %f2308, %f2315; + add.rn.f32 %f2317, %f2316, %f2313; + mov.b32 %r785, %f2314; + setp.eq.s32 %p688, %r785, 1118925336; + add.s32 %r786, %r785, -1; + mov.b32 %f2318, %r786; + add.f32 %f2319, %f2317, 0f37000000; + selp.f32 %f418, %f2319, %f2317, %p688; + selp.f32 %f2320, %f2318, %f2314, %p688; + mul.rn.f32 %f2321, %f2320, %f1688; + cvt.rzi.f32.f32 %f2322, %f2321; + abs.f32 %f2323, %f2322; + setp.gt.f32 %p689, %f2323, 0f42FC0000; + mov.b32 %r787, %f2322; + and.b32 %r788, %r787, -2147483648; + or.b32 %r789, %r788, 1123811328; + mov.b32 %f2324, %r789; + selp.f32 %f2325, %f2324, %f2322, %p689; + fma.rn.f32 %f2327, %f2325, %f1694, %f2320; + fma.rn.f32 %f2329, %f2325, %f1696, %f2327; + mul.f32 %f2330, %f2329, 0f3FB8AA3B; + add.f32 %f2331, %f2325, 0f4B40007F; + mov.b32 %r790, %f2331; + shl.b32 %r791, %r790, 23; + mov.b32 %f2332, %r791; + ex2.approx.ftz.f32 %f2333, %f2330; + mul.f32 %f419, %f2333, %f2332; + setp.eq.f32 %p690, %f419, 0f7F800000; + @%p690 bra $L__BB3_430; + + fma.rn.f32 %f3111, %f419, %f418, %f419; + +$L__BB3_430: + setp.lt.f32 %p691, %f416, 0f00000000; + and.pred %p41, %p691, %p606; + setp.eq.f32 %p693, %f416, 0f00000000; + @%p693 bra $L__BB3_434; + bra.uni $L__BB3_431; + +$L__BB3_434: + add.f32 %f2338, %f416, %f416; + selp.f32 %f3113, %f2338, 0f00000000, %p606; + bra.uni $L__BB3_435; + +$L__BB3_431: + mov.b32 %r792, %f3111; + xor.b32 %r793, %r792, -2147483648; + mov.b32 %f2334, %r793; + selp.f32 %f3113, %f2334, %f3111, %p41; + setp.geu.f32 %p694, %f416, 0f00000000; + @%p694 bra $L__BB3_435; + + cvt.rzi.f32.f32 %f2336, %f1584; + setp.eq.f32 %p695, %f2336, 0f40000000; + @%p695 bra $L__BB3_435; + + mov.f32 %f3113, 0f7FFFFFFF; + +$L__BB3_435: + add.f32 %f2339, %f417, 0f40000000; + mov.b32 %r794, %f2339; + setp.lt.s32 %p697, %r794, 2139095040; + @%p697 bra $L__BB3_440; + + setp.gtu.f32 %p698, %f417, 0f7F800000; + @%p698 bra $L__BB3_439; + bra.uni $L__BB3_437; + +$L__BB3_439: + add.f32 %f3113, %f416, 0f40000000; + bra.uni $L__BB3_440; + +$L__BB3_437: + setp.neu.f32 %p699, %f417, 0f7F800000; + @%p699 bra $L__BB3_440; + + selp.f32 %f3113, 0fFF800000, 0f7F800000, %p41; + +$L__BB3_440: + mul.f32 %f2341, %f3113, 0fBF000000; + setp.eq.f32 %p700, %f416, 0f3F800000; + selp.f32 %f2342, 0fBF000000, %f2341, %p700; + fma.rn.f32 %f2345, %f2342, %f1976, %f1575; + cvt.sat.f32.f32 %f2348, %f2345; + fma.rm.f32 %f2350, %f2348, %f1979, %f1981; + add.f32 %f2351, %f2350, 0fCB40007F; + neg.f32 %f2352, %f2351; + fma.rn.f32 %f2353, %f2342, %f1688, %f2352; + fma.rn.f32 %f2355, %f2342, %f1994, %f2353; + mov.b32 %r795, %f2350; + shl.b32 %r796, %r795, 23; + mov.b32 %f2356, %r796; + ex2.approx.ftz.f32 %f2357, %f2355; + mul.f32 %f428, %f2357, %f2356; + div.rn.f32 %f429, %f354, %f3043; + abs.f32 %f430, %f429; + setp.lt.f32 %p701, %f430, 0f00800000; + mul.f32 %f2358, %f430, 0f4B800000; + selp.f32 %f2359, %f2358, %f430, %p701; + selp.f32 %f2360, 0fC3170000, 0fC2FE0000, %p701; + mov.b32 %r797, %f2359; + and.b32 %r798, %r797, 8388607; + or.b32 %r799, %r798, 1065353216; + mov.b32 %f2361, %r799; + shr.u32 %r800, %r797, 23; + cvt.rn.f32.u32 %f2362, %r800; + add.f32 %f2363, %f2360, %f2362; + setp.gt.f32 %p702, %f2361, 0f3FB504F3; + mul.f32 %f2364, %f2361, 0f3F000000; + add.f32 %f2365, %f2363, 0f3F800000; + selp.f32 %f2366, %f2365, %f2363, %p702; + selp.f32 %f2367, %f2364, %f2361, %p702; + add.f32 %f2368, %f2367, 0fBF800000; + add.f32 %f2369, %f2367, 0f3F800000; + rcp.approx.ftz.f32 %f2370, %f2369; + add.f32 %f2371, %f2368, %f2368; + mul.f32 %f2373, %f2371, %f2370; + mul.f32 %f2374, %f2373, %f2373; + fma.rn.f32 %f2377, %f1645, %f2374, %f1644; + fma.rn.f32 %f2379, %f2377, %f2374, %f1647; + mul.rn.f32 %f2380, %f2379, %f2374; + mul.rn.f32 %f2381, %f2380, %f2373; + sub.f32 %f2382, %f2368, %f2373; + add.f32 %f2383, %f2382, %f2382; + neg.f32 %f2384, %f2373; + fma.rn.f32 %f2385, %f2384, %f2368, %f2383; + mul.rn.f32 %f2386, %f2370, %f2385; + add.f32 %f2387, %f2381, %f2373; + sub.f32 %f2388, %f2373, %f2387; + add.f32 %f2389, %f2381, %f2388; add.f32 %f2390, %f2386, %f2389; - add.f32 %f2391, %f359, %f2390; - add.f32 %f2392, %f2387, %f2391; - sub.f32 %f2393, %f2387, %f2392; - add.f32 %f2394, %f2391, %f2393; - mul.rn.f32 %f2396, %f1788, %f2392; - neg.f32 %f2397, %f2396; - fma.rn.f32 %f2398, %f1788, %f2392, %f2397; - fma.rn.f32 %f2399, %f1788, %f2394, %f2398; - fma.rn.f32 %f2401, %f3109, %f2392, %f2399; - add.rn.f32 %f2402, %f2396, %f2401; - neg.f32 %f2403, %f2402; - add.rn.f32 %f2404, %f2396, %f2403; - add.rn.f32 %f2405, %f2404, %f2401; - mov.b32 %r252, %f2402; - setp.eq.s32 %p275, %r252, 1118925336; - add.s32 %r253, %r252, -1; - mov.b32 %f2406, %r253; - add.f32 %f2407, %f2405, 0f37000000; - selp.f32 %f2408, %f2406, %f2402, %p275; - selp.f32 %f494, %f2407, %f2405, %p275; - mul.f32 %f2409, %f2408, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2410, %f2409; - fma.rn.f32 %f2411, %f2410, %f3104, %f2408; - fma.rn.f32 %f2412, %f2410, %f3105, %f2411; - mul.f32 %f2413, %f2412, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2414, %f2413; - add.f32 %f2415, %f2410, 0f00000000; - ex2.approx.f32 %f2416, %f2415; - mul.f32 %f2417, %f2414, %f2416; - setp.lt.f32 %p276, %f2408, 0fC2D20000; - selp.f32 %f2418, 0f00000000, %f2417, %p276; - setp.gt.f32 %p277, %f2408, 0f42D20000; - selp.f32 %f3361, 0f7F800000, %f2418, %p277; - setp.eq.f32 %p278, %f3361, 0f7F800000; - @%p278 bra BB3_172; - - fma.rn.f32 %f3361, %f3361, %f494, %f3361; - -BB3_172: - setp.eq.f32 %p362, %f353, 0f00000000; - setp.geu.f32 %p361, %f353, 0f00000000; - mov.b32 %r254, %f3361; - xor.b32 %r255, %r254, -2147483648; - mov.b32 %f2419, %r255; - selp.f32 %f498, %f2419, %f3361, %p8; - selp.f32 %f3362, %f360, %f498, %p362; - @%p361 bra BB3_174; - - cvt.rzi.f32.f32 %f2421, %f1788; - setp.neu.f32 %p280, %f2421, 0f40000000; - selp.f32 %f3362, 0f7FFFFFFF, %f498, %p280; - -BB3_174: - abs.f32 %f3124, %f353; - cvt.rn.f32.s32 %f3123, %r323; - sub.f32 %f3122, %f3123, %f3321; - add.f32 %f3121, %f3122, 0f3F800000; - setp.eq.f32 %p366, %f353, 0f3F800000; - add.f32 %f3120, %f3124, 0f40000000; - mov.b32 %r300, %f3120; - setp.gt.s32 %p365, %r300, 2139095039; - setp.neu.f32 %p364, %f3124, 0f7F800000; - setp.gtu.f32 %p363, %f3124, 0f7F800000; - selp.f32 %f3119, 0fFF800000, 0f7F800000, %p8; - add.f32 %f3118, %f353, 0f40000000; - mov.f32 %f3117, 0f00000000; - mov.f32 %f3116, 0f3DAAAABD; - mov.f32 %f3115, 0f3C4CAF63; - mov.f32 %f3114, 0f3B18F0FE; - mov.f32 %f3113, 0fB5BFBE8E; - mov.f32 %f3112, 0fBF317200; - selp.f32 %f2424, %f3118, %f3362, %p363; - selp.f32 %f2425, %f2424, %f3119, %p364; - selp.f32 %f2426, %f2425, %f3362, %p365; - mul.f32 %f2427, %f2426, 0fBF000000; - selp.f32 %f2428, 0fBF000000, %f2427, %p366; - mul.f32 %f2429, %f2428, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2430, %f2429; - fma.rn.f32 %f2432, %f2430, %f3112, %f2428; - fma.rn.f32 %f2434, %f2430, %f3113, %f2432; - mul.f32 %f2435, %f2434, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2436, %f2435; - add.f32 %f2437, %f2430, 0f00000000; - ex2.approx.f32 %f2438, %f2437; - mul.f32 %f2439, %f2436, %f2438; - setp.lt.f32 %p285, %f2428, 0fC2D20000; - selp.f32 %f2440, 0f00000000, %f2439, %p285; - setp.gt.f32 %p286, %f2428, 0f42D20000; - selp.f32 %f2441, 0f7F800000, %f2440, %p286; - mul.f32 %f2442, %f3122, %f2441; - mul.f32 %f2443, %f3121, %f493; - sub.f32 %f2444, %f2443, %f2442; - mul.f32 %f2445, %f330, %f2444; - mul.f32 %f502, %f426, %f2445; - // inline asm - rcp.approx.ftz.f32 %f2422,%f2093; - // inline asm - mul.f32 %f2446, %f2422, %f451; - mul.f32 %f2447, %f2446, %f2446; - fma.rn.f32 %f2450, %f3114, %f2447, %f3115; - fma.rn.f32 %f2452, %f2450, %f2447, %f3116; - mul.rn.f32 %f2453, %f2452, %f2447; - mul.rn.f32 %f2454, %f2453, %f2446; - sub.f32 %f2455, %f449, %f2446; - neg.f32 %f2456, %f2446; - add.f32 %f2457, %f2455, %f2455; - fma.rn.f32 %f2458, %f2456, %f449, %f2457; - mul.rn.f32 %f2459, %f2422, %f2458; - add.f32 %f2460, %f2454, %f2446; - sub.f32 %f2461, %f2446, %f2460; - add.f32 %f2462, %f2454, %f2461; - add.f32 %f2463, %f2459, %f2462; - add.f32 %f2464, %f2460, %f2463; - sub.f32 %f2465, %f2460, %f2464; - add.f32 %f2466, %f2463, %f2465; - add.f32 %f2467, %f452, %f2464; - sub.f32 %f2468, %f452, %f2467; - add.f32 %f2469, %f2464, %f2468; - add.f32 %f2470, %f2466, %f2469; - add.f32 %f2471, %f453, %f2470; - add.f32 %f2472, %f2467, %f2471; - sub.f32 %f2473, %f2467, %f2472; - add.f32 %f2474, %f2471, %f2473; - mul.rn.f32 %f2476, %f1788, %f2472; - neg.f32 %f2477, %f2476; - fma.rn.f32 %f2478, %f1788, %f2472, %f2477; - fma.rn.f32 %f2479, %f1788, %f2474, %f2478; - fma.rn.f32 %f2481, %f3117, %f2472, %f2479; - add.rn.f32 %f2482, %f2476, %f2481; - neg.f32 %f2483, %f2482; - add.rn.f32 %f2484, %f2476, %f2483; - add.rn.f32 %f2485, %f2484, %f2481; - mov.b32 %r256, %f2482; - setp.eq.s32 %p287, %r256, 1118925336; - add.s32 %r257, %r256, -1; - mov.b32 %f2486, %r257; - add.f32 %f2487, %f2485, 0f37000000; - selp.f32 %f2488, %f2486, %f2482, %p287; - selp.f32 %f503, %f2487, %f2485, %p287; - mul.f32 %f2489, %f2488, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2490, %f2489; - fma.rn.f32 %f2491, %f2490, %f3112, %f2488; - fma.rn.f32 %f2492, %f2490, %f3113, %f2491; - mul.f32 %f2493, %f2492, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2494, %f2493; - add.f32 %f2495, %f2490, 0f00000000; - ex2.approx.f32 %f2496, %f2495; - mul.f32 %f2497, %f2494, %f2496; - setp.lt.f32 %p288, %f2488, 0fC2D20000; - selp.f32 %f2498, 0f00000000, %f2497, %p288; - setp.gt.f32 %p289, %f2488, 0f42D20000; - selp.f32 %f3363, 0f7F800000, %f2498, %p289; - setp.eq.f32 %p290, %f3363, 0f7F800000; - @%p290 bra BB3_176; - - fma.rn.f32 %f3363, %f3363, %f503, %f3363; - -BB3_176: - setp.eq.f32 %p367, %f447, 0f00000000; - mov.b32 %r258, %f3363; - xor.b32 %r259, %r258, -2147483648; - mov.b32 %f2499, %r259; - selp.f32 %f3365, %f2499, %f3363, %p11; - @%p367 bra BB3_179; - bra.uni BB3_177; - -BB3_179: - add.f32 %f2502, %f447, %f447; - selp.f32 %f3365, %f2502, 0f00000000, %p188; - bra.uni BB3_180; - -BB3_177: - setp.geu.f32 %p292, %f447, 0f00000000; - @%p292 bra BB3_180; - - cvt.rzi.f32.f32 %f2501, %f1788; - setp.neu.f32 %p293, %f2501, 0f40000000; - selp.f32 %f3365, 0f7FFFFFFF, %f3365, %p293; - -BB3_180: - abs.f32 %f3126, %f447; - add.f32 %f3125, %f3126, 0f40000000; - mov.b32 %r301, %f3125; - setp.lt.s32 %p368, %r301, 2139095040; - @%p368 bra BB3_185; - - abs.f32 %f3194, %f447; - setp.gtu.f32 %p296, %f3194, 0f7F800000; - @%p296 bra BB3_184; - bra.uni BB3_182; - -BB3_184: - add.f32 %f3365, %f447, 0f40000000; - bra.uni BB3_185; - -BB3_182: - abs.f32 %f3195, %f447; - setp.neu.f32 %p297, %f3195, 0f7F800000; - @%p297 bra BB3_185; - - selp.f32 %f3365, 0fFF800000, 0f7F800000, %p11; - -BB3_185: - setp.eq.f32 %p369, %f447, 0f3F800000; - mov.f32 %f3132, 0f00000000; - mov.f32 %f3131, 0f3DAAAABD; - mov.f32 %f3130, 0f3C4CAF63; - mov.f32 %f3129, 0f3B18F0FE; - mov.f32 %f3128, 0fB5BFBE8E; - mov.f32 %f3127, 0fBF317200; - mul.f32 %f2505, %f3365, 0fBF000000; - selp.f32 %f2506, 0fBF000000, %f2505, %p369; - mul.f32 %f2507, %f2506, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2508, %f2507; - fma.rn.f32 %f2510, %f2508, %f3127, %f2506; - fma.rn.f32 %f2512, %f2508, %f3128, %f2510; - mul.f32 %f2513, %f2512, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2514, %f2513; - add.f32 %f2515, %f2508, 0f00000000; - ex2.approx.f32 %f2516, %f2515; - mul.f32 %f2517, %f2514, %f2516; - setp.lt.f32 %p299, %f2506, 0fC2D20000; - selp.f32 %f2518, 0f00000000, %f2517, %p299; - setp.gt.f32 %p300, %f2506, 0f42D20000; - selp.f32 %f514, 0f7F800000, %f2518, %p300; - // inline asm - rcp.approx.ftz.f32 %f2503,%f2187; - // inline asm - mul.f32 %f2519, %f2503, %f470; - mul.f32 %f2520, %f2519, %f2519; - fma.rn.f32 %f2523, %f3129, %f2520, %f3130; - fma.rn.f32 %f2525, %f2523, %f2520, %f3131; - mul.rn.f32 %f2526, %f2525, %f2520; - mul.rn.f32 %f2527, %f2526, %f2519; - sub.f32 %f2528, %f468, %f2519; - neg.f32 %f2529, %f2519; - add.f32 %f2530, %f2528, %f2528; - fma.rn.f32 %f2531, %f2529, %f468, %f2530; - mul.rn.f32 %f2532, %f2503, %f2531; - add.f32 %f2533, %f2527, %f2519; - sub.f32 %f2534, %f2519, %f2533; - add.f32 %f2535, %f2527, %f2534; - add.f32 %f2536, %f2532, %f2535; - add.f32 %f2537, %f2533, %f2536; - sub.f32 %f2538, %f2533, %f2537; - add.f32 %f2539, %f2536, %f2538; - add.f32 %f2540, %f471, %f2537; - sub.f32 %f2541, %f471, %f2540; - add.f32 %f2542, %f2537, %f2541; - add.f32 %f2543, %f2539, %f2542; - add.f32 %f2544, %f472, %f2543; - add.f32 %f2545, %f2540, %f2544; - sub.f32 %f2546, %f2540, %f2545; - add.f32 %f2547, %f2544, %f2546; - mul.rn.f32 %f2549, %f1788, %f2545; - neg.f32 %f2550, %f2549; - fma.rn.f32 %f2551, %f1788, %f2545, %f2550; - fma.rn.f32 %f2552, %f1788, %f2547, %f2551; - fma.rn.f32 %f2554, %f3132, %f2545, %f2552; - add.rn.f32 %f2555, %f2549, %f2554; - neg.f32 %f2556, %f2555; - add.rn.f32 %f2557, %f2549, %f2556; - add.rn.f32 %f2558, %f2557, %f2554; - mov.b32 %r260, %f2555; - setp.eq.s32 %p301, %r260, 1118925336; - add.s32 %r261, %r260, -1; - mov.b32 %f2559, %r261; - add.f32 %f2560, %f2558, 0f37000000; - selp.f32 %f2561, %f2559, %f2555, %p301; - selp.f32 %f515, %f2560, %f2558, %p301; - mul.f32 %f2562, %f2561, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2563, %f2562; - fma.rn.f32 %f2564, %f2563, %f3127, %f2561; - fma.rn.f32 %f2565, %f2563, %f3128, %f2564; - mul.f32 %f2566, %f2565, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2567, %f2566; - add.f32 %f2568, %f2563, 0f00000000; - ex2.approx.f32 %f2569, %f2568; - mul.f32 %f2570, %f2567, %f2569; - setp.lt.f32 %p302, %f2561, 0fC2D20000; - selp.f32 %f2571, 0f00000000, %f2570, %p302; - setp.gt.f32 %p303, %f2561, 0f42D20000; - selp.f32 %f3366, 0f7F800000, %f2571, %p303; - setp.eq.f32 %p304, %f3366, 0f7F800000; - @%p304 bra BB3_187; - - fma.rn.f32 %f3366, %f3366, %f515, %f3366; - -BB3_187: - setp.eq.f32 %p372, %f466, 0f00000000; - mov.b32 %r262, %f3366; - xor.b32 %r263, %r262, -2147483648; - mov.b32 %f2572, %r263; - selp.f32 %f3368, %f2572, %f3366, %p12; - @%p372 bra BB3_190; - bra.uni BB3_188; - -BB3_190: - add.f32 %f2575, %f466, %f466; - selp.f32 %f3368, %f2575, 0f00000000, %p188; - bra.uni BB3_191; - -BB3_188: - setp.geu.f32 %p306, %f466, 0f00000000; - @%p306 bra BB3_191; - - cvt.rzi.f32.f32 %f2574, %f1788; - setp.neu.f32 %p307, %f2574, 0f40000000; - selp.f32 %f3368, 0f7FFFFFFF, %f3368, %p307; - -BB3_191: - abs.f32 %f3199, %f466; - add.f32 %f3198, %f3199, 0f40000000; - mov.b32 %r306, %f3198; - setp.lt.s32 %p373, %r306, 2139095040; - @%p373 bra BB3_196; - - abs.f32 %f3192, %f466; - setp.gtu.f32 %p310, %f3192, 0f7F800000; - @%p310 bra BB3_195; - bra.uni BB3_193; - -BB3_195: - add.f32 %f3368, %f466, 0f40000000; - bra.uni BB3_196; - -BB3_193: - abs.f32 %f3193, %f466; - setp.neu.f32 %p311, %f3193, 0f7F800000; - @%p311 bra BB3_196; - - selp.f32 %f3368, 0fFF800000, 0f7F800000, %p12; - -BB3_196: - setp.eq.f32 %p374, %f466, 0f3F800000; - cvt.rn.f32.s32 %f3137, %r324; - sub.f32 %f3136, %f3137, %f3320; - add.f32 %f3135, %f3136, 0f3F800000; - mov.f32 %f3134, 0fB5BFBE8E; - mov.f32 %f3133, 0fBF317200; - mul.f32 %f2576, %f3368, 0fBF000000; - selp.f32 %f2577, 0fBF000000, %f2576, %p374; - mul.f32 %f2578, %f2577, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2579, %f2578; - fma.rn.f32 %f2581, %f2579, %f3133, %f2577; - fma.rn.f32 %f2583, %f2579, %f3134, %f2581; - mul.f32 %f2584, %f2583, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2585, %f2584; - add.f32 %f2586, %f2579, 0f00000000; - ex2.approx.f32 %f2587, %f2586; - mul.f32 %f2588, %f2585, %f2587; - setp.lt.f32 %p313, %f2577, 0fC2D20000; - selp.f32 %f2589, 0f00000000, %f2588, %p313; - setp.gt.f32 %p314, %f2577, 0f42D20000; - selp.f32 %f2590, 0f7F800000, %f2589, %p314; - mul.f32 %f2591, %f3136, %f2590; - mul.f32 %f2592, %f3135, %f514; - sub.f32 %f2593, %f2592, %f2591; - mul.f32 %f2594, %f331, %f2593; - mul.f32 %f2595, %f412, %f2594; - mul.f32 %f2596, %f446, %f446; - div.rn.f32 %f2597, %f2596, %f427; - add.f32 %f3343, %f2597, %f3343; - mul.f32 %f2598, %f484, %f446; - div.rn.f32 %f2599, %f2598, %f427; - add.f32 %f3342, %f2599, %f3342; - mul.f32 %f2600, %f412, %f426; - mul.f32 %f2601, %f2600, %f446; - div.rn.f32 %f2602, %f2601, %f427; - add.f32 %f3341, %f2602, %f3341; - div.rn.f32 %f2603, %f446, %f427; - add.f32 %f3340, %f2603, %f3340; - mul.f32 %f2604, %f502, %f446; - div.rn.f32 %f2605, %f2604, %f427; - add.f32 %f3339, %f2605, %f3339; - mul.f32 %f2606, %f2595, %f446; - div.rn.f32 %f2607, %f2606, %f427; - add.f32 %f3338, %f2607, %f3338; - mul.f32 %f2608, %f484, %f484; - div.rn.f32 %f2609, %f2608, %f427; - add.f32 %f3337, %f2609, %f3337; - mul.f32 %f2610, %f2600, %f484; - div.rn.f32 %f2611, %f2610, %f427; - add.f32 %f3336, %f2611, %f3336; - div.rn.f32 %f2612, %f484, %f427; - add.f32 %f3335, %f2612, %f3335; - mul.f32 %f2613, %f502, %f484; - div.rn.f32 %f2614, %f2613, %f427; - add.f32 %f3334, %f2614, %f3334; - mul.f32 %f2615, %f2595, %f484; - div.rn.f32 %f2616, %f2615, %f427; - add.f32 %f3333, %f2616, %f3333; - mul.f32 %f2617, %f2600, %f2600; - div.rn.f32 %f2618, %f2617, %f427; - add.f32 %f3332, %f2618, %f3332; - div.rn.f32 %f2619, %f2600, %f427; - add.f32 %f3331, %f2619, %f3331; - mul.f32 %f2620, %f502, %f2600; - div.rn.f32 %f2621, %f2620, %f427; - add.f32 %f3330, %f2621, %f3330; - mul.f32 %f2622, %f2595, %f2600; - div.rn.f32 %f2623, %f2622, %f427; - add.f32 %f3329, %f2623, %f3329; - rcp.rn.f32 %f2624, %f427; - add.f32 %f3328, %f2624, %f3328; - div.rn.f32 %f2625, %f502, %f427; - add.f32 %f3327, %f2625, %f3327; - div.rn.f32 %f2626, %f2595, %f427; - add.f32 %f3326, %f2626, %f3326; - mul.f32 %f2627, %f502, %f502; - div.rn.f32 %f2628, %f2627, %f427; - add.f32 %f3325, %f2628, %f3325; - mul.f32 %f2629, %f2595, %f502; - div.rn.f32 %f2630, %f2629, %f427; - add.f32 %f3324, %f2630, %f3324; - mul.f32 %f2631, %f2595, %f2595; - div.rn.f32 %f2632, %f2631, %f427; - add.f32 %f3323, %f2632, %f3323; - setp.leu.f32 %p315, %f427, 0f00000000; - @%p315 bra BB3_204; - - setp.gt.f32 %p316, %f428, 0f00000000; - @%p316 bra BB3_199; - bra.uni BB3_198; - -BB3_199: - setp.lt.f32 %p317, %f427, 0f00800000; - mul.f32 %f2633, %f427, 0f4B000000; - selp.f32 %f548, %f2633, %f427, %p317; - selp.f32 %f2634, 0fC1B80000, 0f00000000, %p317; - mov.b32 %r264, %f548; - add.s32 %r265, %r264, -1059760811; - and.b32 %r266, %r265, -8388608; - sub.s32 %r267, %r264, %r266; - mov.b32 %f2635, %r267; - cvt.rn.f32.s32 %f2636, %r266; - mov.f32 %f2637, 0f34000000; - fma.rn.f32 %f2638, %f2636, %f2637, %f2634; - add.f32 %f2639, %f2635, 0fBF800000; - mov.f32 %f2640, 0f3E1039F6; - mov.f32 %f2641, 0fBE055027; - fma.rn.f32 %f2642, %f2641, %f2639, %f2640; - mov.f32 %f2643, 0fBDF8CDCC; - fma.rn.f32 %f2644, %f2642, %f2639, %f2643; - mov.f32 %f2645, 0f3E0F2955; - fma.rn.f32 %f2646, %f2644, %f2639, %f2645; - mov.f32 %f2647, 0fBE2AD8B9; - fma.rn.f32 %f2648, %f2646, %f2639, %f2647; - mov.f32 %f2649, 0f3E4CED0B; - fma.rn.f32 %f2650, %f2648, %f2639, %f2649; - mov.f32 %f2651, 0fBE7FFF22; - fma.rn.f32 %f2652, %f2650, %f2639, %f2651; - mov.f32 %f2653, 0f3EAAAA78; - fma.rn.f32 %f2654, %f2652, %f2639, %f2653; - mov.f32 %f2655, 0fBF000000; - fma.rn.f32 %f2656, %f2654, %f2639, %f2655; - mul.f32 %f2657, %f2639, %f2656; - fma.rn.f32 %f2658, %f2657, %f2639, %f2639; - mov.f32 %f2659, 0f3F317218; - fma.rn.f32 %f3369, %f2638, %f2659, %f2658; - setp.lt.u32 %p318, %r264, 2139095040; - @%p318 bra BB3_201; - - mov.f32 %f2660, 0f7F800000; - fma.rn.f32 %f3369, %f548, %f2660, %f2660; - -BB3_201: - setp.eq.f32 %p319, %f548, 0f00000000; - selp.f32 %f2661, 0fFF800000, %f3369, %p319; - mul.f32 %f2662, %f428, %f2661; - sub.f32 %f552, %f2662, %f427; - mul.f32 %f2663, %f428, 0f4B000000; - setp.lt.f32 %p320, %f428, 0f00800000; - selp.f32 %f553, %f2663, %f428, %p320; - selp.f32 %f2664, 0fC1B80000, 0f00000000, %p320; - mov.b32 %r268, %f553; - add.s32 %r269, %r268, -1059760811; - and.b32 %r270, %r269, -8388608; - sub.s32 %r271, %r268, %r270; - mov.b32 %f2665, %r271; - cvt.rn.f32.s32 %f2666, %r270; - fma.rn.f32 %f2668, %f2666, %f2637, %f2664; - add.f32 %f2669, %f2665, 0fBF800000; - fma.rn.f32 %f2672, %f2641, %f2669, %f2640; - fma.rn.f32 %f2674, %f2672, %f2669, %f2643; - fma.rn.f32 %f2676, %f2674, %f2669, %f2645; - fma.rn.f32 %f2678, %f2676, %f2669, %f2647; - fma.rn.f32 %f2680, %f2678, %f2669, %f2649; - fma.rn.f32 %f2682, %f2680, %f2669, %f2651; - fma.rn.f32 %f2684, %f2682, %f2669, %f2653; - fma.rn.f32 %f2686, %f2684, %f2669, %f2655; - mul.f32 %f2687, %f2669, %f2686; - fma.rn.f32 %f2688, %f2687, %f2669, %f2669; - fma.rn.f32 %f3370, %f2668, %f2659, %f2688; - setp.lt.u32 %p321, %r268, 2139095040; - @%p321 bra BB3_203; - - mov.f32 %f2690, 0f7F800000; - fma.rn.f32 %f3370, %f553, %f2690, %f2690; - -BB3_203: - setp.eq.f32 %p322, %f553, 0f00000000; - selp.f32 %f2691, 0fFF800000, %f3370, %p322; - mul.f32 %f2692, %f428, %f2691; - sub.f32 %f2693, %f552, %f2692; - add.f32 %f2694, %f428, %f2693; - add.f32 %f3371, %f3371, %f2694; - bra.uni BB3_204; - -BB3_198: - sub.f32 %f3371, %f3371, %f427; - -BB3_204: - add.s32 %r324, %r324, 1; - setp.lt.s32 %p323, %r324, %r71; - @%p323 bra BB3_124; - - st.local.v4.f32 [%rd2], {%f3343, %f3342, %f3341, %f3340}; - st.local.v4.f32 [%rd2+16], {%f3339, %f3338, %f3342, %f3337}; - st.local.v4.f32 [%rd2+32], {%f3336, %f3335, %f3334, %f3333}; - st.local.v4.f32 [%rd2+48], {%f3341, %f3336, %f3332, %f3331}; - st.local.v4.f32 [%rd2+64], {%f3330, %f3329, %f3340, %f3335}; - st.local.v4.f32 [%rd2+96], {%f3339, %f3334, %f3330, %f3327}; - st.local.v4.f32 [%rd2+80], {%f3331, %f3328, %f3327, %f3326}; - st.local.v4.f32 [%rd2+112], {%f3325, %f3324, %f3338, %f3333}; - st.local.v4.f32 [%rd2+128], {%f3329, %f3326, %f3324, %f3323}; - add.s32 %r323, %r323, 1; - setp.lt.s32 %p324, %r323, %r71; - @%p324 bra BB3_123; - -BB3_206: - mov.f32 %f3373, 0f00000000; - ld.local.v4.f32 {%f2696, %f2697, %f2698, %f2699}, [%rd2]; - rcp.rn.f32 %f560, %f2696; - mul.f32 %f561, %f560, %f2697; - st.local.f32 [%rd2+4], %f561; - mul.f32 %f562, %f560, %f2698; - mul.f32 %f563, %f560, %f2699; - st.local.v2.f32 [%rd2+8], {%f562, %f563}; - ld.local.v4.f32 {%f2704, %f2705, %f2706, %f2707}, [%rd2+16]; - mul.f32 %f564, %f560, %f2704; - mul.f32 %f565, %f560, %f2705; - st.local.v2.f32 [%rd2+16], {%f564, %f565}; - ld.local.f32 %f2712, [%rd2+4]; - fma.rn.f32 %f2713, %f2712, %f2706, 0f00000000; - sub.f32 %f2714, %f2707, %f2713; - ld.local.f32 %f566, [%rd2+24]; - st.local.f32 [%rd2+28], %f2714; - fma.rn.f32 %f2715, %f562, %f566, 0f00000000; - rcp.rn.f32 %f567, %f2714; - ld.local.v4.f32 {%f2716, %f2717, %f2718, %f2719}, [%rd2+32]; - sub.f32 %f2724, %f2716, %f2715; - mul.f32 %f568, %f567, %f2724; - fma.rn.f32 %f2725, %f563, %f566, 0f00000000; - sub.f32 %f2726, %f2717, %f2725; - mul.f32 %f569, %f567, %f2726; - fma.rn.f32 %f2727, %f564, %f566, 0f00000000; - sub.f32 %f2728, %f2718, %f2727; - mul.f32 %f570, %f567, %f2728; - fma.rn.f32 %f2729, %f565, %f566, 0f00000000; - sub.f32 %f2730, %f2719, %f2729; - mul.f32 %f571, %f567, %f2730; - st.local.v4.f32 [%rd2+32], {%f568, %f569, %f570, %f571}; - ld.local.v2.f32 {%f2731, %f2732}, [%rd2+48]; - ld.local.f32 %f2735, [%rd2+4]; - fma.rn.f32 %f2736, %f2735, %f2731, 0f00000000; - sub.f32 %f572, %f2732, %f2736; - st.local.f32 [%rd2+52], %f572; - add.s64 %rd129, %rd2, 48; - add.s64 %rd128, %rd2, 8; - mov.u32 %r325, -1; - -BB3_207: - ld.local.f32 %f2737, [%rd129]; - ld.local.f32 %f2738, [%rd128]; - fma.rn.f32 %f3373, %f2738, %f2737, %f3373; - add.s64 %rd129, %rd129, 4; - add.s64 %rd128, %rd128, 24; - add.s32 %r325, %r325, 1; - setp.lt.s32 %p325, %r325, 1; - @%p325 bra BB3_207; - - add.s64 %rd14, %rd2, 4; - ld.local.v4.f32 {%f2740, %f2741, %f2742, %f2743}, [%rd2+48]; - fma.rn.f32 %f2744, %f563, %f2740, 0f00000000; - fma.rn.f32 %f2745, %f569, %f572, %f2744; - sub.f32 %f2747, %f2742, %f3373; - rcp.rn.f32 %f576, %f2747; - sub.f32 %f2749, %f2743, %f2745; - mul.f32 %f577, %f576, %f2749; - ld.local.f32 %f2750, [%rd2+4]; - st.local.v2.f32 [%rd2+56], {%f2747, %f577}; - fma.rn.f32 %f2751, %f564, %f2740, 0f00000000; - fma.rn.f32 %f2752, %f570, %f572, %f2751; - ld.local.v4.f32 {%f2753, %f2754, %f2755, %f2756}, [%rd2+64]; - sub.f32 %f2761, %f2753, %f2752; - mul.f32 %f578, %f576, %f2761; - fma.rn.f32 %f2762, %f565, %f2740, 0f00000000; - fma.rn.f32 %f2763, %f571, %f572, %f2762; - sub.f32 %f2764, %f2754, %f2763; - mul.f32 %f579, %f576, %f2764; - st.local.v2.f32 [%rd2+64], {%f578, %f579}; - fma.rn.f32 %f2765, %f2750, %f2755, 0f00000000; - sub.f32 %f580, %f2756, %f2765; - st.local.f32 [%rd2+76], %f580; - add.s64 %rd131, %rd2, 72; - add.s64 %rd130, %rd2, 8; - mov.f32 %f3374, 0f00000000; - mov.u32 %r326, -1; - -BB3_209: - ld.local.f32 %f2766, [%rd131]; - ld.local.f32 %f2767, [%rd130]; - fma.rn.f32 %f3374, %f2767, %f2766, %f3374; - add.s64 %rd131, %rd131, 4; - add.s64 %rd130, %rd130, 24; - add.s32 %r326, %r326, 1; - setp.lt.s32 %p326, %r326, 1; - @%p326 bra BB3_209; - - ld.local.f32 %f2769, [%rd2+80]; - sub.f32 %f583, %f2769, %f3374; - st.local.f32 [%rd2+80], %f583; - add.s64 %rd133, %rd2, 72; - add.s64 %rd132, %rd2, 12; - mov.f32 %f3375, 0f00000000; - mov.u32 %r327, -1; - -BB3_211: - ld.local.f32 %f2770, [%rd133]; - ld.local.f32 %f2771, [%rd132]; - fma.rn.f32 %f3375, %f2771, %f2770, %f3375; - add.s64 %rd133, %rd133, 4; - add.s64 %rd132, %rd132, 24; - add.s32 %r327, %r327, 1; - setp.lt.s32 %p327, %r327, 2; - @%p327 bra BB3_211; - - ld.local.v2.f32 {%f2773, %f2774}, [%rd14+76]; - sub.f32 %f2776, %f2774, %f3375; - st.local.f32 [%rd14+80], %f2776; - ld.local.f32 %f586, [%rd14+68]; - fma.rn.f32 %f2777, %f564, %f586, 0f00000000; - fma.rn.f32 %f2778, %f570, %f580, %f2777; - fma.rn.f32 %f2779, %f578, %f583, %f2778; - rcp.rn.f32 %f587, %f2776; - ld.local.v2.f32 {%f2780, %f2781}, [%rd14+84]; - sub.f32 %f2784, %f2780, %f2779; - mul.f32 %f588, %f587, %f2784; - fma.rn.f32 %f2785, %f565, %f586, 0f00000000; - fma.rn.f32 %f2786, %f571, %f580, %f2785; - fma.rn.f32 %f2787, %f579, %f583, %f2786; + add.f32 %f2391, %f2387, %f2390; + sub.f32 %f2392, %f2387, %f2391; + add.f32 %f2393, %f2390, %f2392; + mul.rn.f32 %f2395, %f2366, %f1663; + mul.rn.f32 %f2397, %f2366, %f1665; + add.f32 %f2398, %f2395, %f2391; + sub.f32 %f2399, %f2395, %f2398; + add.f32 %f2400, %f2391, %f2399; + add.f32 %f2401, %f2393, %f2400; + add.f32 %f2402, %f2397, %f2401; + add.f32 %f2403, %f2398, %f2402; + sub.f32 %f2404, %f2398, %f2403; + add.f32 %f2405, %f2402, %f2404; + mul.rn.f32 %f2406, %f1584, %f2403; + neg.f32 %f2407, %f2406; + fma.rn.f32 %f2408, %f1584, %f2403, %f2407; + fma.rn.f32 %f2409, %f1584, %f2405, %f2408; + fma.rn.f32 %f2411, %f2062, %f2403, %f2409; + add.rn.f32 %f2412, %f2406, %f2411; + neg.f32 %f2413, %f2412; + add.rn.f32 %f2414, %f2406, %f2413; + add.rn.f32 %f2415, %f2414, %f2411; + mov.b32 %r801, %f2412; + setp.eq.s32 %p703, %r801, 1118925336; + add.s32 %r802, %r801, -1; + mov.b32 %f2416, %r802; + add.f32 %f2417, %f2415, 0f37000000; + selp.f32 %f431, %f2417, %f2415, %p703; + selp.f32 %f2418, %f2416, %f2412, %p703; + mul.rn.f32 %f2419, %f2418, %f1688; + cvt.rzi.f32.f32 %f2420, %f2419; + abs.f32 %f2421, %f2420; + setp.gt.f32 %p704, %f2421, 0f42FC0000; + mov.b32 %r803, %f2420; + and.b32 %r804, %r803, -2147483648; + or.b32 %r805, %r804, 1123811328; + mov.b32 %f2422, %r805; + selp.f32 %f2423, %f2422, %f2420, %p704; + fma.rn.f32 %f2425, %f2423, %f1694, %f2418; + fma.rn.f32 %f2427, %f2423, %f1696, %f2425; + mul.f32 %f2428, %f2427, 0f3FB8AA3B; + add.f32 %f2429, %f2423, 0f4B40007F; + mov.b32 %r806, %f2429; + shl.b32 %r807, %r806, 23; + mov.b32 %f2430, %r807; + ex2.approx.ftz.f32 %f2431, %f2428; + mul.f32 %f432, %f2431, %f2430; + setp.eq.f32 %p705, %f432, 0f7F800000; + mov.f32 %f3114, 0f7F800000; + @%p705 bra $L__BB3_442; + + fma.rn.f32 %f3114, %f432, %f431, %f432; + +$L__BB3_442: + setp.lt.f32 %p706, %f429, 0f00000000; + and.pred %p42, %p706, %p606; + setp.eq.f32 %p708, %f429, 0f00000000; + @%p708 bra $L__BB3_446; + bra.uni $L__BB3_443; + +$L__BB3_446: + add.f32 %f2436, %f429, %f429; + selp.f32 %f3116, %f2436, 0f00000000, %p606; + bra.uni $L__BB3_447; + +$L__BB3_443: + mov.b32 %r808, %f3114; + xor.b32 %r809, %r808, -2147483648; + mov.b32 %f2432, %r809; + selp.f32 %f3116, %f2432, %f3114, %p42; + setp.geu.f32 %p709, %f429, 0f00000000; + @%p709 bra $L__BB3_447; + + cvt.rzi.f32.f32 %f2434, %f1584; + setp.eq.f32 %p710, %f2434, 0f40000000; + @%p710 bra $L__BB3_447; + + mov.f32 %f3116, 0f7FFFFFFF; + +$L__BB3_447: + add.f32 %f2437, %f430, 0f40000000; + mov.b32 %r810, %f2437; + setp.lt.s32 %p712, %r810, 2139095040; + @%p712 bra $L__BB3_452; + + setp.gtu.f32 %p713, %f430, 0f7F800000; + @%p713 bra $L__BB3_451; + bra.uni $L__BB3_449; + +$L__BB3_451: + add.f32 %f3116, %f429, 0f40000000; + bra.uni $L__BB3_452; + +$L__BB3_449: + setp.neu.f32 %p714, %f430, 0f7F800000; + @%p714 bra $L__BB3_452; + + selp.f32 %f3116, 0fFF800000, 0f7F800000, %p42; + +$L__BB3_452: + mul.f32 %f2438, %f3116, 0fBF000000; + setp.eq.f32 %p715, %f429, 0f3F800000; + selp.f32 %f2439, 0fBF000000, %f2438, %p715; + fma.rn.f32 %f2442, %f2439, %f1976, %f1575; + cvt.sat.f32.f32 %f2445, %f2442; + fma.rm.f32 %f2447, %f2445, %f1979, %f1981; + add.f32 %f2448, %f2447, 0fCB40007F; + neg.f32 %f2449, %f2448; + fma.rn.f32 %f2450, %f2439, %f1688, %f2449; + fma.rn.f32 %f2452, %f2439, %f1994, %f2450; + mov.b32 %r811, %f2447; + shl.b32 %r812, %r811, 23; + mov.b32 %f2453, %r812; + ex2.approx.ftz.f32 %f2454, %f2452; + mul.f32 %f2455, %f2454, %f2453; + add.f32 %f2456, %f354, 0f3F800000; + mul.f32 %f2457, %f2456, %f428; + mul.f32 %f2458, %f354, %f2455; + sub.f32 %f2459, %f2457, %f2458; + mul.f32 %f2460, %f266, %f2459; + mul.f32 %f2461, %f352, %f2460; + mul.f32 %f2462, %f377, %f377; + div.rn.f32 %f2463, %f2462, %f365; + add.f32 %f3088, %f3088, %f2463; + mul.f32 %f2464, %f414, %f377; + div.rn.f32 %f2465, %f2464, %f365; + add.f32 %f3087, %f3087, %f2465; + mul.f32 %f2466, %f352, %f364; + mul.f32 %f2467, %f2466, %f377; + div.rn.f32 %f2468, %f2467, %f365; + add.f32 %f3086, %f3086, %f2468; + div.rn.f32 %f2469, %f377, %f365; + add.f32 %f3085, %f3085, %f2469; + mul.f32 %f2470, %f415, %f377; + div.rn.f32 %f2471, %f2470, %f365; + add.f32 %f3084, %f3084, %f2471; + mul.f32 %f2472, %f2461, %f377; + div.rn.f32 %f2473, %f2472, %f365; + add.f32 %f3083, %f3083, %f2473; + mul.f32 %f2474, %f414, %f414; + div.rn.f32 %f2475, %f2474, %f365; + add.f32 %f3082, %f3082, %f2475; + mul.f32 %f2476, %f2466, %f414; + div.rn.f32 %f2477, %f2476, %f365; + add.f32 %f3081, %f3081, %f2477; + div.rn.f32 %f2478, %f414, %f365; + add.f32 %f3080, %f3080, %f2478; + mul.f32 %f2479, %f415, %f414; + div.rn.f32 %f2480, %f2479, %f365; + add.f32 %f3079, %f3079, %f2480; + mul.f32 %f2481, %f2461, %f414; + div.rn.f32 %f2482, %f2481, %f365; + add.f32 %f3078, %f3078, %f2482; + mul.f32 %f2483, %f2466, %f2466; + div.rn.f32 %f2484, %f2483, %f365; + add.f32 %f3077, %f3077, %f2484; + div.rn.f32 %f2485, %f2466, %f365; + add.f32 %f3076, %f3076, %f2485; + mul.f32 %f2486, %f415, %f2466; + div.rn.f32 %f2487, %f2486, %f365; + add.f32 %f3075, %f3075, %f2487; + mul.f32 %f2488, %f2461, %f2466; + div.rn.f32 %f2489, %f2488, %f365; + add.f32 %f3074, %f3074, %f2489; + rcp.rn.f32 %f2490, %f365; + add.f32 %f3073, %f3073, %f2490; + div.rn.f32 %f2491, %f415, %f365; + add.f32 %f3072, %f3072, %f2491; + div.rn.f32 %f2492, %f2461, %f365; + add.f32 %f3071, %f3071, %f2492; + mul.f32 %f2493, %f415, %f415; + div.rn.f32 %f2494, %f2493, %f365; + add.f32 %f3089, %f3089, %f2494; + mul.f32 %f2495, %f2461, %f415; + div.rn.f32 %f2496, %f2495, %f365; + add.f32 %f3090, %f3090, %f2496; + mul.f32 %f2497, %f2461, %f2461; + div.rn.f32 %f2498, %f2497, %f365; + add.f32 %f3091, %f3091, %f2498; + setp.leu.f32 %p716, %f365, 0f00000000; + @%p716 bra $L__BB3_460; + + setp.gt.f32 %p717, %f366, 0f00000000; + @%p717 bra $L__BB3_455; + bra.uni $L__BB3_454; + +$L__BB3_455: + setp.lt.f32 %p718, %f365, 0f00800000; + mul.f32 %f2499, %f365, 0f4B000000; + selp.f32 %f463, %f2499, %f365, %p718; + selp.f32 %f2500, 0fC1B80000, 0f00000000, %p718; + mov.b32 %r813, %f463; + add.s32 %r814, %r813, -1059760811; + and.b32 %r815, %r814, -8388608; + sub.s32 %r816, %r813, %r815; + mov.b32 %f2501, %r816; + cvt.rn.f32.s32 %f2502, %r815; + mov.f32 %f2503, 0f34000000; + fma.rn.f32 %f2504, %f2502, %f2503, %f2500; + add.f32 %f2505, %f2501, 0fBF800000; + mov.f32 %f2506, 0f3E1039F6; + mov.f32 %f2507, 0fBE055027; + fma.rn.f32 %f2508, %f2507, %f2505, %f2506; + mov.f32 %f2509, 0fBDF8CDCC; + fma.rn.f32 %f2510, %f2508, %f2505, %f2509; + mov.f32 %f2511, 0f3E0F2955; + fma.rn.f32 %f2512, %f2510, %f2505, %f2511; + mov.f32 %f2513, 0fBE2AD8B9; + fma.rn.f32 %f2514, %f2512, %f2505, %f2513; + mov.f32 %f2515, 0f3E4CED0B; + fma.rn.f32 %f2516, %f2514, %f2505, %f2515; + mov.f32 %f2517, 0fBE7FFF22; + fma.rn.f32 %f2518, %f2516, %f2505, %f2517; + mov.f32 %f2519, 0f3EAAAA78; + fma.rn.f32 %f2520, %f2518, %f2505, %f2519; + mov.f32 %f2521, 0fBF000000; + fma.rn.f32 %f2522, %f2520, %f2505, %f2521; + mul.f32 %f2523, %f2505, %f2522; + fma.rn.f32 %f2524, %f2523, %f2505, %f2505; + mov.f32 %f2525, 0f3F317218; + fma.rn.f32 %f3117, %f2504, %f2525, %f2524; + setp.lt.u32 %p719, %r813, 2139095040; + @%p719 bra $L__BB3_457; + + mov.f32 %f2526, 0f7F800000; + fma.rn.f32 %f3117, %f463, %f2526, %f2526; + +$L__BB3_457: + setp.eq.f32 %p720, %f463, 0f00000000; + selp.f32 %f2527, 0fFF800000, %f3117, %p720; + mul.f32 %f2528, %f366, %f2527; + sub.f32 %f467, %f2528, %f365; + mul.f32 %f2529, %f366, 0f4B000000; + setp.lt.f32 %p721, %f366, 0f00800000; + selp.f32 %f468, %f2529, %f366, %p721; + selp.f32 %f2530, 0fC1B80000, 0f00000000, %p721; + mov.b32 %r817, %f468; + add.s32 %r818, %r817, -1059760811; + and.b32 %r819, %r818, -8388608; + sub.s32 %r820, %r817, %r819; + mov.b32 %f2531, %r820; + cvt.rn.f32.s32 %f2532, %r819; + fma.rn.f32 %f2534, %f2532, %f2503, %f2530; + add.f32 %f2535, %f2531, 0fBF800000; + fma.rn.f32 %f2538, %f2507, %f2535, %f2506; + fma.rn.f32 %f2540, %f2538, %f2535, %f2509; + fma.rn.f32 %f2542, %f2540, %f2535, %f2511; + fma.rn.f32 %f2544, %f2542, %f2535, %f2513; + fma.rn.f32 %f2546, %f2544, %f2535, %f2515; + fma.rn.f32 %f2548, %f2546, %f2535, %f2517; + fma.rn.f32 %f2550, %f2548, %f2535, %f2519; + fma.rn.f32 %f2552, %f2550, %f2535, %f2521; + mul.f32 %f2553, %f2535, %f2552; + fma.rn.f32 %f2554, %f2553, %f2535, %f2535; + fma.rn.f32 %f3118, %f2534, %f2525, %f2554; + setp.lt.u32 %p722, %r817, 2139095040; + @%p722 bra $L__BB3_459; + + mov.f32 %f2556, 0f7F800000; + fma.rn.f32 %f3118, %f468, %f2556, %f2556; + +$L__BB3_459: + setp.eq.f32 %p723, %f468, 0f00000000; + selp.f32 %f2557, 0fFF800000, %f3118, %p723; + mul.f32 %f2558, %f366, %f2557; + sub.f32 %f2559, %f467, %f2558; + add.f32 %f2560, %f366, %f2559; + add.f32 %f3119, %f3119, %f2560; + bra.uni $L__BB3_460; + +$L__BB3_454: + sub.f32 %f3119, %f3119, %f365; + +$L__BB3_460: + add.s32 %r855, %r855, 1; + setp.lt.s32 %p724, %r855, %r108; + @%p724 bra $L__BB3_376; + + add.s32 %r854, %r854, 1; + setp.lt.s32 %p725, %r854, %r108; + @%p725 bra $L__BB3_375; + +$L__BB3_462: + ld.param.u64 %rd52, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_6]; + ld.param.u64 %rd51, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_5]; + ld.param.u32 %r835, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_7]; + ld.param.u64 %rd50, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_4]; + mov.u32 %r834, %tid.x; + mov.u32 %r833, %ntid.x; + mov.u32 %r832, %ctaid.x; + mad.lo.s32 %r831, %r832, %r833, %r834; + rcp.rn.f32 %f2561, %f3088; + mov.f32 %f2562, 0f3F800000; + mul.f32 %f2563, %f2561, %f3087; + mul.f32 %f2564, %f2561, %f3086; + mul.f32 %f2565, %f2561, %f3085; + mul.f32 %f2566, %f2561, %f3084; + mul.f32 %f2567, %f2561, %f3083; + fma.rn.f32 %f2568, %f2563, %f3087, 0f00000000; + sub.f32 %f2570, %f3082, %f2568; + fma.rn.f32 %f2571, %f2564, %f3087, 0f00000000; + rcp.rn.f32 %f2572, %f2570; + sub.f32 %f2573, %f3081, %f2571; + mul.f32 %f2574, %f2572, %f2573; + fma.rn.f32 %f2575, %f2565, %f3087, 0f00000000; + sub.f32 %f2576, %f3080, %f2575; + mul.f32 %f2577, %f2572, %f2576; + fma.rn.f32 %f2578, %f2566, %f3087, 0f00000000; + sub.f32 %f2579, %f3079, %f2578; + mul.f32 %f2580, %f2572, %f2579; + fma.rn.f32 %f2581, %f2567, %f3087, 0f00000000; + sub.f32 %f2582, %f3078, %f2581; + mul.f32 %f2583, %f2572, %f2582; + fma.rn.f32 %f2584, %f2563, %f3086, 0f00000000; + sub.f32 %f2585, %f3081, %f2584; + fma.rn.f32 %f2586, %f2564, %f3086, 0f00000000; + fma.rn.f32 %f2587, %f2574, %f2585, %f2586; + sub.f32 %f2588, %f3077, %f2587; + fma.rn.f32 %f2589, %f2565, %f3086, 0f00000000; + fma.rn.f32 %f2590, %f2577, %f2585, %f2589; + rcp.rn.f32 %f2591, %f2588; + sub.f32 %f2592, %f3076, %f2590; + mul.f32 %f2593, %f2591, %f2592; + fma.rn.f32 %f2594, %f2566, %f3086, 0f00000000; + fma.rn.f32 %f2595, %f2580, %f2585, %f2594; + sub.f32 %f2596, %f3075, %f2595; + mul.f32 %f2597, %f2591, %f2596; + fma.rn.f32 %f2598, %f2567, %f3086, 0f00000000; + fma.rn.f32 %f2599, %f2583, %f2585, %f2598; + sub.f32 %f2600, %f3074, %f2599; + mul.f32 %f2601, %f2591, %f2600; + fma.rn.f32 %f2602, %f2563, %f3085, 0f00000000; + sub.f32 %f2603, %f3080, %f2602; + fma.rn.f32 %f2604, %f2564, %f3085, 0f00000000; + fma.rn.f32 %f2605, %f2574, %f2603, %f2604; + sub.f32 %f2606, %f3076, %f2605; + fma.rn.f32 %f2607, %f2565, %f3085, 0f00000000; + fma.rn.f32 %f2608, %f2577, %f2603, %f2607; + fma.rn.f32 %f2609, %f2593, %f2606, %f2608; + sub.f32 %f2610, %f3073, %f2609; + fma.rn.f32 %f2611, %f2566, %f3085, 0f00000000; + fma.rn.f32 %f2612, %f2580, %f2603, %f2611; + fma.rn.f32 %f2613, %f2597, %f2606, %f2612; + rcp.rn.f32 %f2614, %f2610; + sub.f32 %f2615, %f3072, %f2613; + mul.f32 %f2616, %f2614, %f2615; + fma.rn.f32 %f2617, %f2567, %f3085, 0f00000000; + fma.rn.f32 %f2618, %f2583, %f2603, %f2617; + fma.rn.f32 %f2619, %f2601, %f2606, %f2618; + sub.f32 %f2620, %f3071, %f2619; + mul.f32 %f2621, %f2614, %f2620; + fma.rn.f32 %f2622, %f2563, %f3084, 0f00000000; + sub.f32 %f2623, %f3079, %f2622; + fma.rn.f32 %f2624, %f2564, %f3084, 0f00000000; + fma.rn.f32 %f2625, %f2574, %f2623, %f2624; + sub.f32 %f2626, %f3075, %f2625; + fma.rn.f32 %f2627, %f2565, %f3084, 0f00000000; + fma.rn.f32 %f2628, %f2577, %f2623, %f2627; + fma.rn.f32 %f2629, %f2593, %f2626, %f2628; + sub.f32 %f2630, %f3072, %f2629; + fma.rn.f32 %f2631, %f2566, %f3084, 0f00000000; + fma.rn.f32 %f2632, %f2580, %f2623, %f2631; + fma.rn.f32 %f2633, %f2597, %f2626, %f2632; + fma.rn.f32 %f2634, %f2616, %f2630, %f2633; + sub.f32 %f2635, %f3089, %f2634; + fma.rn.f32 %f2636, %f2567, %f3084, 0f00000000; + fma.rn.f32 %f2637, %f2583, %f2623, %f2636; + fma.rn.f32 %f2638, %f2601, %f2626, %f2637; + fma.rn.f32 %f2639, %f2621, %f2630, %f2638; + rcp.rn.f32 %f2640, %f2635; + sub.f32 %f2641, %f3090, %f2639; + mul.f32 %f2642, %f2640, %f2641; + fma.rn.f32 %f2643, %f2563, %f3083, 0f00000000; + sub.f32 %f2644, %f3078, %f2643; + fma.rn.f32 %f2645, %f2564, %f3083, 0f00000000; + fma.rn.f32 %f2646, %f2574, %f2644, %f2645; + sub.f32 %f2647, %f3074, %f2646; + fma.rn.f32 %f2648, %f2565, %f3083, 0f00000000; + fma.rn.f32 %f2649, %f2577, %f2644, %f2648; + fma.rn.f32 %f2650, %f2593, %f2647, %f2649; + sub.f32 %f2651, %f3071, %f2650; + fma.rn.f32 %f2652, %f2566, %f3083, 0f00000000; + fma.rn.f32 %f2653, %f2580, %f2644, %f2652; + fma.rn.f32 %f2654, %f2597, %f2647, %f2653; + fma.rn.f32 %f2655, %f2616, %f2651, %f2654; + sub.f32 %f2656, %f3090, %f2655; + fma.rn.f32 %f2657, %f2567, %f3083, 0f00000000; + fma.rn.f32 %f2658, %f2583, %f2644, %f2657; + fma.rn.f32 %f2659, %f2601, %f2647, %f2658; + fma.rn.f32 %f2660, %f2621, %f2651, %f2659; + fma.rn.f32 %f2661, %f2642, %f2656, %f2660; + sub.f32 %f2662, %f3091, %f2661; + add.f32 %f2663, %f2563, 0f00000000; + sub.f32 %f2664, %f1552, %f2663; + add.f32 %f2665, %f2564, 0f00000000; + fma.rn.f32 %f2666, %f2574, %f2664, %f2665; + sub.f32 %f2667, %f1552, %f2666; + add.f32 %f2668, %f2565, 0f00000000; + fma.rn.f32 %f2669, %f2577, %f2664, %f2668; + fma.rn.f32 %f2670, %f2593, %f2667, %f2669; + sub.f32 %f2671, %f1552, %f2670; + add.f32 %f2672, %f2566, 0f00000000; + fma.rn.f32 %f2673, %f2580, %f2664, %f2672; + fma.rn.f32 %f2674, %f2597, %f2667, %f2673; + fma.rn.f32 %f2675, %f2616, %f2671, %f2674; + sub.f32 %f2676, %f1552, %f2675; + add.f32 %f2677, %f2567, 0f00000000; + fma.rn.f32 %f2678, %f2583, %f2664, %f2677; + fma.rn.f32 %f2679, %f2601, %f2667, %f2678; + fma.rn.f32 %f2680, %f2621, %f2671, %f2679; + fma.rn.f32 %f2681, %f2642, %f2676, %f2680; + sub.f32 %f2682, %f1552, %f2681; + div.rn.f32 %f2683, %f2682, %f2662; + fma.rn.f32 %f2684, %f2656, %f2683, 0f00000000; + sub.f32 %f2685, %f2676, %f2684; + mul.f32 %f2686, %f2640, %f2685; + fma.rn.f32 %f2687, %f2630, %f2686, 0f00000000; + fma.rn.f32 %f2688, %f2651, %f2683, %f2687; + sub.f32 %f2689, %f2671, %f2688; + mul.f32 %f2690, %f2614, %f2689; + fma.rn.f32 %f2691, %f2606, %f2690, 0f00000000; + fma.rn.f32 %f2692, %f2626, %f2686, %f2691; + fma.rn.f32 %f2693, %f2647, %f2683, %f2692; + sub.f32 %f2694, %f2667, %f2693; + mul.f32 %f2695, %f2591, %f2694; + fma.rn.f32 %f2696, %f2585, %f2695, 0f00000000; + fma.rn.f32 %f2697, %f2603, %f2690, %f2696; + fma.rn.f32 %f2698, %f2623, %f2686, %f2697; + fma.rn.f32 %f2699, %f2644, %f2683, %f2698; + sub.f32 %f2700, %f2664, %f2699; + mul.f32 %f2701, %f2572, %f2700; + fma.rn.f32 %f2702, %f3087, %f2701, 0f00000000; + fma.rn.f32 %f2703, %f3086, %f2695, %f2702; + fma.rn.f32 %f2704, %f3085, %f2690, %f2703; + fma.rn.f32 %f2705, %f3084, %f2686, %f2704; + fma.rn.f32 %f2706, %f3083, %f2683, %f2705; + sub.f32 %f2707, %f2562, %f2706; + mul.f32 %f2708, %f2561, %f2707; + fma.rn.f32 %f2709, %f2563, 0f00000000, 0f00000000; + sub.f32 %f2710, %f2562, %f2709; + fma.rn.f32 %f2711, %f2564, 0f00000000, 0f00000000; + fma.rn.f32 %f2712, %f2574, %f2710, %f2711; + sub.f32 %f2713, %f1552, %f2712; + fma.rn.f32 %f2714, %f2565, 0f00000000, 0f00000000; + fma.rn.f32 %f2715, %f2577, %f2710, %f2714; + fma.rn.f32 %f2716, %f2593, %f2713, %f2715; + sub.f32 %f2717, %f1552, %f2716; + fma.rn.f32 %f2718, %f2566, 0f00000000, 0f00000000; + fma.rn.f32 %f2719, %f2580, %f2710, %f2718; + fma.rn.f32 %f2720, %f2597, %f2713, %f2719; + fma.rn.f32 %f2721, %f2616, %f2717, %f2720; + sub.f32 %f2722, %f1552, %f2721; + fma.rn.f32 %f2723, %f2567, 0f00000000, 0f00000000; + fma.rn.f32 %f2724, %f2583, %f2710, %f2723; + fma.rn.f32 %f2725, %f2601, %f2713, %f2724; + fma.rn.f32 %f2726, %f2621, %f2717, %f2725; + fma.rn.f32 %f2727, %f2642, %f2722, %f2726; + sub.f32 %f2728, %f1552, %f2727; + div.rn.f32 %f2729, %f2728, %f2662; + fma.rn.f32 %f2730, %f2656, %f2729, 0f00000000; + sub.f32 %f2731, %f2722, %f2730; + mul.f32 %f2732, %f2640, %f2731; + fma.rn.f32 %f2733, %f2630, %f2732, 0f00000000; + fma.rn.f32 %f2734, %f2651, %f2729, %f2733; + sub.f32 %f2735, %f2717, %f2734; + mul.f32 %f2736, %f2614, %f2735; + fma.rn.f32 %f2737, %f2606, %f2736, 0f00000000; + fma.rn.f32 %f2738, %f2626, %f2732, %f2737; + fma.rn.f32 %f2739, %f2647, %f2729, %f2738; + sub.f32 %f2740, %f2713, %f2739; + mul.f32 %f2741, %f2591, %f2740; + fma.rn.f32 %f2742, %f2585, %f2741, 0f00000000; + fma.rn.f32 %f2743, %f2603, %f2736, %f2742; + fma.rn.f32 %f2744, %f2623, %f2732, %f2743; + fma.rn.f32 %f2745, %f2644, %f2729, %f2744; + sub.f32 %f2746, %f2710, %f2745; + mul.f32 %f2747, %f2572, %f2746; + sub.f32 %f2748, %f1552, %f2709; + fma.rn.f32 %f2749, %f2574, %f2748, %f2711; + sub.f32 %f2750, %f2562, %f2749; + fma.rn.f32 %f2751, %f2577, %f2748, %f2714; + fma.rn.f32 %f2752, %f2593, %f2750, %f2751; + sub.f32 %f2753, %f1552, %f2752; + fma.rn.f32 %f2754, %f2580, %f2748, %f2718; + fma.rn.f32 %f2755, %f2597, %f2750, %f2754; + fma.rn.f32 %f2756, %f2616, %f2753, %f2755; + sub.f32 %f2757, %f1552, %f2756; + fma.rn.f32 %f2758, %f2583, %f2748, %f2723; + fma.rn.f32 %f2759, %f2601, %f2750, %f2758; + fma.rn.f32 %f2760, %f2621, %f2753, %f2759; + fma.rn.f32 %f2761, %f2642, %f2757, %f2760; + sub.f32 %f2762, %f1552, %f2761; + div.rn.f32 %f2763, %f2762, %f2662; + fma.rn.f32 %f2764, %f2656, %f2763, 0f00000000; + sub.f32 %f2765, %f2757, %f2764; + mul.f32 %f2766, %f2640, %f2765; + fma.rn.f32 %f2767, %f2630, %f2766, 0f00000000; + fma.rn.f32 %f2768, %f2651, %f2763, %f2767; + sub.f32 %f2769, %f2753, %f2768; + mul.f32 %f2770, %f2614, %f2769; + fma.rn.f32 %f2771, %f2606, %f2770, 0f00000000; + fma.rn.f32 %f2772, %f2626, %f2766, %f2771; + fma.rn.f32 %f2773, %f2647, %f2763, %f2772; + sub.f32 %f2774, %f2750, %f2773; + mul.f32 %f2775, %f2591, %f2774; + sub.f32 %f2776, %f1552, %f2749; + fma.rn.f32 %f2777, %f2593, %f2776, %f2751; + sub.f32 %f2778, %f2562, %f2777; + fma.rn.f32 %f2779, %f2597, %f2776, %f2754; + fma.rn.f32 %f2780, %f2616, %f2778, %f2779; + sub.f32 %f2781, %f1552, %f2780; + fma.rn.f32 %f2782, %f2601, %f2776, %f2758; + fma.rn.f32 %f2783, %f2621, %f2778, %f2782; + fma.rn.f32 %f2784, %f2642, %f2781, %f2783; + sub.f32 %f2785, %f1552, %f2784; + div.rn.f32 %f2786, %f2785, %f2662; + fma.rn.f32 %f2787, %f2656, %f2786, 0f00000000; sub.f32 %f2788, %f2781, %f2787; - mul.f32 %f589, %f587, %f2788; - ld.local.f32 %f2789, [%rd14]; - st.local.v2.f32 [%rd14+84], {%f588, %f589}; - ld.local.v2.f32 {%f2790, %f2791}, [%rd14+92]; - fma.rn.f32 %f2794, %f2789, %f2790, 0f00000000; - sub.f32 %f590, %f2791, %f2794; - st.local.f32 [%rd14+96], %f590; - add.s64 %rd135, %rd2, 96; - add.s64 %rd134, %rd2, 8; - mov.f32 %f3376, 0f00000000; - mov.u32 %r328, -1; - -BB3_213: - ld.local.f32 %f2795, [%rd135]; - ld.local.f32 %f2796, [%rd134]; - fma.rn.f32 %f3376, %f2796, %f2795, %f3376; - add.s64 %rd135, %rd135, 4; - add.s64 %rd134, %rd134, 24; - add.s32 %r328, %r328, 1; - setp.lt.s32 %p328, %r328, 1; - @%p328 bra BB3_213; - - ld.local.f32 %f2798, [%rd2+104]; - sub.f32 %f593, %f2798, %f3376; - st.local.f32 [%rd2+104], %f593; - add.s64 %rd137, %rd2, 96; - add.s64 %rd136, %rd2, 12; - mov.f32 %f3377, 0f00000000; - mov.u32 %r329, -1; - -BB3_215: - ld.local.f32 %f2799, [%rd137]; - ld.local.f32 %f2800, [%rd136]; - fma.rn.f32 %f3377, %f2800, %f2799, %f3377; - add.s64 %rd137, %rd137, 4; - add.s64 %rd136, %rd136, 24; - add.s32 %r329, %r329, 1; - setp.lt.s32 %p329, %r329, 2; - @%p329 bra BB3_215; - - ld.local.f32 %f2802, [%rd2+108]; - sub.f32 %f596, %f2802, %f3377; - st.local.f32 [%rd2+108], %f596; - add.s64 %rd139, %rd2, 96; - add.s64 %rd138, %rd2, 16; - mov.f32 %f3378, 0f00000000; - mov.u32 %r330, -1; - -BB3_217: - ld.local.f32 %f2803, [%rd139]; - ld.local.f32 %f2804, [%rd138]; - fma.rn.f32 %f3378, %f2804, %f2803, %f3378; - add.s64 %rd139, %rd139, 4; - add.s64 %rd138, %rd138, 24; - add.s32 %r330, %r330, 1; - setp.lt.s32 %p330, %r330, 3; - @%p330 bra BB3_217; - - ld.local.v4.f32 {%f2806, %f2807, %f2808, %f2809}, [%rd14+108]; - ld.local.f32 %f599, [%rd14+92]; - fma.rn.f32 %f2814, %f565, %f599, 0f00000000; - fma.rn.f32 %f2815, %f571, %f590, %f2814; - fma.rn.f32 %f2816, %f579, %f593, %f2815; - fma.rn.f32 %f2817, %f589, %f596, %f2816; - sub.f32 %f2818, %f2806, %f3378; - rcp.rn.f32 %f600, %f2818; - sub.f32 %f2819, %f2807, %f2817; - mul.f32 %f601, %f600, %f2819; - ld.local.f32 %f2820, [%rd14]; - st.local.v2.f32 [%rd14+108], {%f2818, %f601}; - fma.rn.f32 %f2821, %f2820, %f2808, 0f00000000; - sub.f32 %f602, %f2809, %f2821; - st.local.f32 [%rd14+120], %f602; - add.s64 %rd141, %rd2, 120; - add.s64 %rd140, %rd2, 8; - mov.f32 %f3379, 0f00000000; - mov.u32 %r331, -1; - -BB3_219: - ld.local.f32 %f2822, [%rd141]; - ld.local.f32 %f2823, [%rd140]; - fma.rn.f32 %f3379, %f2823, %f2822, %f3379; - add.s64 %rd141, %rd141, 4; - add.s64 %rd140, %rd140, 24; - add.s32 %r331, %r331, 1; - setp.lt.s32 %p331, %r331, 1; - @%p331 bra BB3_219; - - ld.local.f32 %f2825, [%rd2+128]; - sub.f32 %f605, %f2825, %f3379; - st.local.f32 [%rd2+128], %f605; - add.s64 %rd143, %rd2, 120; - add.s64 %rd142, %rd2, 12; - mov.f32 %f3380, 0f00000000; - mov.u32 %r332, -1; - -BB3_221: - ld.local.f32 %f2826, [%rd143]; - ld.local.f32 %f2827, [%rd142]; - fma.rn.f32 %f3380, %f2827, %f2826, %f3380; - add.s64 %rd143, %rd143, 4; - add.s64 %rd142, %rd142, 24; - add.s32 %r332, %r332, 1; - setp.lt.s32 %p332, %r332, 2; - @%p332 bra BB3_221; - - ld.local.f32 %f2829, [%rd2+132]; - sub.f32 %f608, %f2829, %f3380; - st.local.f32 [%rd2+132], %f608; - add.s64 %rd145, %rd2, 120; - add.s64 %rd144, %rd2, 16; - mov.f32 %f3381, 0f00000000; - mov.u32 %r333, -1; - -BB3_223: - ld.local.f32 %f2830, [%rd145]; - ld.local.f32 %f2831, [%rd144]; - fma.rn.f32 %f3381, %f2831, %f2830, %f3381; - add.s64 %rd145, %rd145, 4; - add.s64 %rd144, %rd144, 24; - add.s32 %r333, %r333, 1; - setp.lt.s32 %p333, %r333, 3; - @%p333 bra BB3_223; - - ld.local.f32 %f2833, [%rd2+136]; - sub.f32 %f611, %f2833, %f3381; - st.local.f32 [%rd2+136], %f611; - add.s64 %rd147, %rd2, 120; - add.s64 %rd146, %rd2, 20; - mov.f32 %f3382, 0f00000000; - mov.u32 %r334, -1; - -BB3_225: - ld.local.f32 %f2834, [%rd147]; - ld.local.f32 %f2835, [%rd146]; - fma.rn.f32 %f3382, %f2835, %f2834, %f3382; - add.s64 %rd147, %rd147, 4; - add.s64 %rd146, %rd146, 24; - add.s32 %r334, %r334, 1; - setp.lt.s32 %p334, %r334, 4; - @%p334 bra BB3_225; - - ld.param.u64 %rd126, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_6]; - ld.param.u64 %rd125, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_5]; - ld.param.u32 %r294, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_7]; - ld.param.u64 %rd124, [_Z23kernel_MLEFit_XYNBSXSY_PKffiiPfS1_S1_i_param_4]; - mov.u32 %r293, %tid.x; - mov.u32 %r292, %ctaid.x; - mov.u32 %r291, %ntid.x; - mad.lo.s32 %r290, %r291, %r292, %r293; - ld.local.f32 %f2836, [%rd2+140]; - sub.f32 %f2837, %f2836, %f3382; - ld.local.f32 %f2838, [%rd2+120]; - st.local.f32 [%rd2+140], %f2837; - add.f32 %f2839, %f561, 0f00000000; - mov.f32 %f2840, 0f00000000; - sub.f32 %f2841, %f2840, %f2839; - add.f32 %f2842, %f562, 0f00000000; - fma.rn.f32 %f2843, %f568, %f2841, %f2842; - sub.f32 %f2844, %f2840, %f2843; - add.f32 %f2845, %f563, 0f00000000; - fma.rn.f32 %f2846, %f569, %f2841, %f2845; - fma.rn.f32 %f2847, %f577, %f2844, %f2846; - sub.f32 %f2848, %f2840, %f2847; - add.f32 %f2849, %f564, 0f00000000; - fma.rn.f32 %f2850, %f570, %f2841, %f2849; - fma.rn.f32 %f2851, %f578, %f2844, %f2850; - fma.rn.f32 %f2852, %f588, %f2848, %f2851; - sub.f32 %f2853, %f2840, %f2852; - add.f32 %f2854, %f565, 0f00000000; - fma.rn.f32 %f2855, %f571, %f2841, %f2854; - fma.rn.f32 %f2856, %f579, %f2844, %f2855; - fma.rn.f32 %f2857, %f589, %f2848, %f2856; - fma.rn.f32 %f2858, %f601, %f2853, %f2857; - sub.f32 %f2859, %f2840, %f2858; - div.rn.f32 %f2860, %f2859, %f2837; - fma.rn.f32 %f2861, %f611, %f2860, 0f00000000; - sub.f32 %f2862, %f2853, %f2861; - mul.f32 %f2863, %f600, %f2862; - fma.rn.f32 %f2864, %f596, %f2863, 0f00000000; - fma.rn.f32 %f2865, %f608, %f2860, %f2864; - sub.f32 %f2866, %f2848, %f2865; - mul.f32 %f2867, %f587, %f2866; - fma.rn.f32 %f2868, %f583, %f2867, 0f00000000; - fma.rn.f32 %f2869, %f593, %f2863, %f2868; - fma.rn.f32 %f2870, %f605, %f2860, %f2869; - sub.f32 %f2871, %f2844, %f2870; - mul.f32 %f2872, %f576, %f2871; - fma.rn.f32 %f2873, %f572, %f2872, 0f00000000; - fma.rn.f32 %f2874, %f580, %f2867, %f2873; - fma.rn.f32 %f2875, %f590, %f2863, %f2874; - fma.rn.f32 %f2876, %f602, %f2860, %f2875; - sub.f32 %f2877, %f2841, %f2876; - mul.f32 %f2878, %f567, %f2877; - fma.rn.f32 %f2879, %f566, %f2878, 0f00000000; - fma.rn.f32 %f2880, %f2740, %f2872, %f2879; - fma.rn.f32 %f2881, %f586, %f2867, %f2880; - fma.rn.f32 %f2882, %f599, %f2863, %f2881; - fma.rn.f32 %f2883, %f2838, %f2860, %f2882; - mov.f32 %f2884, 0f3F800000; - sub.f32 %f2885, %f2884, %f2883; - mul.f32 %f2886, %f560, %f2885; - fma.rn.f32 %f2887, %f561, 0f00000000, 0f00000000; - sub.f32 %f2888, %f2884, %f2887; - fma.rn.f32 %f2889, %f562, 0f00000000, 0f00000000; - fma.rn.f32 %f2890, %f568, %f2888, %f2889; - sub.f32 %f2891, %f2840, %f2890; - fma.rn.f32 %f2892, %f563, 0f00000000, 0f00000000; - fma.rn.f32 %f2893, %f569, %f2888, %f2892; - fma.rn.f32 %f2894, %f577, %f2891, %f2893; - sub.f32 %f2895, %f2840, %f2894; - fma.rn.f32 %f2896, %f564, 0f00000000, 0f00000000; - fma.rn.f32 %f2897, %f570, %f2888, %f2896; - fma.rn.f32 %f2898, %f578, %f2891, %f2897; - fma.rn.f32 %f2899, %f588, %f2895, %f2898; - sub.f32 %f2900, %f2840, %f2899; - fma.rn.f32 %f2901, %f565, 0f00000000, 0f00000000; - fma.rn.f32 %f2902, %f571, %f2888, %f2901; - fma.rn.f32 %f2903, %f579, %f2891, %f2902; - fma.rn.f32 %f2904, %f589, %f2895, %f2903; - fma.rn.f32 %f2905, %f601, %f2900, %f2904; - sub.f32 %f2906, %f2840, %f2905; - div.rn.f32 %f2907, %f2906, %f2837; - fma.rn.f32 %f2908, %f611, %f2907, 0f00000000; - sub.f32 %f2909, %f2900, %f2908; - mul.f32 %f2910, %f600, %f2909; - fma.rn.f32 %f2911, %f596, %f2910, 0f00000000; - fma.rn.f32 %f2912, %f608, %f2907, %f2911; - sub.f32 %f2913, %f2895, %f2912; - mul.f32 %f2914, %f587, %f2913; - fma.rn.f32 %f2915, %f583, %f2914, 0f00000000; - fma.rn.f32 %f2916, %f593, %f2910, %f2915; - fma.rn.f32 %f2917, %f605, %f2907, %f2916; - sub.f32 %f2918, %f2891, %f2917; - mul.f32 %f2919, %f576, %f2918; - fma.rn.f32 %f2920, %f572, %f2919, 0f00000000; - fma.rn.f32 %f2921, %f580, %f2914, %f2920; - fma.rn.f32 %f2922, %f590, %f2910, %f2921; - fma.rn.f32 %f2923, %f602, %f2907, %f2922; - sub.f32 %f2924, %f2888, %f2923; - mul.f32 %f2925, %f567, %f2924; - sub.f32 %f2926, %f2840, %f2887; - fma.rn.f32 %f2927, %f568, %f2926, %f2889; - sub.f32 %f2928, %f2884, %f2927; - fma.rn.f32 %f2929, %f569, %f2926, %f2892; - fma.rn.f32 %f2930, %f577, %f2928, %f2929; - sub.f32 %f2931, %f2840, %f2930; - fma.rn.f32 %f2932, %f570, %f2926, %f2896; - fma.rn.f32 %f2933, %f578, %f2928, %f2932; - fma.rn.f32 %f2934, %f588, %f2931, %f2933; - sub.f32 %f2935, %f2840, %f2934; - fma.rn.f32 %f2936, %f571, %f2926, %f2901; - fma.rn.f32 %f2937, %f579, %f2928, %f2936; - fma.rn.f32 %f2938, %f589, %f2931, %f2937; - fma.rn.f32 %f2939, %f601, %f2935, %f2938; - sub.f32 %f2940, %f2840, %f2939; - div.rn.f32 %f2941, %f2940, %f2837; - fma.rn.f32 %f2942, %f611, %f2941, 0f00000000; - sub.f32 %f2943, %f2935, %f2942; - mul.f32 %f2944, %f600, %f2943; - fma.rn.f32 %f2945, %f596, %f2944, 0f00000000; - fma.rn.f32 %f2946, %f608, %f2941, %f2945; - sub.f32 %f2947, %f2931, %f2946; - mul.f32 %f2948, %f587, %f2947; - fma.rn.f32 %f2949, %f583, %f2948, 0f00000000; - fma.rn.f32 %f2950, %f593, %f2944, %f2949; - fma.rn.f32 %f2951, %f605, %f2941, %f2950; - sub.f32 %f2952, %f2928, %f2951; - mul.f32 %f2953, %f576, %f2952; - sub.f32 %f2954, %f2840, %f2927; - fma.rn.f32 %f2955, %f577, %f2954, %f2929; - sub.f32 %f2956, %f2884, %f2955; - fma.rn.f32 %f2957, %f578, %f2954, %f2932; - fma.rn.f32 %f2958, %f588, %f2956, %f2957; - sub.f32 %f2959, %f2840, %f2958; - fma.rn.f32 %f2960, %f579, %f2954, %f2936; - fma.rn.f32 %f2961, %f589, %f2956, %f2960; - fma.rn.f32 %f2962, %f601, %f2959, %f2961; - sub.f32 %f2963, %f2840, %f2962; - div.rn.f32 %f2964, %f2963, %f2837; - fma.rn.f32 %f2965, %f611, %f2964, 0f00000000; - sub.f32 %f2966, %f2959, %f2965; - mul.f32 %f2967, %f600, %f2966; - fma.rn.f32 %f2968, %f596, %f2967, 0f00000000; - fma.rn.f32 %f2969, %f608, %f2964, %f2968; - sub.f32 %f2970, %f2956, %f2969; - mul.f32 %f2971, %f587, %f2970; - sub.f32 %f2972, %f2840, %f2955; - fma.rn.f32 %f2973, %f588, %f2972, %f2957; - sub.f32 %f2974, %f2884, %f2973; - fma.rn.f32 %f2975, %f589, %f2972, %f2960; - fma.rn.f32 %f2976, %f601, %f2974, %f2975; - sub.f32 %f2977, %f2840, %f2976; - div.rn.f32 %f2978, %f2977, %f2837; - fma.rn.f32 %f2979, %f611, %f2978, 0f00000000; - sub.f32 %f2980, %f2974, %f2979; - mul.f32 %f2981, %f600, %f2980; - sub.f32 %f2982, %f2840, %f2973; - fma.rn.f32 %f2983, %f601, %f2982, %f2975; - sub.f32 %f2984, %f2884, %f2983; - div.rn.f32 %f2985, %f2984, %f2837; - cvta.to.global.u64 %rd106, %rd124; - mul.wide.s32 %rd107, %r290, 4; - add.s64 %rd108, %rd106, %rd107; - st.global.f32 [%rd108], %f3321; - shl.b32 %r286, %r294, 2; - cvt.s64.s32 %rd109, %r286; - add.s64 %rd110, %rd108, %rd109; - st.global.f32 [%rd110], %f3320; - add.s64 %rd111, %rd110, %rd109; - st.global.f32 [%rd111], %f3319; - add.s64 %rd112, %rd111, %rd109; - st.global.f32 [%rd112], %f3226; - add.s64 %rd113, %rd112, %rd109; - st.global.f32 [%rd113], %f3317; - add.s64 %rd114, %rd113, %rd109; - st.global.f32 [%rd114], %f3316; - cvta.to.global.u64 %rd115, %rd125; - add.s64 %rd116, %rd115, %rd107; - st.global.f32 [%rd116], %f2886; - add.s64 %rd117, %rd116, %rd109; - st.global.f32 [%rd117], %f2925; - add.s64 %rd118, %rd117, %rd109; - st.global.f32 [%rd118], %f2953; - add.s64 %rd119, %rd118, %rd109; - st.global.f32 [%rd119], %f2971; - add.s64 %rd120, %rd119, %rd109; - st.global.f32 [%rd120], %f2981; - add.s64 %rd121, %rd120, %rd109; - st.global.f32 [%rd121], %f2985; - cvta.to.global.u64 %rd122, %rd126; - add.s64 %rd123, %rd122, %rd107; - st.global.f32 [%rd123], %f3371; - -BB3_227: + mul.f32 %f2789, %f2640, %f2788; + fma.rn.f32 %f2790, %f2630, %f2789, 0f00000000; + fma.rn.f32 %f2791, %f2651, %f2786, %f2790; + sub.f32 %f2792, %f2778, %f2791; + mul.f32 %f2793, %f2614, %f2792; + sub.f32 %f2794, %f1552, %f2777; + fma.rn.f32 %f2795, %f2616, %f2794, %f2779; + sub.f32 %f2796, %f2562, %f2795; + fma.rn.f32 %f2797, %f2621, %f2794, %f2782; + fma.rn.f32 %f2798, %f2642, %f2796, %f2797; + sub.f32 %f2799, %f1552, %f2798; + div.rn.f32 %f2800, %f2799, %f2662; + fma.rn.f32 %f2801, %f2656, %f2800, 0f00000000; + sub.f32 %f2802, %f2796, %f2801; + mul.f32 %f2803, %f2640, %f2802; + sub.f32 %f2804, %f1552, %f2795; + fma.rn.f32 %f2805, %f2642, %f2804, %f2797; + sub.f32 %f2806, %f2562, %f2805; + div.rn.f32 %f2807, %f2806, %f2662; + cvta.to.global.u64 %rd30, %rd50; + mul.wide.s32 %rd31, %r831, 4; + add.s64 %rd32, %rd30, %rd31; + st.global.f32 [%rd32], %f3048; + add.s32 %r825, %r831, %r835; + mul.wide.s32 %rd33, %r835, 4; + add.s64 %rd34, %rd32, %rd33; + st.global.f32 [%rd34], %f3047; + add.s32 %r826, %r825, %r835; + mul.wide.s32 %rd35, %r826, 4; + add.s64 %rd36, %rd30, %rd35; + st.global.f32 [%rd36], %f3046; + shl.b32 %r827, %r835, 2; + cvt.s64.s32 %rd37, %r827; + add.s64 %rd38, %rd36, %rd37; + st.global.f32 [%rd38], %f3045; + add.s64 %rd39, %rd38, %rd37; + st.global.f32 [%rd39], %f3044; + add.s64 %rd40, %rd39, %rd37; + st.global.f32 [%rd40], %f3043; + cvta.to.global.u64 %rd41, %rd51; + add.s64 %rd42, %rd41, %rd31; + st.global.f32 [%rd42], %f2708; + add.s64 %rd43, %rd42, %rd33; + st.global.f32 [%rd43], %f2747; + add.s64 %rd44, %rd41, %rd35; + st.global.f32 [%rd44], %f2775; + add.s64 %rd45, %rd44, %rd37; + st.global.f32 [%rd45], %f2793; + add.s64 %rd46, %rd45, %rd37; + st.global.f32 [%rd46], %f2803; + add.s64 %rd47, %rd46, %rd37; + st.global.f32 [%rd47], %f2807; + cvta.to.global.u64 %rd48, %rd52; + add.s64 %rd49, %rd48, %rd31; + st.global.f32 [%rd49], %f3119; + +$L__BB3_463: ret; -} +} // .globl _Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i .visible .entry _Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i( .param .u64 _Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_0, @@ -14973,2715 +25508,3577 @@ BB3_227: .param .u32 _Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_10 ) { - .local .align 16 .b8 __local_depot4[64]; - .reg .b64 %SP; - .reg .b64 %SPL; - .reg .pred %p<229>; - .reg .f32 %f<2188>; - .reg .b32 %r<270>; - .reg .b64 %rd<92>; - - - mov.u64 %SPL, __local_depot4; - ld.param.u64 %rd23, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_0]; - ld.param.u64 %rd24, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_1]; - ld.param.u64 %rd25, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_2]; - ld.param.f32 %f407, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_3]; - ld.param.u32 %r49, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_4]; - ld.param.u32 %r50, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_5]; - ld.param.u32 %r51, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_6]; - ld.param.u32 %r52, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_10]; - cvta.to.global.u64 %rd1, %rd23; - add.u64 %rd2, %SPL, 0; - mov.u32 %r53, %ntid.x; - mov.u32 %r54, %ctaid.x; - mov.u32 %r55, %tid.x; - mad.lo.s32 %r1, %r53, %r54, %r55; - setp.ge.s32 %p13, %r1, %r52; - @%p13 bra BB4_151; - - mov.u32 %r56, 0; - st.local.v4.u32 [%rd2], {%r56, %r56, %r56, %r56}; - st.local.v4.u32 [%rd2+16], {%r56, %r56, %r56, %r56}; - st.local.v4.u32 [%rd2+32], {%r56, %r56, %r56, %r56}; - st.local.v4.u32 [%rd2+48], {%r56, %r56, %r56, %r56}; - mul.lo.s32 %r57, %r49, %r49; - mul.lo.s32 %r2, %r57, %r1; - mov.f32 %f410, 0f00000000; - setp.lt.s32 %p14, %r49, 1; - mov.f32 %f1, %f410; - mov.f32 %f2, %f410; - mov.f32 %f3, %f410; - @%p14 bra BB4_15; - - and.b32 %r3, %r49, 3; - shl.b32 %r4, %r49, 2; - mov.f32 %f413, 0f00000000; - mov.u32 %r58, 0; - mov.u32 %r250, %r58; - mov.f32 %f1, %f413; - mov.f32 %f2, %f413; - mov.f32 %f3, %f413; - -BB4_3: - cvt.rn.f32.s32 %f4, %r250; - setp.eq.s32 %p15, %r3, 0; - @%p15 bra BB4_4; - - setp.eq.s32 %p16, %r3, 1; - @%p16 bra BB4_6; - bra.uni BB4_7; - -BB4_6: - mov.u32 %r252, %r58; - bra.uni BB4_11; - -BB4_4: - mov.u32 %r254, %r58; - mov.f32 %f2070, %f1; - mov.f32 %f2071, %f2; - mov.f32 %f2072, %f3; - mov.f32 %f1, %f413; - mov.f32 %f2, %f413; - mov.f32 %f3, %f413; - bra.uni BB4_12; - -BB4_7: - setp.eq.s32 %p17, %r3, 2; - @%p17 bra BB4_8; - bra.uni BB4_9; - -BB4_8: - mov.u32 %r251, %r58; - bra.uni BB4_10; - -BB4_9: - add.s32 %r63, %r250, %r2; - mul.wide.s32 %rd32, %r63, 4; - add.s64 %rd33, %rd1, %rd32; - ld.global.f32 %f417, [%rd33]; - fma.rn.f32 %f3, %f4, %f417, %f3; - fma.rn.f32 %f2, %f417, 0f00000000, %f2; - add.f32 %f1, %f1, %f417; - mov.u32 %r251, 1; - -BB4_10: - neg.s32 %r64, %r251; - and.b32 %r65, %r64, %r49; - add.s32 %r66, %r65, %r250; - add.s32 %r67, %r66, %r2; - mul.wide.s32 %rd34, %r67, 4; - add.s64 %rd35, %rd1, %rd34; - ld.global.f32 %f418, [%rd35]; - fma.rn.f32 %f3, %f4, %f418, %f3; - cvt.rn.f32.s32 %f419, %r251; - fma.rn.f32 %f2, %f419, %f418, %f2; - add.f32 %f1, %f1, %f418; - add.s32 %r252, %r251, 1; - -BB4_11: - mad.lo.s32 %r68, %r252, %r49, %r250; - add.s32 %r69, %r68, %r2; - mul.wide.s32 %rd36, %r69, 4; - add.s64 %rd37, %rd1, %rd36; - ld.global.f32 %f420, [%rd37]; - fma.rn.f32 %f2072, %f4, %f420, %f3; - cvt.rn.f32.s32 %f421, %r252; - fma.rn.f32 %f2071, %f421, %f420, %f2; - add.f32 %f2070, %f1, %f420; - add.s32 %r254, %r252, 1; - mov.f32 %f1, %f2070; - mov.f32 %f2, %f2071; - mov.f32 %f3, %f2072; - -BB4_12: - setp.lt.u32 %p18, %r49, 4; - @%p18 bra BB4_14; - -BB4_13: - mad.lo.s32 %r70, %r254, %r49, %r250; - add.s32 %r71, %r70, %r2; - mul.wide.s32 %rd38, %r71, 4; - add.s64 %rd39, %rd1, %rd38; - ld.global.f32 %f422, [%rd39]; - fma.rn.f32 %f423, %f4, %f422, %f2072; - cvt.rn.f32.s32 %f424, %r254; - fma.rn.f32 %f425, %f424, %f422, %f2071; - add.f32 %f426, %f2070, %f422; - cvt.s64.s32 %rd40, %r4; - add.s64 %rd41, %rd39, %rd40; - ld.global.f32 %f427, [%rd41]; - fma.rn.f32 %f428, %f4, %f427, %f423; - add.s32 %r72, %r254, 1; - cvt.rn.f32.s32 %f429, %r72; - fma.rn.f32 %f430, %f429, %f427, %f425; - add.f32 %f431, %f426, %f427; - add.s64 %rd42, %rd41, %rd40; - ld.global.f32 %f432, [%rd42]; - fma.rn.f32 %f433, %f4, %f432, %f428; - add.s32 %r73, %r254, 2; - cvt.rn.f32.s32 %f434, %r73; - fma.rn.f32 %f435, %f434, %f432, %f430; - add.f32 %f436, %f431, %f432; - add.s64 %rd43, %rd42, %rd40; - ld.global.f32 %f437, [%rd43]; - fma.rn.f32 %f2072, %f4, %f437, %f433; - add.s32 %r74, %r254, 3; - cvt.rn.f32.s32 %f438, %r74; - fma.rn.f32 %f2071, %f438, %f437, %f435; - add.f32 %f2070, %f436, %f437; - add.s32 %r254, %r254, 4; - setp.lt.s32 %p19, %r254, %r49; - mov.f32 %f1, %f2070; - mov.f32 %f2, %f2071; - mov.f32 %f3, %f2072; - @%p19 bra BB4_13; - -BB4_14: - add.s32 %r250, %r250, 1; - setp.lt.s32 %p20, %r250, %r49; - @%p20 bra BB4_3; - -BB4_15: - div.rn.f32 %f2152, %f3, %f1; - div.rn.f32 %f2151, %f2, %f1; - mov.f32 %f441, 0f3F000000; - div.rn.f32 %f442, %f441, %f407; - div.rn.f32 %f40, %f442, %f407; - mov.f32 %f2081, 0f51BA43B7; - mov.f32 %f2082, %f410; - @%p14 bra BB4_34; - - and.b32 %r14, %r49, 3; - mov.f32 %f2082, 0f00000000; - mov.u32 %r75, 0; - mov.f32 %f2081, 0f51BA43B7; - mov.u32 %r255, %r75; - -BB4_17: - mov.u32 %r256, %r75; - -BB4_18: - cvt.rn.f32.s32 %f447, %r256; - mul.f32 %f448, %f447, %f447; - mul.f32 %f45, %f40, %f448; - neg.f32 %f46, %f45; - mul.f32 %f47, %f45, 0fBFB8AA3B; - mov.f32 %f446, 0f00000000; - mov.f32 %f2099, %f446; - mov.f32 %f2100, %f446; - mov.u32 %r257, %r75; - -BB4_19: - sub.s32 %r79, %r257, %r255; - cvt.rn.f32.s32 %f50, %r79; - mul.lo.s32 %r18, %r257, %r49; - setp.eq.s32 %p22, %r14, 0; - @%p22 bra BB4_20; - - setp.eq.s32 %p23, %r14, 1; - @%p23 bra BB4_24; - bra.uni BB4_22; - -BB4_24: - mul.f32 %f463, %f50, %f50; - mul.f32 %f2090, %f40, %f463; - neg.f32 %f464, %f2090; - mul.f32 %f465, %f2090, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f466, %f465; - mov.f32 %f467, 0fBF317200; - fma.rn.f32 %f468, %f466, %f467, %f464; - mov.f32 %f469, 0fB5BFBE8E; - fma.rn.f32 %f470, %f466, %f469, %f468; - mul.f32 %f471, %f470, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f472, %f471; - add.f32 %f473, %f466, 0f00000000; - ex2.approx.f32 %f474, %f473; - mul.f32 %f2089, %f472, %f474; - mov.u32 %r259, 0; - bra.uni BB4_27; - -BB4_20: - mov.f32 %f2093, %f2099; - mov.f32 %f2094, %f2100; - mov.u32 %r261, %r75; - mov.f32 %f2099, %f446; - mov.f32 %f2100, %f446; - bra.uni BB4_28; - -BB4_22: - setp.ne.s32 %p24, %r14, 2; - @%p24 bra BB4_25; - - mul.f32 %f451, %f50, %f50; - mul.f32 %f2090, %f40, %f451; - neg.f32 %f452, %f2090; - mul.f32 %f453, %f2090, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f454, %f453; - mov.f32 %f455, 0fBF317200; - fma.rn.f32 %f456, %f454, %f455, %f452; - mov.f32 %f457, 0fB5BFBE8E; - fma.rn.f32 %f458, %f454, %f457, %f456; - mul.f32 %f459, %f458, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f460, %f459; - add.f32 %f461, %f454, 0f00000000; - ex2.approx.f32 %f462, %f461; - mul.f32 %f2089, %f460, %f462; - mov.u32 %r258, 0; - bra.uni BB4_26; - -BB4_25: - setp.lt.f32 %p25, %f45, 0fC2D20000; - mul.f32 %f475, %f50, %f50; - mul.f32 %f2090, %f40, %f475; - neg.f32 %f476, %f2090; - mul.f32 %f477, %f2090, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f478, %f477; - mov.f32 %f479, 0fBF317200; - fma.rn.f32 %f480, %f478, %f479, %f476; - mov.f32 %f481, 0fB5BFBE8E; - fma.rn.f32 %f482, %f478, %f481, %f480; - mul.f32 %f483, %f482, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f484, %f483; - add.f32 %f485, %f478, 0f00000000; - ex2.approx.f32 %f486, %f485; - mul.f32 %f2089, %f484, %f486; - setp.gt.f32 %p26, %f2090, 0f42D20000; - selp.f32 %f487, 0f00000000, %f2089, %p26; - setp.lt.f32 %p27, %f2090, 0fC2D20000; - selp.f32 %f488, 0f7F800000, %f487, %p27; - cvt.rzi.f32.f32 %f489, %f47; - fma.rn.f32 %f490, %f489, %f479, %f46; - fma.rn.f32 %f491, %f489, %f481, %f490; - mul.f32 %f492, %f491, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f493, %f492; - add.f32 %f494, %f489, 0f00000000; - ex2.approx.f32 %f495, %f494; - mul.f32 %f496, %f493, %f495; - setp.gt.f32 %p28, %f45, 0f42D20000; - selp.f32 %f497, 0f00000000, %f496, %p28; - selp.f32 %f498, 0f7F800000, %f497, %p25; - mul.f32 %f499, %f488, %f498; - add.s32 %r83, %r18, %r2; - mul.wide.s32 %rd44, %r83, 4; - add.s64 %rd45, %rd1, %rd44; - ld.global.f32 %f500, [%rd45]; - fma.rn.f32 %f2100, %f500, %f499, %f2100; - add.f32 %f2099, %f2099, %f499; - mov.u32 %r258, 1; - -BB4_26: - sub.s32 %r84, %r256, %r258; - cvt.rn.f32.s32 %f501, %r84; - mul.f32 %f502, %f501, %f501; - setp.gt.f32 %p29, %f2090, 0f42D20000; - selp.f32 %f503, 0f00000000, %f2089, %p29; - setp.lt.f32 %p30, %f2090, 0fC2D20000; - selp.f32 %f504, 0f7F800000, %f503, %p30; - mul.f32 %f505, %f40, %f502; - neg.f32 %f506, %f505; - mul.f32 %f507, %f505, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f508, %f507; - mov.f32 %f509, 0fBF317200; - fma.rn.f32 %f510, %f508, %f509, %f506; - mov.f32 %f511, 0fB5BFBE8E; - fma.rn.f32 %f512, %f508, %f511, %f510; - mul.f32 %f513, %f512, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f514, %f513; - add.f32 %f515, %f508, 0f00000000; - ex2.approx.f32 %f516, %f515; - mul.f32 %f517, %f514, %f516; - setp.gt.f32 %p31, %f505, 0f42D20000; - selp.f32 %f518, 0f00000000, %f517, %p31; - setp.lt.f32 %p32, %f505, 0fC2D20000; - selp.f32 %f519, 0f7F800000, %f518, %p32; - mul.f32 %f520, %f504, %f519; - add.s32 %r85, %r258, %r18; - add.s32 %r86, %r85, %r2; - mul.wide.s32 %rd46, %r86, 4; - add.s64 %rd47, %rd1, %rd46; - ld.global.f32 %f521, [%rd47]; - fma.rn.f32 %f2100, %f521, %f520, %f2100; - add.f32 %f2099, %f2099, %f520; - add.s32 %r259, %r258, 1; - -BB4_27: - sub.s32 %r87, %r256, %r259; - cvt.rn.f32.s32 %f522, %r87; - mul.f32 %f523, %f522, %f522; - setp.gt.f32 %p33, %f2090, 0f42D20000; - selp.f32 %f524, 0f00000000, %f2089, %p33; - setp.lt.f32 %p34, %f2090, 0fC2D20000; - selp.f32 %f525, 0f7F800000, %f524, %p34; - mul.f32 %f526, %f40, %f523; - neg.f32 %f527, %f526; - mul.f32 %f528, %f526, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f529, %f528; - mov.f32 %f530, 0fBF317200; - fma.rn.f32 %f531, %f529, %f530, %f527; - mov.f32 %f532, 0fB5BFBE8E; - fma.rn.f32 %f533, %f529, %f532, %f531; - mul.f32 %f534, %f533, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f535, %f534; - add.f32 %f536, %f529, 0f00000000; - ex2.approx.f32 %f537, %f536; - mul.f32 %f538, %f535, %f537; - setp.gt.f32 %p35, %f526, 0f42D20000; - selp.f32 %f539, 0f00000000, %f538, %p35; - setp.lt.f32 %p36, %f526, 0fC2D20000; - selp.f32 %f540, 0f7F800000, %f539, %p36; - mul.f32 %f541, %f525, %f540; - add.s32 %r88, %r259, %r18; - add.s32 %r89, %r88, %r2; - mul.wide.s32 %rd48, %r89, 4; - add.s64 %rd49, %rd1, %rd48; - ld.global.f32 %f542, [%rd49]; - fma.rn.f32 %f2094, %f542, %f541, %f2100; - add.f32 %f2093, %f2099, %f541; - add.s32 %r261, %r259, 1; - mov.f32 %f2099, %f2093; - mov.f32 %f2100, %f2094; - -BB4_28: - setp.lt.u32 %p37, %r49, 4; - @%p37 bra BB4_31; - - mul.f32 %f543, %f50, %f50; - mul.f32 %f544, %f40, %f543; - neg.f32 %f545, %f544; - mul.f32 %f546, %f544, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f547, %f546; - mov.f32 %f548, 0fBF317200; - fma.rn.f32 %f549, %f547, %f548, %f545; - mov.f32 %f550, 0fB5BFBE8E; - fma.rn.f32 %f551, %f547, %f550, %f549; - mul.f32 %f552, %f551, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f553, %f552; - add.f32 %f554, %f547, 0f00000000; - ex2.approx.f32 %f555, %f554; - mul.f32 %f556, %f553, %f555; - setp.gt.f32 %p38, %f544, 0f42D20000; - selp.f32 %f557, 0f00000000, %f556, %p38; - setp.lt.f32 %p39, %f544, 0fC2D20000; - selp.f32 %f75, 0f7F800000, %f557, %p39; - mov.f32 %f2099, %f2093; - mov.f32 %f2100, %f2094; - -BB4_30: - sub.s32 %r90, %r256, %r261; - cvt.rn.f32.s32 %f558, %r90; - mul.f32 %f559, %f558, %f558; - mul.f32 %f560, %f40, %f559; - neg.f32 %f561, %f560; - mul.f32 %f562, %f560, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f563, %f562; - fma.rn.f32 %f565, %f563, %f548, %f561; - fma.rn.f32 %f567, %f563, %f550, %f565; - mul.f32 %f568, %f567, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f569, %f568; - add.f32 %f570, %f563, 0f00000000; - ex2.approx.f32 %f571, %f570; - mul.f32 %f572, %f569, %f571; - setp.gt.f32 %p40, %f560, 0f42D20000; - selp.f32 %f573, 0f00000000, %f572, %p40; - setp.lt.f32 %p41, %f560, 0fC2D20000; - selp.f32 %f574, 0f7F800000, %f573, %p41; - mul.f32 %f575, %f75, %f574; - add.s32 %r91, %r261, %r18; - add.s32 %r92, %r91, %r2; - mul.wide.s32 %rd50, %r92, 4; - add.s64 %rd51, %rd1, %rd50; - ld.global.f32 %f576, [%rd51]; - fma.rn.f32 %f577, %f576, %f575, %f2100; - add.f32 %f578, %f2099, %f575; - add.s32 %r93, %r261, 1; - sub.s32 %r94, %r256, %r93; - cvt.rn.f32.s32 %f579, %r94; - mul.f32 %f580, %f579, %f579; - mul.f32 %f581, %f40, %f580; - neg.f32 %f582, %f581; - mul.f32 %f583, %f581, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f584, %f583; - fma.rn.f32 %f585, %f584, %f548, %f582; - fma.rn.f32 %f586, %f584, %f550, %f585; - mul.f32 %f587, %f586, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f588, %f587; - add.f32 %f589, %f584, 0f00000000; - ex2.approx.f32 %f590, %f589; - mul.f32 %f591, %f588, %f590; - setp.gt.f32 %p42, %f581, 0f42D20000; - selp.f32 %f592, 0f00000000, %f591, %p42; - setp.lt.f32 %p43, %f581, 0fC2D20000; - selp.f32 %f593, 0f7F800000, %f592, %p43; - mul.f32 %f594, %f75, %f593; - ld.global.f32 %f595, [%rd51+4]; - fma.rn.f32 %f596, %f595, %f594, %f577; - add.f32 %f597, %f578, %f594; - add.s32 %r95, %r261, 2; - sub.s32 %r96, %r256, %r95; - cvt.rn.f32.s32 %f598, %r96; - mul.f32 %f599, %f598, %f598; - mul.f32 %f600, %f40, %f599; - neg.f32 %f601, %f600; - mul.f32 %f602, %f600, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f603, %f602; - fma.rn.f32 %f604, %f603, %f548, %f601; - fma.rn.f32 %f605, %f603, %f550, %f604; - mul.f32 %f606, %f605, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f607, %f606; - add.f32 %f608, %f603, 0f00000000; - ex2.approx.f32 %f609, %f608; - mul.f32 %f610, %f607, %f609; - setp.gt.f32 %p44, %f600, 0f42D20000; - selp.f32 %f611, 0f00000000, %f610, %p44; - setp.lt.f32 %p45, %f600, 0fC2D20000; - selp.f32 %f612, 0f7F800000, %f611, %p45; - mul.f32 %f613, %f75, %f612; - ld.global.f32 %f614, [%rd51+8]; - fma.rn.f32 %f615, %f614, %f613, %f596; - add.f32 %f616, %f597, %f613; - add.s32 %r97, %r261, 3; - sub.s32 %r98, %r256, %r97; - cvt.rn.f32.s32 %f617, %r98; - mul.f32 %f618, %f617, %f617; - mul.f32 %f619, %f40, %f618; - neg.f32 %f620, %f619; - mul.f32 %f621, %f619, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f622, %f621; - fma.rn.f32 %f623, %f622, %f548, %f620; - fma.rn.f32 %f624, %f622, %f550, %f623; - mul.f32 %f625, %f624, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f626, %f625; - add.f32 %f627, %f622, 0f00000000; - ex2.approx.f32 %f628, %f627; - mul.f32 %f629, %f626, %f628; - setp.gt.f32 %p46, %f619, 0f42D20000; - selp.f32 %f630, 0f00000000, %f629, %p46; - setp.lt.f32 %p47, %f619, 0fC2D20000; - selp.f32 %f631, 0f7F800000, %f630, %p47; - mul.f32 %f632, %f75, %f631; - ld.global.f32 %f633, [%rd51+12]; - fma.rn.f32 %f2100, %f633, %f632, %f615; - add.f32 %f2099, %f616, %f632; - add.s32 %r261, %r261, 4; - setp.lt.s32 %p48, %r261, %r49; - @%p48 bra BB4_30; - -BB4_31: - add.s32 %r257, %r257, 1; - setp.lt.s32 %p49, %r257, %r49; - @%p49 bra BB4_19; - - div.rn.f32 %f634, %f2100, %f2099; - max.f32 %f2082, %f2082, %f634; - min.f32 %f2081, %f2081, %f634; - add.s32 %r256, %r256, 1; - setp.lt.s32 %p50, %r256, %r49; - @%p50 bra BB4_18; - - add.s32 %r255, %r255, 1; - setp.lt.s32 %p51, %r255, %r49; - @%p51 bra BB4_17; - -BB4_34: - sub.f32 %f636, %f2082, %f2081; - add.f32 %f637, %f636, %f636; - fma.rn.f32 %f638, %f636, 0f40000000, %f637; - mul.f32 %f639, %f638, 0f40490FD8; - mul.f32 %f640, %f639, %f407; - mul.f32 %f641, %f640, %f407; - max.f32 %f2150, %f410, %f641; - setp.lt.s32 %p52, %r51, 1; - @%p52 bra BB4_88; - - cvt.rn.f32.s32 %f87, %r50; - mul.f32 %f644, %f407, %f407; - mul.f32 %f88, %f644, %f407; - mov.u32 %r262, 0; - cvta.to.global.u64 %rd52, %rd24; - cvta.to.global.u64 %rd58, %rd25; - -BB4_36: - mov.f32 %f2116, 0f00000000; - mov.f32 %f2117, %f2116; - mov.f32 %f2118, %f2116; - mov.f32 %f2119, %f2116; - mov.f32 %f2120, %f2116; - mov.f32 %f2121, %f2116; - mov.f32 %f2122, %f2116; - mov.f32 %f2123, %f2116; - @%p14 bra BB4_87; - - div.rn.f32 %f661, %f2150, 0fC0206C98; - div.rn.f32 %f94, %f661, %f407; - div.rn.f32 %f95, %f661, %f88; - mov.u32 %r263, 0; - mov.f32 %f2116, 0f00000000; - mov.f32 %f2117, %f2116; - mov.f32 %f2118, %f2116; - mov.f32 %f2119, %f2116; - mov.f32 %f2120, %f2116; - mov.f32 %f2121, %f2116; - mov.f32 %f2122, %f2116; - mov.f32 %f2123, %f2116; - -BB4_38: - mov.u32 %r264, 0; - cvt.rn.f32.s32 %f104, %r263; - sub.f32 %f105, %f104, %f2152; - add.f32 %f662, %f105, 0f3F800000; - sqrt.rn.f32 %f106, %f40; - mul.f32 %f107, %f662, %f106; - abs.f32 %f108, %f107; - mul.f32 %f109, %f107, %f107; - mul.f32 %f110, %f105, %f106; - abs.f32 %f111, %f110; - shl.b32 %r106, %r1, 1; - mul.wide.s32 %rd53, %r106, 4; - add.s64 %rd54, %rd52, %rd53; - ld.global.f32 %f113, [%rd54+4]; - ld.global.f32 %f114, [%rd54]; - add.f32 %f663, %f104, 0f3F800000; - sub.f32 %f115, %f663, %f2152; - div.rn.f32 %f116, %f115, %f407; - mov.f32 %f664, 0f3F800000; - cvt.rzi.f32.f32 %f665, %f664; - add.f32 %f666, %f665, %f665; - mov.f32 %f667, 0f40000000; - sub.f32 %f668, %f667, %f666; - abs.f32 %f117, %f668; - setp.eq.f32 %p54, %f117, 0f3F800000; - abs.f32 %f118, %f116; - setp.lt.f32 %p55, %f118, 0f00800000; - mul.f32 %f669, %f118, 0f4B800000; - selp.f32 %f670, 0fC3170000, 0fC2FE0000, %p55; - selp.f32 %f671, %f669, %f118, %p55; - mov.b32 %r107, %f671; - and.b32 %r108, %r107, 8388607; - or.b32 %r109, %r108, 1065353216; - mov.b32 %f672, %r109; - shr.u32 %r110, %r107, 23; - cvt.rn.f32.u32 %f673, %r110; - add.f32 %f674, %f670, %f673; - setp.gt.f32 %p56, %f672, 0f3FB504F3; - mul.f32 %f675, %f672, 0f3F000000; - add.f32 %f676, %f674, 0f3F800000; - selp.f32 %f677, %f675, %f672, %p56; - selp.f32 %f678, %f676, %f674, %p56; - add.f32 %f119, %f677, 0fBF800000; - add.f32 %f120, %f677, 0f3F800000; - add.f32 %f121, %f119, %f119; - mov.f32 %f679, 0f3F317200; - mul.rn.f32 %f122, %f678, %f679; - mov.f32 %f680, 0f35BFBE8E; - mul.rn.f32 %f123, %f678, %f680; - setp.lt.f32 %p57, %f116, 0f00000000; - and.pred %p1, %p57, %p54; - div.rn.f32 %f124, %f105, %f407; - abs.f32 %f125, %f124; - setp.lt.f32 %p58, %f125, 0f00800000; - mul.f32 %f681, %f125, 0f4B800000; - selp.f32 %f682, 0fC3170000, 0fC2FE0000, %p58; - selp.f32 %f683, %f681, %f125, %p58; - mov.b32 %r111, %f683; - and.b32 %r112, %r111, 8388607; - or.b32 %r113, %r112, 1065353216; - mov.b32 %f684, %r113; - shr.u32 %r114, %r111, 23; - cvt.rn.f32.u32 %f685, %r114; - add.f32 %f686, %f682, %f685; - setp.gt.f32 %p59, %f684, 0f3FB504F3; - mul.f32 %f687, %f684, 0f3F000000; - add.f32 %f688, %f686, 0f3F800000; - selp.f32 %f689, %f687, %f684, %p59; - selp.f32 %f690, %f688, %f686, %p59; - add.f32 %f126, %f689, 0fBF800000; - add.f32 %f127, %f689, 0f3F800000; - add.f32 %f128, %f126, %f126; - mul.rn.f32 %f129, %f690, %f679; - mul.rn.f32 %f130, %f690, %f680; - setp.lt.f32 %p60, %f124, 0f00000000; - and.pred %p2, %p60, %p54; - -BB4_39: - setp.ltu.f32 %p61, %f108, 0f3F800000; - @%p61 bra BB4_41; - bra.uni BB4_40; - -BB4_41: - mov.f32 %f709, 0f3BA0C9F8; - mov.f32 %f710, 0fBA1268FB; - fma.rn.f32 %f711, %f710, %f109, %f709; - mov.f32 %f712, 0fBCDABFD4; - fma.rn.f32 %f713, %f711, %f109, %f712; - mov.f32 %f714, 0f3DE70331; - fma.rn.f32 %f715, %f713, %f109, %f714; - mov.f32 %f716, 0fBEC09330; - fma.rn.f32 %f717, %f715, %f109, %f716; - mov.f32 %f718, 0f3F906EBA; - fma.rn.f32 %f719, %f717, %f109, %f718; - mul.f32 %f2124, %f107, %f719; - bra.uni BB4_42; - -BB4_40: - mov.f32 %f1941, 0f3F800000; - setp.ltu.f32 %p62, %f108, 0f407AD445; - mov.f32 %f691, 0f3A03BB71; - mov.f32 %f692, 0fB7B730FB; - fma.rn.f32 %f693, %f692, %f108, %f691; - mov.f32 %f694, 0fBBACA3B3; - fma.rn.f32 %f695, %f693, %f108, %f694; - mov.f32 %f696, 0f3D0A7445; - fma.rn.f32 %f697, %f695, %f108, %f696; - mov.f32 %f698, 0fBE1B3B75; - fma.rn.f32 %f699, %f697, %f108, %f698; - mov.f32 %f700, 0fBF6B385A; - fma.rn.f32 %f701, %f699, %f108, %f700; - mov.f32 %f702, 0fBFD0316E; - fma.rn.f32 %f703, %f701, %f108, %f702; - mov.f32 %f704, 0fBA031CCE; - fma.rn.f32 %f705, %f703, %f108, %f704; - ex2.approx.ftz.f32 %f706, %f705; - sub.f32 %f708, %f1941, %f706; - mov.b32 %r115, %f708; - selp.b32 %r116, %r115, 1065353216, %p62; - mov.b32 %r117, %f107; - and.b32 %r118, %r117, -2147483648; - or.b32 %r119, %r116, %r118; - mov.b32 %f2124, %r119; - -BB4_42: - setp.ltu.f32 %p63, %f111, 0f3F800000; - @%p63 bra BB4_44; - bra.uni BB4_43; - -BB4_44: - cvt.rn.f32.s32 %f1977, %r263; - sub.f32 %f1976, %f1977, %f2152; - mul.f32 %f1975, %f1976, %f106; - mul.f32 %f1974, %f1975, %f1975; - mov.f32 %f738, 0f3BA0C9F8; - mov.f32 %f739, 0fBA1268FB; - fma.rn.f32 %f740, %f739, %f1974, %f738; - mov.f32 %f741, 0fBCDABFD4; - fma.rn.f32 %f742, %f740, %f1974, %f741; - mov.f32 %f743, 0f3DE70331; - fma.rn.f32 %f744, %f742, %f1974, %f743; - mov.f32 %f745, 0fBEC09330; - fma.rn.f32 %f746, %f744, %f1974, %f745; - mov.f32 %f747, 0f3F906EBA; - fma.rn.f32 %f748, %f746, %f1974, %f747; - mul.f32 %f2125, %f1975, %f748; - bra.uni BB4_45; - -BB4_43: - cvt.rn.f32.s32 %f1985, %r263; - sub.f32 %f1984, %f1985, %f2152; - mul.f32 %f1983, %f1984, %f106; - mov.f32 %f1942, 0f3F800000; - setp.ltu.f32 %p64, %f111, 0f407AD445; - mov.f32 %f720, 0f3A03BB71; - mov.f32 %f721, 0fB7B730FB; - fma.rn.f32 %f722, %f721, %f111, %f720; - mov.f32 %f723, 0fBBACA3B3; - fma.rn.f32 %f724, %f722, %f111, %f723; - mov.f32 %f725, 0f3D0A7445; - fma.rn.f32 %f726, %f724, %f111, %f725; - mov.f32 %f727, 0fBE1B3B75; - fma.rn.f32 %f728, %f726, %f111, %f727; - mov.f32 %f729, 0fBF6B385A; - fma.rn.f32 %f730, %f728, %f111, %f729; - mov.f32 %f731, 0fBFD0316E; - fma.rn.f32 %f732, %f730, %f111, %f731; - mov.f32 %f733, 0fBA031CCE; - fma.rn.f32 %f734, %f732, %f111, %f733; - ex2.approx.ftz.f32 %f735, %f734; - sub.f32 %f737, %f1942, %f735; - mov.b32 %r120, %f737; - selp.b32 %r121, %r120, 1065353216, %p64; - mov.b32 %r122, %f1983; - and.b32 %r123, %r122, -2147483648; - or.b32 %r124, %r121, %r123; - mov.b32 %f2125, %r124; - -BB4_45: - sub.f32 %f749, %f2124, %f2125; - mul.f32 %f145, %f749, 0f3F000000; - cvt.rn.f32.s32 %f146, %r264; - sub.f32 %f147, %f146, %f2151; - add.f32 %f750, %f147, 0f3F800000; - mul.f32 %f148, %f750, %f106; - abs.f32 %f149, %f148; - setp.ltu.f32 %p65, %f149, 0f3F800000; - @%p65 bra BB4_47; - bra.uni BB4_46; - -BB4_47: - mul.f32 %f769, %f148, %f148; - mov.f32 %f770, 0f3BA0C9F8; - mov.f32 %f771, 0fBA1268FB; - fma.rn.f32 %f772, %f771, %f769, %f770; - mov.f32 %f773, 0fBCDABFD4; - fma.rn.f32 %f774, %f772, %f769, %f773; - mov.f32 %f775, 0f3DE70331; - fma.rn.f32 %f776, %f774, %f769, %f775; - mov.f32 %f777, 0fBEC09330; - fma.rn.f32 %f778, %f776, %f769, %f777; - mov.f32 %f779, 0f3F906EBA; - fma.rn.f32 %f780, %f778, %f769, %f779; - mul.f32 %f2126, %f148, %f780; - bra.uni BB4_48; - -BB4_46: - mov.f32 %f1943, 0f3F800000; - mov.f32 %f751, 0f3A03BB71; - mov.f32 %f752, 0fB7B730FB; - fma.rn.f32 %f753, %f752, %f149, %f751; - mov.f32 %f754, 0fBBACA3B3; - fma.rn.f32 %f755, %f753, %f149, %f754; - mov.f32 %f756, 0f3D0A7445; - fma.rn.f32 %f757, %f755, %f149, %f756; - mov.f32 %f758, 0fBE1B3B75; - fma.rn.f32 %f759, %f757, %f149, %f758; - mov.f32 %f760, 0fBF6B385A; - fma.rn.f32 %f761, %f759, %f149, %f760; - mov.f32 %f762, 0fBFD0316E; - fma.rn.f32 %f763, %f761, %f149, %f762; - mov.f32 %f764, 0fBA031CCE; - fma.rn.f32 %f765, %f763, %f149, %f764; - ex2.approx.ftz.f32 %f766, %f765; - sub.f32 %f768, %f1943, %f766; - mov.b32 %r125, %f768; - setp.ltu.f32 %p66, %f149, 0f407AD445; - selp.b32 %r126, %r125, 1065353216, %p66; - mov.b32 %r127, %f148; - and.b32 %r128, %r127, -2147483648; - or.b32 %r129, %r126, %r128; - mov.b32 %f2126, %r129; - -BB4_48: - cvt.rn.f32.s32 %f1945, %r264; - sub.f32 %f1944, %f1945, %f2151; - mul.f32 %f153, %f1944, %f106; - abs.f32 %f154, %f153; - setp.ltu.f32 %p67, %f154, 0f3F800000; - @%p67 bra BB4_50; - bra.uni BB4_49; - -BB4_50: - mul.f32 %f799, %f153, %f153; - mov.f32 %f800, 0f3BA0C9F8; - mov.f32 %f801, 0fBA1268FB; - fma.rn.f32 %f802, %f801, %f799, %f800; - mov.f32 %f803, 0fBCDABFD4; - fma.rn.f32 %f804, %f802, %f799, %f803; - mov.f32 %f805, 0f3DE70331; - fma.rn.f32 %f806, %f804, %f799, %f805; - mov.f32 %f807, 0fBEC09330; - fma.rn.f32 %f808, %f806, %f799, %f807; - mov.f32 %f809, 0f3F906EBA; - fma.rn.f32 %f810, %f808, %f799, %f809; - mul.f32 %f2127, %f153, %f810; - bra.uni BB4_51; - -BB4_49: - mov.f32 %f1946, 0f3F800000; - mov.f32 %f781, 0f3A03BB71; - mov.f32 %f782, 0fB7B730FB; - fma.rn.f32 %f783, %f782, %f154, %f781; - mov.f32 %f784, 0fBBACA3B3; - fma.rn.f32 %f785, %f783, %f154, %f784; - mov.f32 %f786, 0f3D0A7445; - fma.rn.f32 %f787, %f785, %f154, %f786; - mov.f32 %f788, 0fBE1B3B75; - fma.rn.f32 %f789, %f787, %f154, %f788; - mov.f32 %f790, 0fBF6B385A; - fma.rn.f32 %f791, %f789, %f154, %f790; - mov.f32 %f792, 0fBFD0316E; - fma.rn.f32 %f793, %f791, %f154, %f792; - mov.f32 %f794, 0fBA031CCE; - fma.rn.f32 %f795, %f793, %f154, %f794; - ex2.approx.ftz.f32 %f796, %f795; - sub.f32 %f798, %f1946, %f796; - mov.b32 %r130, %f798; - setp.ltu.f32 %p68, %f154, 0f407AD445; - selp.b32 %r131, %r130, 1065353216, %p68; - mov.b32 %r132, %f153; - and.b32 %r133, %r132, -2147483648; - or.b32 %r134, %r131, %r133; - mov.b32 %f2127, %r134; - -BB4_51: - mov.f32 %f1948, 0f40000000; - cvt.rn.f32.s32 %f1947, %r263; - sub.f32 %f813, %f2126, %f2127; - mul.f32 %f158, %f813, 0f3F000000; - mul.f32 %f814, %f145, %f2150; - fma.rn.f32 %f159, %f158, %f814, %f2081; - mad.lo.s32 %r135, %r264, %r49, %r263; - add.s32 %r136, %r135, %r2; - mul.wide.s32 %rd56, %r136, 4; - add.s64 %rd57, %rd1, %rd56; - ld.global.f32 %f160, [%rd57]; - add.f32 %f815, %f146, %f113; - fma.rn.f32 %f816, %f87, %f815, %f114; - add.f32 %f817, %f1947, %f816; - cvt.rzi.s32.f32 %r137, %f817; - mul.wide.s32 %rd59, %r137, 4; - add.s64 %rd60, %rd58, %rd59; - ld.global.f32 %f2148, [%rd60]; - // inline asm - rcp.approx.ftz.f32 %f811,%f120; - // inline asm - mul.f32 %f818, %f811, %f121; - mul.f32 %f819, %f818, %f818; - mov.f32 %f820, 0f3C4CAF63; - mov.f32 %f821, 0f3B18F0FE; - fma.rn.f32 %f822, %f821, %f819, %f820; - mov.f32 %f823, 0f3DAAAABD; - fma.rn.f32 %f824, %f822, %f819, %f823; - mul.rn.f32 %f825, %f824, %f819; - mul.rn.f32 %f826, %f825, %f818; - sub.f32 %f827, %f119, %f818; - neg.f32 %f828, %f818; - add.f32 %f829, %f827, %f827; - fma.rn.f32 %f830, %f828, %f119, %f829; - mul.rn.f32 %f831, %f811, %f830; - add.f32 %f832, %f826, %f818; - sub.f32 %f833, %f818, %f832; - add.f32 %f834, %f826, %f833; - add.f32 %f835, %f831, %f834; - add.f32 %f836, %f832, %f835; - sub.f32 %f837, %f832, %f836; - add.f32 %f838, %f835, %f837; - add.f32 %f839, %f122, %f836; - sub.f32 %f840, %f122, %f839; + .reg .pred %p<382>; + .reg .f32 %f<1829>; + .reg .b32 %r<526>; + .reg .f64 %fd<359>; + .reg .b64 %rd<56>; + + + ld.param.u64 %rd11, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_0]; + ld.param.u64 %rd6, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_1]; + ld.param.u64 %rd7, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_2]; + ld.param.f32 %f325, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_3]; + ld.param.u32 %r85, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_4]; + ld.param.u32 %r88, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_10]; + cvta.to.global.u64 %rd1, %rd11; + mov.u32 %r89, %ntid.x; + mov.u32 %r90, %ctaid.x; + mov.u32 %r91, %tid.x; + mad.lo.s32 %r1, %r90, %r89, %r91; + setp.ge.s32 %p19, %r1, %r88; + @%p19 bra $L__BB4_261; + + mul.lo.s32 %r92, %r85, %r85; + mul.lo.s32 %r2, %r92, %r1; + setp.lt.s32 %p20, %r85, 1; + mov.f32 %f1721, 0f00000000; + mov.f32 %f1712, %f1721; + mov.f32 %f1713, %f1721; + mov.f32 %f1714, %f1721; + @%p20 bra $L__BB4_11; + + add.s32 %r3, %r85, -1; + and.b32 %r4, %r85, 3; + sub.s32 %r5, %r85, %r4; + shl.b32 %r6, %r85, 2; + mov.u32 %r93, 0; + setp.lt.u32 %p21, %r3, 3; + setp.eq.s32 %p23, %r4, 0; + setp.eq.s32 %p24, %r4, 1; + setp.eq.s32 %p25, %r4, 2; + cvt.s64.s32 %rd14, %r6; + mov.u32 %r511, %r93; + +$L__BB4_3: + cvt.rn.f32.s32 %f4, %r511; + mov.u32 %r514, %r93; + @%p21 bra $L__BB4_6; + + mov.u32 %r514, %r93; + mov.u32 %r513, %r5; + +$L__BB4_5: + mad.lo.s32 %r96, %r514, %r85, %r511; + add.s32 %r97, %r96, %r2; + mul.wide.s32 %rd12, %r97, 4; + add.s64 %rd13, %rd1, %rd12; + ld.global.f32 %f333, [%rd13]; + fma.rn.f32 %f334, %f333, %f4, %f1712; + cvt.rn.f32.s32 %f335, %r514; + fma.rn.f32 %f336, %f333, %f335, %f1713; + add.f32 %f337, %f1714, %f333; + add.s64 %rd15, %rd13, %rd14; + ld.global.f32 %f338, [%rd15]; + fma.rn.f32 %f339, %f338, %f4, %f334; + add.s32 %r98, %r514, 1; + cvt.rn.f32.s32 %f340, %r98; + fma.rn.f32 %f341, %f338, %f340, %f336; + add.f32 %f342, %f337, %f338; + add.s64 %rd16, %rd15, %rd14; + ld.global.f32 %f343, [%rd16]; + fma.rn.f32 %f344, %f343, %f4, %f339; + add.s32 %r99, %r514, 2; + cvt.rn.f32.s32 %f345, %r99; + fma.rn.f32 %f346, %f343, %f345, %f341; + add.f32 %f347, %f342, %f343; + add.s64 %rd17, %rd16, %rd14; + ld.global.f32 %f348, [%rd17]; + fma.rn.f32 %f1712, %f348, %f4, %f344; + add.s32 %r100, %r514, 3; + cvt.rn.f32.s32 %f349, %r100; + fma.rn.f32 %f1713, %f348, %f349, %f346; + add.f32 %f1714, %f347, %f348; + add.s32 %r514, %r514, 4; + add.s32 %r513, %r513, -4; + setp.ne.s32 %p22, %r513, 0; + @%p22 bra $L__BB4_5; + +$L__BB4_6: + @%p23 bra $L__BB4_10; + + mad.lo.s32 %r13, %r514, %r85, %r511; + add.s32 %r101, %r13, %r2; + mul.wide.s32 %rd18, %r101, 4; + add.s64 %rd19, %rd1, %rd18; + ld.global.f32 %f350, [%rd19]; + fma.rn.f32 %f1712, %f350, %f4, %f1712; + cvt.rn.f32.s32 %f351, %r514; + fma.rn.f32 %f1713, %f350, %f351, %f1713; + add.f32 %f1714, %f1714, %f350; + @%p24 bra $L__BB4_10; + + add.s32 %r14, %r13, %r85; + add.s32 %r102, %r14, %r2; + mul.wide.s32 %rd20, %r102, 4; + add.s64 %rd21, %rd1, %rd20; + ld.global.f32 %f352, [%rd21]; + fma.rn.f32 %f1712, %f352, %f4, %f1712; + add.s32 %r103, %r514, 1; + cvt.rn.f32.s32 %f353, %r103; + fma.rn.f32 %f1713, %f352, %f353, %f1713; + add.f32 %f1714, %f1714, %f352; + @%p25 bra $L__BB4_10; + + add.s32 %r104, %r514, 2; + add.s32 %r105, %r14, %r85; + add.s32 %r106, %r105, %r2; + mul.wide.s32 %rd22, %r106, 4; + add.s64 %rd23, %rd1, %rd22; + ld.global.f32 %f354, [%rd23]; + fma.rn.f32 %f1712, %f354, %f4, %f1712; + cvt.rn.f32.s32 %f355, %r104; + fma.rn.f32 %f1713, %f354, %f355, %f1713; + add.f32 %f1714, %f1714, %f354; + +$L__BB4_10: + add.s32 %r511, %r511, 1; + setp.lt.s32 %p26, %r511, %r85; + @%p26 bra $L__BB4_3; + +$L__BB4_11: + div.rn.f32 %f1777, %f1712, %f1714; + div.rn.f32 %f1776, %f1713, %f1714; + mov.f32 %f358, 0f3F000000; + div.rn.f32 %f359, %f358, %f325; + div.rn.f32 %f34, %f359, %f325; + mov.f32 %f1774, 0f51BA43B7; + @%p20 bra $L__BB4_51; + + cvt.f64.f32 %fd1, %f34; + mov.f64 %fd128, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r16}, %fd128; + } + and.b32 %r17, %r16, 2146435072; + and.b32 %r18, %r16, 2147483647; + setp.gt.s32 %p28, %r16, -1; + selp.b32 %r19, 2146435072, 0, %p28; + mov.u32 %r107, 0; + or.b32 %r20, %r19, -2147483648; + mul.wide.s32 %rd24, %r2, 4; + add.s64 %rd2, %rd1, %rd24; + setp.eq.s32 %p30, %r17, 1062207488; + setp.lt.s32 %p31, %r16, 0; + setp.ne.s32 %p36, %r18, 1071644672; + setp.eq.s32 %p63, %r18, 2146435072; + mov.u32 %r515, %r107; + +$L__BB4_13: + mov.u32 %r516, %r107; + +$L__BB4_14: + mov.u32 %r110, 1; + sub.s32 %r24, %r110, %r516; + mov.f32 %f1724, 0f00000000; + mov.f32 %f1725, %f1724; + mov.u32 %r517, %r107; + +$L__BB4_15: + add.s32 %r519, %r516, -1; + sub.s32 %r26, %r517, %r515; + cvt.rn.f32.s32 %f364, %r26; + cvt.f64.f32 %fd2, %f364; + { + .reg .b32 %temp; + mov.b64 {%temp, %r27}, %fd2; + } + abs.f64 %fd129, %fd2; + { // callseq 87, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd129; + .param .b64 param1; + st.param.f64 [param1+0], %fd128; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd3, [retval0+0]; + } // callseq 87 + setp.lt.s32 %p29, %r27, 0; + and.pred %p1, %p29, %p30; + selp.b32 %r112, %r27, 0, %p30; + or.b32 %r113, %r112, 2146435072; + selp.b32 %r28, %r113, %r112, %p31; + add.f64 %fd4, %fd2, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r114}, %fd4; + } + and.b32 %r29, %r114, 2146435072; + setp.ne.s32 %p32, %r29, 2146435072; + setp.gtu.f64 %p33, %fd129, 0d7FF0000000000000; + setp.gt.f64 %p34, %fd129, 0d3FF0000000000000; + selp.b32 %r115, 2146435072, 0, %p34; + xor.b32 %r116, %r115, 2146435072; + selp.b32 %r117, %r116, %r115, %p31; + setp.eq.s32 %p35, %r26, -1; + selp.b32 %r30, 1072693248, %r117, %p35; + and.b32 %r31, %r27, 2147483647; + and.pred %p37, %p36, %p1; + selp.b32 %r32, %r20, %r19, %p37; + or.pred %p2, %p32, %p33; + mul.lo.s32 %r118, %r85, %r517; + mul.wide.s32 %rd25, %r118, 4; + add.s64 %rd55, %rd2, %rd25; + mov.u32 %r518, %r24; + mov.u32 %r520, %r107; + +$L__BB4_16: + not.pred %p38, %p1; + mov.f64 %fd327, %fd3; + @%p38 bra $L__BB4_18; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r119}, %fd3; + } + xor.b32 %r120, %r119, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r121, %temp}, %fd3; + } + mov.b64 %fd327, {%r121, %r120}; + +$L__BB4_18: + setp.eq.s32 %p39, %r26, 0; + @%p39 bra $L__BB4_22; + + setp.gt.s32 %p40, %r27, -1; + @%p40 bra $L__BB4_23; + + cvt.rzi.f64.f64 %fd132, %fd128; + setp.eq.f64 %p41, %fd132, 0d4000000000000000; + @%p41 bra $L__BB4_23; + + mov.f64 %fd327, 0dFFF8000000000000; + bra.uni $L__BB4_23; + +$L__BB4_22: + mov.u32 %r122, 0; + mov.b64 %fd327, {%r122, %r28}; + +$L__BB4_23: + selp.f64 %fd328, %fd327, %fd4, %p32; + @%p2 bra $L__BB4_28; + + { + .reg .b32 %temp; + mov.b64 {%r123, %temp}, %fd128; + } + setp.eq.s32 %p44, %r123, 0; + and.pred %p45, %p63, %p44; + @%p45 bra $L__BB4_27; + bra.uni $L__BB4_25; + +$L__BB4_27: + mov.u32 %r126, 0; + mov.b64 %fd328, {%r126, %r30}; + bra.uni $L__BB4_28; + +$L__BB4_25: + setp.ne.s32 %p46, %r31, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r124, %temp}, %fd2; + } + setp.ne.s32 %p47, %r124, 0; + or.pred %p48, %p46, %p47; + mov.f64 %fd328, %fd327; + @%p48 bra $L__BB4_28; + + mov.u32 %r125, 0; + mov.b64 %fd328, {%r125, %r32}; + +$L__BB4_28: + setp.eq.s32 %p49, %r26, 1; + selp.f64 %fd135, 0d3FF0000000000000, %fd328, %p49; + mov.f64 %fd136, 0d3FF0000000000000; + mul.f64 %fd13, %fd135, %fd1; + neg.f64 %fd137, %fd13; + mov.f64 %fd138, 0d4338000000000000; + mov.f64 %fd139, 0d3FF71547652B82FE; + fma.rn.f64 %fd140, %fd137, %fd139, %fd138; + { + .reg .b32 %temp; + mov.b64 {%r36, %temp}, %fd140; + } + mov.f64 %fd141, 0dC338000000000000; + add.rn.f64 %fd142, %fd140, %fd141; + mov.f64 %fd143, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd144, %fd142, %fd143, %fd137; + mov.f64 %fd145, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd146, %fd142, %fd145, %fd144; + mov.f64 %fd147, 0d3E928AF3FCA213EA; + mov.f64 %fd148, 0d3E5ADE1569CE2BDF; + fma.rn.f64 %fd149, %fd148, %fd146, %fd147; + mov.f64 %fd150, 0d3EC71DEE62401315; + fma.rn.f64 %fd151, %fd149, %fd146, %fd150; + mov.f64 %fd152, 0d3EFA01997C89EB71; + fma.rn.f64 %fd153, %fd151, %fd146, %fd152; + mov.f64 %fd154, 0d3F2A01A014761F65; + fma.rn.f64 %fd155, %fd153, %fd146, %fd154; + mov.f64 %fd156, 0d3F56C16C1852B7AF; + fma.rn.f64 %fd157, %fd155, %fd146, %fd156; + mov.f64 %fd158, 0d3F81111111122322; + fma.rn.f64 %fd159, %fd157, %fd146, %fd158; + mov.f64 %fd160, 0d3FA55555555502A1; + fma.rn.f64 %fd161, %fd159, %fd146, %fd160; + mov.f64 %fd162, 0d3FC5555555555511; + fma.rn.f64 %fd163, %fd161, %fd146, %fd162; + mov.f64 %fd164, 0d3FE000000000000B; + fma.rn.f64 %fd165, %fd163, %fd146, %fd164; + fma.rn.f64 %fd166, %fd165, %fd146, %fd136; + fma.rn.f64 %fd167, %fd166, %fd146, %fd136; + { + .reg .b32 %temp; + mov.b64 {%r37, %temp}, %fd167; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r38}, %fd167; + } + shl.b32 %r127, %r36, 20; + add.s32 %r128, %r38, %r127; + mov.b64 %fd329, {%r37, %r128}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r129}, %fd137; + } + mov.b32 %f365, %r129; + abs.f32 %f43, %f365; + setp.lt.f32 %p50, %f43, 0f4086232B; + @%p50 bra $L__BB4_31; + + setp.gt.f64 %p51, %fd13, 0d8000000000000000; + mov.f64 %fd168, 0d7FF0000000000000; + sub.f64 %fd169, %fd168, %fd13; + selp.f64 %fd329, 0d0000000000000000, %fd169, %p51; + setp.geu.f32 %p52, %f43, 0f40874800; + @%p52 bra $L__BB4_31; + + shr.u32 %r130, %r36, 31; + add.s32 %r131, %r36, %r130; + shr.s32 %r132, %r131, 1; + shl.b32 %r133, %r132, 20; + add.s32 %r134, %r38, %r133; + mov.b64 %fd170, {%r37, %r134}; + sub.s32 %r135, %r36, %r132; + shl.b32 %r136, %r135, 20; + add.s32 %r137, %r136, 1072693248; + mov.u32 %r138, 0; + mov.b64 %fd171, {%r138, %r137}; + mul.f64 %fd329, %fd170, %fd171; + +$L__BB4_31: + add.s32 %r139, %r519, 1; + cvt.rn.f32.s32 %f366, %r139; + cvt.f64.f32 %fd18, %f366; + { + .reg .b32 %temp; + mov.b64 {%temp, %r39}, %fd18; + } + abs.f64 %fd19, %fd18; + { // callseq 88, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd19; + .param .b64 param1; + st.param.f64 [param1+0], %fd128; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd331, [retval0+0]; + } // callseq 88 + setp.lt.s32 %p53, %r39, 0; + and.pred %p3, %p53, %p30; + not.pred %p55, %p3; + @%p55 bra $L__BB4_33; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r140}, %fd331; + } + xor.b32 %r141, %r140, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r142, %temp}, %fd331; + } + mov.b64 %fd331, {%r142, %r141}; + +$L__BB4_33: + setp.eq.s32 %p56, %r518, 1; + @%p56 bra $L__BB4_37; + bra.uni $L__BB4_34; + +$L__BB4_37: + mov.u32 %r143, 0; + selp.b32 %r144, %r39, 0, %p30; + or.b32 %r145, %r144, 2146435072; + selp.b32 %r146, %r145, %r144, %p31; + mov.b64 %fd331, {%r143, %r146}; + bra.uni $L__BB4_38; + +$L__BB4_34: + setp.gt.s32 %p57, %r39, -1; + @%p57 bra $L__BB4_38; + + cvt.rzi.f64.f64 %fd174, %fd128; + setp.eq.f64 %p58, %fd174, 0d4000000000000000; + @%p58 bra $L__BB4_38; + + mov.f64 %fd331, 0dFFF8000000000000; + +$L__BB4_38: + add.f64 %fd25, %fd18, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r147}, %fd25; + } + and.b32 %r148, %r147, 2146435072; + setp.ne.s32 %p61, %r148, 2146435072; + mov.f64 %fd332, %fd331; + @%p61 bra $L__BB4_44; + + setp.gtu.f64 %p62, %fd19, 0d7FF0000000000000; + mov.f64 %fd332, %fd25; + @%p62 bra $L__BB4_44; + + { + .reg .b32 %temp; + mov.b64 {%r149, %temp}, %fd128; + } + setp.eq.s32 %p64, %r149, 0; + and.pred %p65, %p63, %p64; + @%p65 bra $L__BB4_43; + bra.uni $L__BB4_41; + +$L__BB4_43: + mov.u32 %r154, 0; + setp.gt.f64 %p72, %fd19, 0d3FF0000000000000; + selp.b32 %r155, 2146435072, 0, %p72; + xor.b32 %r156, %r155, 2146435072; + selp.b32 %r157, %r156, %r155, %p31; + setp.eq.s32 %p73, %r519, -2; + selp.b32 %r158, 1072693248, %r157, %p73; + mov.b64 %fd332, {%r154, %r158}; + bra.uni $L__BB4_44; + +$L__BB4_41: + { + .reg .b32 %temp; + mov.b64 {%r150, %temp}, %fd18; + } + and.b32 %r151, %r39, 2147483647; + setp.ne.s32 %p66, %r151, 2146435072; + setp.ne.s32 %p67, %r150, 0; + or.pred %p68, %p66, %p67; + mov.f64 %fd332, %fd331; + @%p68 bra $L__BB4_44; + + and.pred %p70, %p36, %p3; + selp.b32 %r152, %r20, %r19, %p70; + mov.u32 %r153, 0; + mov.b64 %fd332, {%r153, %r152}; + +$L__BB4_44: + mov.f64 %fd325, 0d3FF0000000000000; + mov.f64 %fd324, 0d3FE000000000000B; + mov.f64 %fd323, 0d3FC5555555555511; + mov.f64 %fd322, 0d3FA55555555502A1; + mov.f64 %fd321, 0d3F81111111122322; + mov.f64 %fd320, 0d3F56C16C1852B7AF; + mov.f64 %fd319, 0d3F2A01A014761F65; + mov.f64 %fd318, 0d3EFA01997C89EB71; + mov.f64 %fd317, 0d3EC71DEE62401315; + mov.f64 %fd316, 0d3E928AF3FCA213EA; + mov.f64 %fd315, 0d3E5ADE1569CE2BDF; + mov.f64 %fd314, 0dBC7ABC9E3B39803F; + mov.f64 %fd313, 0dBFE62E42FEFA39EF; + mov.f64 %fd312, 0dC338000000000000; + mov.f64 %fd311, 0d4338000000000000; + mov.f64 %fd310, 0d3FF71547652B82FE; + setp.eq.s32 %p74, %r519, 0; + selp.f64 %fd177, 0d3FF0000000000000, %fd332, %p74; + mul.f64 %fd29, %fd177, %fd1; + neg.f64 %fd179, %fd29; + fma.rn.f64 %fd182, %fd179, %fd310, %fd311; + { + .reg .b32 %temp; + mov.b64 {%r40, %temp}, %fd182; + } + add.rn.f64 %fd184, %fd182, %fd312; + fma.rn.f64 %fd186, %fd184, %fd313, %fd179; + fma.rn.f64 %fd188, %fd184, %fd314, %fd186; + fma.rn.f64 %fd191, %fd315, %fd188, %fd316; + fma.rn.f64 %fd193, %fd191, %fd188, %fd317; + fma.rn.f64 %fd195, %fd193, %fd188, %fd318; + fma.rn.f64 %fd197, %fd195, %fd188, %fd319; + fma.rn.f64 %fd199, %fd197, %fd188, %fd320; + fma.rn.f64 %fd201, %fd199, %fd188, %fd321; + fma.rn.f64 %fd203, %fd201, %fd188, %fd322; + fma.rn.f64 %fd205, %fd203, %fd188, %fd323; + fma.rn.f64 %fd207, %fd205, %fd188, %fd324; + fma.rn.f64 %fd208, %fd207, %fd188, %fd325; + fma.rn.f64 %fd209, %fd208, %fd188, %fd325; + { + .reg .b32 %temp; + mov.b64 {%r41, %temp}, %fd209; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r42}, %fd209; + } + shl.b32 %r159, %r40, 20; + add.s32 %r160, %r42, %r159; + mov.b64 %fd333, {%r41, %r160}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r161}, %fd179; + } + mov.b32 %f367, %r161; + abs.f32 %f44, %f367; + setp.lt.f32 %p75, %f44, 0f4086232B; + @%p75 bra $L__BB4_47; + + setp.gt.f64 %p76, %fd29, 0d8000000000000000; + mov.f64 %fd210, 0d7FF0000000000000; + sub.f64 %fd211, %fd210, %fd29; + selp.f64 %fd333, 0d0000000000000000, %fd211, %p76; + setp.geu.f32 %p77, %f44, 0f40874800; + @%p77 bra $L__BB4_47; + + shr.u32 %r162, %r40, 31; + add.s32 %r163, %r40, %r162; + shr.s32 %r164, %r163, 1; + shl.b32 %r165, %r164, 20; + add.s32 %r166, %r42, %r165; + mov.b64 %fd212, {%r41, %r166}; + sub.s32 %r167, %r40, %r164; + shl.b32 %r168, %r167, 20; + add.s32 %r169, %r168, 1072693248; + mov.u32 %r170, 0; + mov.b64 %fd213, {%r170, %r169}; + mul.f64 %fd333, %fd212, %fd213; + +$L__BB4_47: + ld.global.f32 %f368, [%rd55]; + cvt.f64.f32 %fd214, %f368; + mul.f64 %fd215, %fd329, %fd333; + cvt.f64.f32 %fd216, %f1725; + fma.rn.f64 %fd217, %fd215, %fd214, %fd216; + cvt.rn.f32.f64 %f1725, %fd217; + cvt.f64.f32 %fd218, %f1724; + add.f64 %fd219, %fd215, %fd218; + cvt.rn.f32.f64 %f1724, %fd219; + add.s32 %r519, %r519, -1; + add.s32 %r518, %r518, 1; + add.s64 %rd55, %rd55, 4; + add.s32 %r520, %r520, 1; + setp.lt.s32 %p78, %r520, %r85; + @%p78 bra $L__BB4_16; + + add.s32 %r517, %r517, 1; + setp.lt.s32 %p79, %r517, %r85; + @%p79 bra $L__BB4_15; + + div.rn.f32 %f369, %f1725, %f1724; + max.f32 %f1721, %f1721, %f369; + min.f32 %f1774, %f1774, %f369; + add.s32 %r516, %r516, 1; + setp.lt.s32 %p80, %r516, %r85; + @%p80 bra $L__BB4_14; + + add.s32 %r515, %r515, 1; + setp.lt.s32 %p81, %r515, %r85; + @%p81 bra $L__BB4_13; + +$L__BB4_51: + ld.param.u32 %r508, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_6]; + mov.f32 %f1699, 0f00000000; + sub.f32 %f371, %f1721, %f1774; + add.f32 %f372, %f371, %f371; + fma.rn.f32 %f373, %f371, 0f40000000, %f372; + mul.f32 %f374, %f373, 0f40490FD8; + mul.f32 %f375, %f374, %f325; + mul.f32 %f376, %f375, %f325; + max.f32 %f1775, %f1699, %f376; + setp.lt.s32 %p82, %r508, 1; + @%p82 bra $L__BB4_205; + + ld.param.u32 %r509, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_5]; + cvt.rn.f32.s32 %f52, %r509; + cvt.f64.f32 %fd220, %f325; + add.f64 %fd34, %fd220, 0d4008000000000000; + mov.u32 %r521, 0; + cvta.to.global.u64 %rd26, %rd6; + mov.f64 %fd222, 0d4008000000000000; + abs.f64 %fd223, %fd220; + setp.gtu.f64 %p101, %fd223, 0d7FF0000000000000; + setp.gt.f64 %p102, %fd223, 0d3FF0000000000000; + setp.eq.f32 %p103, %f325, 0fBF800000; + cvta.to.global.u64 %rd31, %rd7; + +$L__BB4_53: + mov.f32 %f1741, 0f00000000; + mov.f32 %f1742, %f1741; + mov.f32 %f1743, %f1741; + mov.f32 %f1744, %f1741; + mov.f32 %f1745, %f1741; + mov.f32 %f1746, %f1741; + mov.f32 %f1747, %f1741; + mov.f32 %f1748, %f1741; + @%p20 bra $L__BB4_204; + + mov.f32 %f1741, 0f00000000; + div.rn.f32 %f395, %f1775, 0fC0206C98; + div.rn.f32 %f58, %f395, %f325; + cvt.f64.f32 %fd35, %f395; + shl.b32 %r177, %r1, 1; + mul.wide.s32 %rd27, %r177, 4; + add.s64 %rd28, %rd26, %rd27; + ld.global.f32 %f59, [%rd28+4]; + ld.global.f32 %f60, [%rd28]; + mov.u32 %r522, 0; + +$L__BB4_55: + mov.u32 %r523, 0; + mov.f32 %f1646, 0f00000000; + cvt.rn.f32.s32 %f69, %r522; + sub.f32 %f396, %f69, %f1777; + add.f32 %f397, %f396, 0f3F000000; + sqrt.rn.f32 %f70, %f34; + mul.f32 %f398, %f70, %f397; + abs.f32 %f71, %f398; + setp.ge.f32 %p84, %f71, 0f3F8060FE; + mul.f32 %f399, %f398, %f398; + selp.f32 %f400, %f71, %f399, %p84; + selp.f32 %f401, 0f3789CA3C, 0f38B1E96A, %p84; + selp.f32 %f402, 0fB9F560B9, 0fBA574D20, %p84; + fma.rn.f32 %f403, %f401, %f400, %f402; + selp.f32 %f404, 0f3BAC840B, 0f3BAAD5EA, %p84; + fma.rn.f32 %f405, %f403, %f400, %f404; + selp.f32 %f406, 0fBD0C8162, 0fBCDC1BE7, %p84; + fma.rn.f32 %f407, %f405, %f400, %f406; + selp.f32 %f408, 0f3E1CF906, 0f3DE718AF, %p84; + fma.rn.f32 %f409, %f407, %f400, %f408; + selp.f32 %f410, 0f3F6A937E, 0fBEC093AC, %p84; + fma.rn.f32 %f411, %f409, %f400, %f410; + selp.f32 %f412, 0f3F20D842, 0f3E0375D3, %p84; + fma.rn.f32 %f413, %f411, %f400, %f412; + neg.f32 %f414, %f71; + selp.f32 %f415, %f414, %f398, %p84; + fma.rn.f32 %f72, %f413, %f415, %f415; + mov.b32 %r179, %f398; + and.b32 %r51, %r179, -2147483648; + add.f32 %f73, %f396, 0fBF000000; + mul.f32 %f416, %f70, %f73; + abs.f32 %f74, %f416; + setp.ge.f32 %p85, %f74, 0f3F8060FE; + mul.f32 %f417, %f416, %f416; + selp.f32 %f418, %f74, %f417, %p85; + selp.f32 %f419, 0f3789CA3C, 0f38B1E96A, %p85; + selp.f32 %f420, 0fB9F560B9, 0fBA574D20, %p85; + fma.rn.f32 %f421, %f419, %f418, %f420; + selp.f32 %f422, 0f3BAC840B, 0f3BAAD5EA, %p85; + fma.rn.f32 %f423, %f421, %f418, %f422; + selp.f32 %f424, 0fBD0C8162, 0fBCDC1BE7, %p85; + fma.rn.f32 %f425, %f423, %f418, %f424; + selp.f32 %f426, 0f3E1CF906, 0f3DE718AF, %p85; + fma.rn.f32 %f427, %f425, %f418, %f426; + selp.f32 %f428, 0f3F6A937E, 0fBEC093AC, %p85; + fma.rn.f32 %f429, %f427, %f418, %f428; + selp.f32 %f430, 0f3F20D842, 0f3E0375D3, %p85; + fma.rn.f32 %f431, %f429, %f418, %f430; + neg.f32 %f432, %f74; + selp.f32 %f433, %f432, %f416, %p85; + fma.rn.f32 %f75, %f431, %f433, %f433; + mov.b32 %r180, %f416; + and.b32 %r52, %r180, -2147483648; + add.f32 %f434, %f69, 0f3F000000; + sub.f32 %f76, %f434, %f1777; + div.rn.f32 %f77, %f76, %f325; + mov.f32 %f435, 0f3F800000; + cvt.rzi.f32.f32 %f436, %f435; + add.f32 %f437, %f436, %f436; + mov.f32 %f438, 0f40000000; + sub.f32 %f439, %f438, %f437; + abs.f32 %f78, %f439; + setp.eq.f32 %p86, %f78, 0f3F800000; + abs.f32 %f79, %f77; + setp.lt.f32 %p87, %f79, 0f00800000; + mul.f32 %f440, %f79, 0f4B800000; + selp.f32 %f441, %f440, %f79, %p87; + selp.f32 %f442, 0fC3170000, 0fC2FE0000, %p87; + mov.b32 %r181, %f441; + and.b32 %r182, %r181, 8388607; + or.b32 %r183, %r182, 1065353216; + mov.b32 %f443, %r183; + shr.u32 %r184, %r181, 23; + cvt.rn.f32.u32 %f444, %r184; + add.f32 %f445, %f442, %f444; + setp.gt.f32 %p88, %f443, 0f3FB504F3; + mul.f32 %f446, %f443, 0f3F000000; + add.f32 %f447, %f445, 0f3F800000; + selp.f32 %f448, %f447, %f445, %p88; + selp.f32 %f449, %f446, %f443, %p88; + add.f32 %f450, %f449, 0fBF800000; + add.f32 %f451, %f449, 0f3F800000; + rcp.approx.ftz.f32 %f452, %f451; + add.f32 %f453, %f450, %f450; + mul.f32 %f454, %f453, %f452; + mul.f32 %f455, %f454, %f454; + mov.f32 %f456, 0f3C4CAF63; + mov.f32 %f457, 0f3B18F0FE; + fma.rn.f32 %f458, %f457, %f455, %f456; + mov.f32 %f459, 0f3DAAAABD; + fma.rn.f32 %f460, %f458, %f455, %f459; + mul.rn.f32 %f461, %f460, %f455; + mul.rn.f32 %f462, %f461, %f454; + sub.f32 %f463, %f450, %f454; + add.f32 %f464, %f463, %f463; + neg.f32 %f465, %f454; + fma.rn.f32 %f466, %f465, %f450, %f464; + mul.rn.f32 %f467, %f452, %f466; + add.f32 %f468, %f462, %f454; + sub.f32 %f469, %f454, %f468; + add.f32 %f470, %f462, %f469; + add.f32 %f471, %f467, %f470; + add.f32 %f472, %f468, %f471; + sub.f32 %f473, %f468, %f472; + add.f32 %f474, %f471, %f473; + mov.f32 %f475, 0f3F317200; + mul.rn.f32 %f476, %f448, %f475; + mov.f32 %f477, 0f35BFBE8E; + mul.rn.f32 %f478, %f448, %f477; + add.f32 %f479, %f476, %f472; + sub.f32 %f480, %f476, %f479; + add.f32 %f481, %f472, %f480; + add.f32 %f482, %f474, %f481; + add.f32 %f483, %f478, %f482; + add.f32 %f484, %f479, %f483; + sub.f32 %f485, %f479, %f484; + add.f32 %f486, %f483, %f485; + mul.rn.f32 %f487, %f438, %f484; + neg.f32 %f488, %f487; + fma.rn.f32 %f489, %f438, %f484, %f488; + fma.rn.f32 %f490, %f438, %f486, %f489; + fma.rn.f32 %f492, %f1646, %f484, %f490; + add.rn.f32 %f493, %f487, %f492; + neg.f32 %f494, %f493; + add.rn.f32 %f495, %f487, %f494; + add.rn.f32 %f496, %f495, %f492; + mov.b32 %r185, %f493; + setp.eq.s32 %p89, %r185, 1118925336; + add.s32 %r186, %r185, -1; + mov.b32 %f497, %r186; + add.f32 %f498, %f496, 0f37000000; + selp.f32 %f80, %f498, %f496, %p89; + selp.f32 %f499, %f497, %f493, %p89; + mov.f32 %f500, 0f3FB8AA3B; + mul.rn.f32 %f501, %f499, %f500; + cvt.rzi.f32.f32 %f502, %f501; + abs.f32 %f503, %f502; + setp.gt.f32 %p90, %f503, 0f42FC0000; + mov.b32 %r187, %f502; + and.b32 %r188, %r187, -2147483648; + or.b32 %r189, %r188, 1123811328; + mov.b32 %f504, %r189; + selp.f32 %f505, %f504, %f502, %p90; + mov.f32 %f506, 0fBF317218; + fma.rn.f32 %f507, %f505, %f506, %f499; + mov.f32 %f508, 0f3102E308; + fma.rn.f32 %f509, %f505, %f508, %f507; + mul.f32 %f510, %f509, 0f3FB8AA3B; + add.f32 %f511, %f505, 0f4B40007F; + mov.b32 %r190, %f511; + shl.b32 %r191, %r190, 23; + mov.b32 %f512, %r191; + ex2.approx.ftz.f32 %f513, %f510; + mul.f32 %f81, %f513, %f512; + setp.lt.f32 %p91, %f77, 0f00000000; + and.pred %p4, %p91, %p86; + add.f32 %f514, %f77, %f77; + selp.f32 %f82, %f514, 0f00000000, %p86; + div.rn.f32 %f83, %f73, %f325; + abs.f32 %f84, %f83; + setp.lt.f32 %p92, %f84, 0f00800000; + mul.f32 %f516, %f84, 0f4B800000; + selp.f32 %f517, %f516, %f84, %p92; + selp.f32 %f518, 0fC3170000, 0fC2FE0000, %p92; + mov.b32 %r192, %f517; + and.b32 %r193, %r192, 8388607; + or.b32 %r194, %r193, 1065353216; + mov.b32 %f519, %r194; + shr.u32 %r195, %r192, 23; + cvt.rn.f32.u32 %f520, %r195; + add.f32 %f521, %f518, %f520; + setp.gt.f32 %p93, %f519, 0f3FB504F3; + mul.f32 %f522, %f519, 0f3F000000; + add.f32 %f523, %f521, 0f3F800000; + selp.f32 %f524, %f523, %f521, %p93; + selp.f32 %f525, %f522, %f519, %p93; + add.f32 %f526, %f525, 0fBF800000; + add.f32 %f527, %f525, 0f3F800000; + rcp.approx.ftz.f32 %f528, %f527; + add.f32 %f529, %f526, %f526; + mul.f32 %f530, %f529, %f528; + mul.f32 %f531, %f530, %f530; + fma.rn.f32 %f532, %f457, %f531, %f456; + fma.rn.f32 %f533, %f532, %f531, %f459; + mul.rn.f32 %f534, %f533, %f531; + mul.rn.f32 %f535, %f534, %f530; + sub.f32 %f536, %f526, %f530; + add.f32 %f537, %f536, %f536; + neg.f32 %f538, %f530; + fma.rn.f32 %f539, %f538, %f526, %f537; + mul.rn.f32 %f540, %f528, %f539; + add.f32 %f541, %f535, %f530; + sub.f32 %f542, %f530, %f541; + add.f32 %f543, %f535, %f542; + add.f32 %f544, %f540, %f543; + add.f32 %f545, %f541, %f544; + sub.f32 %f546, %f541, %f545; + add.f32 %f547, %f544, %f546; + mul.rn.f32 %f548, %f524, %f475; + mul.rn.f32 %f549, %f524, %f477; + add.f32 %f550, %f548, %f545; + sub.f32 %f551, %f548, %f550; + add.f32 %f552, %f545, %f551; + add.f32 %f553, %f547, %f552; + add.f32 %f554, %f549, %f553; + add.f32 %f555, %f550, %f554; + sub.f32 %f556, %f550, %f555; + add.f32 %f557, %f554, %f556; + mul.rn.f32 %f558, %f438, %f555; + neg.f32 %f559, %f558; + fma.rn.f32 %f560, %f438, %f555, %f559; + fma.rn.f32 %f561, %f438, %f557, %f560; + fma.rn.f32 %f562, %f1646, %f555, %f561; + add.rn.f32 %f563, %f558, %f562; + neg.f32 %f564, %f563; + add.rn.f32 %f565, %f558, %f564; + add.rn.f32 %f566, %f565, %f562; + mov.b32 %r196, %f563; + setp.eq.s32 %p94, %r196, 1118925336; + add.s32 %r197, %r196, -1; + mov.b32 %f567, %r197; + add.f32 %f568, %f566, 0f37000000; + selp.f32 %f85, %f568, %f566, %p94; + selp.f32 %f569, %f567, %f563, %p94; + mul.rn.f32 %f570, %f569, %f500; + cvt.rzi.f32.f32 %f571, %f570; + abs.f32 %f572, %f571; + setp.gt.f32 %p95, %f572, 0f42FC0000; + mov.b32 %r198, %f571; + and.b32 %r199, %r198, -2147483648; + or.b32 %r200, %r199, 1123811328; + mov.b32 %f573, %r200; + selp.f32 %f574, %f573, %f571, %p95; + fma.rn.f32 %f575, %f574, %f506, %f569; + fma.rn.f32 %f576, %f574, %f508, %f575; + mul.f32 %f577, %f576, 0f3FB8AA3B; + add.f32 %f578, %f574, 0f4B40007F; + mov.b32 %r201, %f578; + shl.b32 %r202, %r201, 23; + mov.b32 %f579, %r202; + ex2.approx.ftz.f32 %f580, %f577; + mul.f32 %f86, %f580, %f579; + setp.lt.f32 %p96, %f83, 0f00000000; + and.pred %p5, %p96, %p86; + add.f32 %f581, %f83, %f83; + selp.f32 %f87, %f581, 0f00000000, %p86; + { + .reg .b32 %temp; + mov.b64 {%temp, %r55}, %fd220; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r203}, %fd222; + } + and.b32 %r204, %r203, 2146435072; + setp.eq.s32 %p97, %r204, 1073741824; + { // callseq 89, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd223; + .param .b64 param1; + st.param.f64 [param1+0], %fd222; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd36, [retval0+0]; + } // callseq 89 + setp.lt.s32 %p98, %r55, 0; + and.pred %p6, %p98, %p97; + selp.b32 %r205, %r55, 0, %p97; + setp.lt.s32 %p99, %r203, 0; + or.b32 %r206, %r205, 2146435072; + selp.b32 %r56, %r206, %r205, %p99; + and.b32 %r58, %r203, 2147483647; + setp.gt.s32 %p104, %r203, -1; + selp.b32 %r211, 2146435072, 0, %p104; + setp.ne.s32 %p105, %r58, 1071644672; + and.pred %p106, %p105, %p6; + or.b32 %r212, %r211, -2147483648; + selp.b32 %r60, %r212, %r211, %p106; + mov.f64 %fd224, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r61}, %fd224; + } + and.b32 %r63, %r61, 2147483647; + setp.gt.s32 %p107, %r61, -1; + selp.b32 %r64, 2146435072, 0, %p107; + or.b32 %r65, %r64, -2147483648; + +$L__BB4_56: + cvt.rn.f32.s32 %f1651, %r522; + sub.f32 %f1650, %f1651, %f1777; + add.f32 %f1649, %f1650, 0f3F000000; + mul.f32 %f1648, %f70, %f1649; + abs.f32 %f1647, %f1648; + setp.ltu.f32 %p108, %f1647, 0f3F8060FE; + mov.f32 %f1749, %f72; + @%p108 bra $L__BB4_58; + + mov.f32 %f1691, 0f3F800000; + ex2.approx.ftz.f32 %f583, %f72; + sub.f32 %f585, %f1691, %f583; + mov.b32 %r214, %f585; + or.b32 %r215, %r51, %r214; + mov.b32 %f1749, %r215; + +$L__BB4_58: + cvt.rn.f32.s32 %f1656, %r522; + sub.f32 %f1655, %f1656, %f1777; + add.f32 %f1654, %f1655, 0fBF000000; + mul.f32 %f1653, %f70, %f1654; + abs.f32 %f1652, %f1653; + setp.ltu.f32 %p109, %f1652, 0f3F8060FE; + mov.f32 %f1750, %f75; + @%p109 bra $L__BB4_60; + + mov.f32 %f1690, 0f3F800000; + ex2.approx.ftz.f32 %f586, %f75; + sub.f32 %f588, %f1690, %f586; + mov.b32 %r216, %f588; + or.b32 %r217, %r52, %r216; + mov.b32 %f1750, %r217; + +$L__BB4_60: + sub.f32 %f589, %f1749, %f1750; + mul.f32 %f101, %f589, 0f3F000000; + cvt.rn.f32.s32 %f102, %r523; + sub.f32 %f103, %f102, %f1776; + add.f32 %f590, %f103, 0f3F000000; + mul.f32 %f104, %f70, %f590; + abs.f32 %f591, %f104; + setp.ltu.f32 %p110, %f591, 0f3F8060FE; + setp.ge.f32 %p111, %f591, 0f3F8060FE; + mul.f32 %f592, %f104, %f104; + selp.f32 %f593, %f591, %f592, %p111; + selp.f32 %f594, 0f3789CA3C, 0f38B1E96A, %p111; + selp.f32 %f595, 0fB9F560B9, 0fBA574D20, %p111; + fma.rn.f32 %f596, %f594, %f593, %f595; + selp.f32 %f597, 0f3BAC840B, 0f3BAAD5EA, %p111; + fma.rn.f32 %f598, %f596, %f593, %f597; + selp.f32 %f599, 0fBD0C8162, 0fBCDC1BE7, %p111; + fma.rn.f32 %f600, %f598, %f593, %f599; + selp.f32 %f601, 0f3E1CF906, 0f3DE718AF, %p111; + fma.rn.f32 %f602, %f600, %f593, %f601; + selp.f32 %f603, 0f3F6A937E, 0fBEC093AC, %p111; + fma.rn.f32 %f604, %f602, %f593, %f603; + selp.f32 %f605, 0f3F20D842, 0f3E0375D3, %p111; + fma.rn.f32 %f606, %f604, %f593, %f605; + neg.f32 %f607, %f591; + selp.f32 %f608, %f607, %f104, %p111; + fma.rn.f32 %f1751, %f606, %f608, %f608; + @%p110 bra $L__BB4_62; + + mov.f32 %f1689, 0f3F800000; + ex2.approx.ftz.f32 %f609, %f1751; + sub.f32 %f611, %f1689, %f609; + mov.b32 %r218, %f611; + mov.b32 %r219, %f104; + and.b32 %r220, %r219, -2147483648; + or.b32 %r221, %r220, %r218; + mov.b32 %f1751, %r221; + +$L__BB4_62: + cvt.rn.f32.s32 %f1658, %r523; + sub.f32 %f1657, %f1658, %f1776; + add.f32 %f108, %f1657, 0fBF000000; + mul.f32 %f109, %f70, %f108; + abs.f32 %f612, %f109; + setp.ltu.f32 %p112, %f612, 0f3F8060FE; + setp.ge.f32 %p113, %f612, 0f3F8060FE; + mul.f32 %f613, %f109, %f109; + selp.f32 %f614, %f612, %f613, %p113; + selp.f32 %f615, 0f3789CA3C, 0f38B1E96A, %p113; + selp.f32 %f616, 0fB9F560B9, 0fBA574D20, %p113; + fma.rn.f32 %f617, %f615, %f614, %f616; + selp.f32 %f618, 0f3BAC840B, 0f3BAAD5EA, %p113; + fma.rn.f32 %f619, %f617, %f614, %f618; + selp.f32 %f620, 0fBD0C8162, 0fBCDC1BE7, %p113; + fma.rn.f32 %f621, %f619, %f614, %f620; + selp.f32 %f622, 0f3E1CF906, 0f3DE718AF, %p113; + fma.rn.f32 %f623, %f621, %f614, %f622; + selp.f32 %f624, 0f3F6A937E, 0fBEC093AC, %p113; + fma.rn.f32 %f625, %f623, %f614, %f624; + selp.f32 %f626, 0f3F20D842, 0f3E0375D3, %p113; + fma.rn.f32 %f627, %f625, %f614, %f626; + neg.f32 %f628, %f612; + selp.f32 %f629, %f628, %f109, %p113; + fma.rn.f32 %f1752, %f627, %f629, %f629; + @%p112 bra $L__BB4_64; + + mov.f32 %f1688, 0f3F800000; + ex2.approx.ftz.f32 %f630, %f1752; + sub.f32 %f632, %f1688, %f630; + mov.b32 %r222, %f632; + mov.b32 %r223, %f109; + and.b32 %r224, %r223, -2147483648; + or.b32 %r225, %r224, %r222; + mov.b32 %f1752, %r225; + +$L__BB4_64: + cvt.rn.f32.s32 %f1659, %r522; + sub.f32 %f634, %f1751, %f1752; + mul.f32 %f113, %f634, 0f3F000000; + mul.f32 %f635, %f101, %f1775; + fma.rn.f32 %f114, %f113, %f635, %f1774; + mad.lo.s32 %r226, %r523, %r85, %r522; + add.s32 %r227, %r226, %r2; + mul.wide.s32 %rd29, %r227, 4; + add.s64 %rd30, %rd1, %rd29; + ld.global.f32 %f115, [%rd30]; + add.f32 %f636, %f59, %f102; + fma.rn.f32 %f637, %f636, %f52, %f60; + add.f32 %f638, %f637, %f1659; + cvt.rzi.s32.f32 %r228, %f638; + mul.wide.s32 %rd32, %r228, 4; + add.s64 %rd33, %rd31, %rd32; + ld.global.f32 %f1773, [%rd33]; + setp.eq.f32 %p114, %f81, 0f7F800000; + mov.f32 %f1753, 0f7F800000; + @%p114 bra $L__BB4_66; + + fma.rn.f32 %f1753, %f81, %f80, %f81; + +$L__BB4_66: + setp.geu.f32 %p377, %f77, 0f00000000; + mov.b32 %r229, %f1753; + xor.b32 %r230, %r229, -2147483648; + mov.b32 %f639, %r230; + selp.f32 %f119, %f639, %f1753, %p4; + setp.eq.f32 %p115, %f77, 0f00000000; + selp.f32 %f1754, %f82, %f119, %p115; + @%p377 bra $L__BB4_69; + + mov.f32 %f1660, 0f40000000; + cvt.rzi.f32.f32 %f641, %f1660; + setp.eq.f32 %p116, %f641, 0f40000000; + mov.f32 %f1754, %f119; + @%p116 bra $L__BB4_69; + + mov.f32 %f1754, 0f7FFFFFFF; + +$L__BB4_69: + abs.f32 %f1663, %f77; + mov.f32 %f1662, 0f3FB8AA3B; + add.f32 %f1661, %f1663, 0f40000000; + mov.b32 %r487, %f1661; + add.f32 %f644, %f77, 0f40000000; + setp.gtu.f32 %p117, %f1663, 0f7F800000; + mov.f32 %f1755, 0f7F800000; + selp.f32 %f645, %f644, %f1754, %p117; + selp.f32 %f646, 0fFF800000, 0f7F800000, %p4; + setp.neu.f32 %p118, %f1663, 0f7F800000; + selp.f32 %f647, %f645, %f646, %p118; + setp.gt.s32 %p119, %r487, 2139095039; + selp.f32 %f648, %f647, %f1754, %p119; + mul.f32 %f649, %f648, 0fBF000000; + setp.eq.f32 %p120, %f77, 0f3F800000; + selp.f32 %f650, 0fBF000000, %f649, %p120; + mov.f32 %f652, 0f3BBB989D; + fma.rn.f32 %f653, %f650, %f652, %f358; + mov.f32 %f655, 0f437C0000; + cvt.sat.f32.f32 %f656, %f653; + mov.f32 %f657, 0f4B400001; + fma.rm.f32 %f658, %f656, %f655, %f657; + add.f32 %f659, %f658, 0fCB40007F; + neg.f32 %f660, %f659; + fma.rn.f32 %f661, %f650, %f1662, %f660; + mov.f32 %f662, 0f32A57060; + fma.rn.f32 %f663, %f650, %f662, %f661; + mov.b32 %r231, %f658; + shl.b32 %r232, %r231, 23; + mov.b32 %f664, %r232; + ex2.approx.ftz.f32 %f665, %f663; + mul.f32 %f122, %f665, %f664; + setp.eq.f32 %p121, %f86, 0f7F800000; + @%p121 bra $L__BB4_71; + + fma.rn.f32 %f1755, %f86, %f85, %f86; + +$L__BB4_71: + setp.geu.f32 %p378, %f83, 0f00000000; + mov.b32 %r233, %f1755; + xor.b32 %r234, %r233, -2147483648; + mov.b32 %f666, %r234; + selp.f32 %f125, %f666, %f1755, %p5; + setp.eq.f32 %p122, %f83, 0f00000000; + selp.f32 %f1756, %f87, %f125, %p122; + @%p378 bra $L__BB4_74; + + mov.f32 %f1664, 0f40000000; + cvt.rzi.f32.f32 %f668, %f1664; + setp.eq.f32 %p123, %f668, 0f40000000; + mov.f32 %f1756, %f125; + @%p123 bra $L__BB4_74; + + mov.f32 %f1756, 0f7FFFFFFF; + +$L__BB4_74: + abs.f32 %f1672, %f83; + mov.f32 %f1671, 0f32A57060; + mov.f32 %f1670, 0f4B400001; + mov.f32 %f1669, 0f437C0000; + mov.f32 %f1668, 0f3BBB989D; + add.f32 %f1667, %f1672, 0f40000000; + mov.b32 %r488, %f1667; + selp.f32 %f1666, 0fFF800000, 0f7F800000, %p5; + mov.f32 %f1665, 0f3FB8AA3B; + add.f32 %f670, %f83, 0f40000000; + setp.gtu.f32 %p124, %f1672, 0f7F800000; + selp.f32 %f671, %f670, %f1756, %p124; + setp.neu.f32 %p125, %f1672, 0f7F800000; + selp.f32 %f672, %f671, %f1666, %p125; + setp.gt.s32 %p126, %r488, 2139095039; + selp.f32 %f673, %f672, %f1756, %p126; + mul.f32 %f674, %f673, 0fBF000000; + setp.eq.f32 %p127, %f83, 0f3F800000; + selp.f32 %f675, 0fBF000000, %f674, %p127; + fma.rn.f32 %f678, %f675, %f1668, %f358; + cvt.sat.f32.f32 %f681, %f678; + fma.rm.f32 %f683, %f681, %f1669, %f1670; + add.f32 %f684, %f683, 0fCB40007F; + neg.f32 %f685, %f684; + fma.rn.f32 %f686, %f675, %f1665, %f685; + fma.rn.f32 %f688, %f675, %f1671, %f686; + mov.b32 %r235, %f683; + shl.b32 %r236, %r235, 23; + mov.b32 %f689, %r236; + ex2.approx.ftz.f32 %f690, %f688; + mul.f32 %f128, %f690, %f689; + sub.f32 %f691, %f122, %f128; + mul.f32 %f692, %f58, %f691; + mul.f32 %f129, %f113, %f692; + not.pred %p128, %p6; + mov.f64 %fd335, %fd36; + @%p128 bra $L__BB4_76; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r237}, %fd36; + } + xor.b32 %r238, %r237, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r239, %temp}, %fd36; + } + mov.b64 %fd335, {%r239, %r238}; + +$L__BB4_76: + setp.eq.f32 %p129, %f325, 0f00000000; + @%p129 bra $L__BB4_80; + bra.uni $L__BB4_77; + +$L__BB4_80: + mov.u32 %r240, 0; + mov.b64 %fd335, {%r240, %r56}; + bra.uni $L__BB4_81; + +$L__BB4_77: + { + .reg .b32 %temp; + mov.b64 {%temp, %r489}, %fd220; + } + setp.gt.s32 %p130, %r489, -1; + @%p130 bra $L__BB4_81; + + cvt.rzi.f64.f64 %fd226, %fd222; + setp.eq.f64 %p131, %fd226, 0d4008000000000000; + @%p131 bra $L__BB4_81; + + mov.f64 %fd335, 0dFFF8000000000000; + +$L__BB4_81: + { + .reg .b32 %temp; + mov.b64 {%temp, %r491}, %fd34; + } + and.b32 %r490, %r491, 2146435072; + setp.ne.s32 %p380, %r490, 2146435072; + or.pred %p379, %p380, %p101; + selp.f64 %fd336, %fd335, %fd34, %p380; + @%p379 bra $L__BB4_86; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r493}, %fd222; + } + and.b32 %r492, %r493, 2147483647; + setp.eq.s32 %p133, %r492, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r241, %temp}, %fd222; + } + setp.eq.s32 %p134, %r241, 0; + and.pred %p135, %p133, %p134; + @%p135 bra $L__BB4_85; + bra.uni $L__BB4_83; + +$L__BB4_85: + { + .reg .b32 %temp; + mov.b64 {%temp, %r499}, %fd222; + } + setp.lt.s32 %p381, %r499, 0; + selp.b32 %r498, 2146435072, 0, %p102; + xor.b32 %r497, %r498, 2146435072; + selp.b32 %r496, %r497, %r498, %p381; + selp.b32 %r495, 1072693248, %r496, %p103; + mov.u32 %r245, 0; + mov.b64 %fd336, {%r245, %r495}; + bra.uni $L__BB4_86; + +$L__BB4_83: + { + .reg .b32 %temp; + mov.b64 {%temp, %r494}, %fd220; + } + and.b32 %r242, %r494, 2147483647; + setp.ne.s32 %p136, %r242, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r243, %temp}, %fd220; + } + setp.ne.s32 %p137, %r243, 0; + or.pred %p138, %p136, %p137; + mov.f64 %fd336, %fd335; + @%p138 bra $L__BB4_86; + + mov.u32 %r244, 0; + mov.b64 %fd336, {%r244, %r60}; + +$L__BB4_86: + cvt.rn.f32.s32 %f1687, %r522; + cvt.rn.f32.s32 %f1686, %r523; + mov.f32 %f1685, 0f3102E308; + mov.f32 %f1684, 0fBF317218; + mov.f32 %f1683, 0f35BFBE8E; + mov.f32 %f1682, 0f3F317200; + mov.f32 %f1681, 0f3DAAAABD; + mov.f32 %f1680, 0f3C4CAF63; + mov.f32 %f1679, 0f3B18F0FE; + add.f32 %f1678, %f1687, 0f3F000000; + sub.f32 %f1677, %f1678, %f1777; + sub.f32 %f1676, %f1687, %f1777; + add.f32 %f1675, %f1676, 0fBF000000; + mov.f32 %f1674, 0f3FB8AA3B; + mov.f32 %f1673, 0f40000000; + setp.eq.f32 %p139, %f325, 0f3F800000; + selp.f64 %fd230, 0d3FF0000000000000, %fd336, %p139; + div.rn.f64 %fd45, %fd35, %fd230; + mul.f32 %f694, %f1675, %f128; + mul.f32 %f695, %f1677, %f122; + sub.f32 %f696, %f695, %f694; + cvt.f64.f32 %fd231, %f696; + mul.f64 %fd232, %fd45, %fd231; + cvt.f64.f32 %fd233, %f113; + mul.f64 %fd234, %fd232, %fd233; + cvt.rn.f32.f64 %f130, %fd234; + add.f32 %f697, %f1686, 0f3F000000; + sub.f32 %f131, %f697, %f1776; + div.rn.f32 %f132, %f131, %f325; + abs.f32 %f133, %f132; + setp.lt.f32 %p140, %f133, 0f00800000; + mul.f32 %f698, %f133, 0f4B800000; + selp.f32 %f699, %f698, %f133, %p140; + selp.f32 %f700, 0fC3170000, 0fC2FE0000, %p140; + mov.b32 %r246, %f699; + and.b32 %r247, %r246, 8388607; + or.b32 %r248, %r247, 1065353216; + mov.b32 %f701, %r248; + shr.u32 %r249, %r246, 23; + cvt.rn.f32.u32 %f702, %r249; + add.f32 %f703, %f700, %f702; + setp.gt.f32 %p141, %f701, 0f3FB504F3; + mul.f32 %f704, %f701, 0f3F000000; + add.f32 %f705, %f703, 0f3F800000; + selp.f32 %f706, %f705, %f703, %p141; + selp.f32 %f707, %f704, %f701, %p141; + add.f32 %f708, %f707, 0fBF800000; + add.f32 %f709, %f707, 0f3F800000; + rcp.approx.ftz.f32 %f710, %f709; + add.f32 %f711, %f708, %f708; + mul.f32 %f713, %f711, %f710; + mul.f32 %f714, %f713, %f713; + fma.rn.f32 %f717, %f1679, %f714, %f1680; + fma.rn.f32 %f719, %f717, %f714, %f1681; + mul.rn.f32 %f720, %f719, %f714; + mul.rn.f32 %f721, %f720, %f713; + sub.f32 %f722, %f708, %f713; + add.f32 %f723, %f722, %f722; + neg.f32 %f724, %f713; + fma.rn.f32 %f725, %f724, %f708, %f723; + mul.rn.f32 %f726, %f710, %f725; + add.f32 %f727, %f721, %f713; + sub.f32 %f728, %f713, %f727; + add.f32 %f729, %f721, %f728; + add.f32 %f730, %f726, %f729; + add.f32 %f731, %f727, %f730; + sub.f32 %f732, %f727, %f731; + add.f32 %f733, %f730, %f732; + mul.rn.f32 %f735, %f706, %f1682; + mul.rn.f32 %f737, %f706, %f1683; + add.f32 %f738, %f735, %f731; + sub.f32 %f739, %f735, %f738; + add.f32 %f740, %f731, %f739; + add.f32 %f741, %f733, %f740; + add.f32 %f742, %f737, %f741; + add.f32 %f743, %f738, %f742; + sub.f32 %f744, %f738, %f743; + add.f32 %f745, %f742, %f744; + mul.rn.f32 %f746, %f1673, %f743; + neg.f32 %f747, %f746; + fma.rn.f32 %f748, %f1673, %f743, %f747; + fma.rn.f32 %f749, %f1673, %f745, %f748; + mov.f32 %f750, 0f00000000; + fma.rn.f32 %f751, %f750, %f743, %f749; + add.rn.f32 %f752, %f746, %f751; + neg.f32 %f753, %f752; + add.rn.f32 %f754, %f746, %f753; + add.rn.f32 %f755, %f754, %f751; + mov.b32 %r250, %f752; + setp.eq.s32 %p142, %r250, 1118925336; + add.s32 %r251, %r250, -1; + mov.b32 %f756, %r251; + add.f32 %f757, %f755, 0f37000000; + selp.f32 %f134, %f757, %f755, %p142; + selp.f32 %f758, %f756, %f752, %p142; + mul.rn.f32 %f760, %f758, %f1674; + cvt.rzi.f32.f32 %f761, %f760; + abs.f32 %f762, %f761; + setp.gt.f32 %p143, %f762, 0f42FC0000; + mov.b32 %r252, %f761; + and.b32 %r253, %r252, -2147483648; + or.b32 %r254, %r253, 1123811328; + mov.b32 %f763, %r254; + selp.f32 %f764, %f763, %f761, %p143; + fma.rn.f32 %f766, %f764, %f1684, %f758; + fma.rn.f32 %f768, %f764, %f1685, %f766; + mul.f32 %f769, %f768, 0f3FB8AA3B; + add.f32 %f770, %f764, 0f4B40007F; + mov.b32 %r255, %f770; + shl.b32 %r256, %r255, 23; + mov.b32 %f771, %r256; + ex2.approx.ftz.f32 %f772, %f769; + mul.f32 %f135, %f772, %f771; + setp.eq.f32 %p144, %f135, 0f7F800000; + mov.f32 %f1757, 0f7F800000; + @%p144 bra $L__BB4_88; + + fma.rn.f32 %f1757, %f135, %f134, %f135; + +$L__BB4_88: + setp.lt.f32 %p145, %f132, 0f00000000; + and.pred %p10, %p145, %p86; + setp.eq.f32 %p147, %f132, 0f00000000; + @%p147 bra $L__BB4_92; + bra.uni $L__BB4_89; + +$L__BB4_92: + add.f32 %f777, %f132, %f132; + selp.f32 %f1759, %f777, 0f00000000, %p86; + bra.uni $L__BB4_93; + +$L__BB4_89: + mov.b32 %r257, %f1757; + xor.b32 %r258, %r257, -2147483648; + mov.b32 %f773, %r258; + selp.f32 %f1759, %f773, %f1757, %p10; + setp.geu.f32 %p148, %f132, 0f00000000; + @%p148 bra $L__BB4_93; + + mov.f32 %f1695, 0f40000000; + cvt.rzi.f32.f32 %f775, %f1695; + setp.eq.f32 %p149, %f775, 0f40000000; + @%p149 bra $L__BB4_93; + + mov.f32 %f1759, 0f7FFFFFFF; + +$L__BB4_93: + abs.f32 %f1615, %f132; + add.f32 %f778, %f1615, 0f40000000; + mov.b32 %r259, %f778; + setp.lt.s32 %p151, %r259, 2139095040; + @%p151 bra $L__BB4_98; + + abs.f32 %f1693, %f132; + setp.gtu.f32 %p152, %f1693, 0f7F800000; + @%p152 bra $L__BB4_97; + bra.uni $L__BB4_95; + +$L__BB4_97: + add.f32 %f1759, %f132, 0f40000000; + bra.uni $L__BB4_98; + +$L__BB4_95: + abs.f32 %f1694, %f132; + setp.neu.f32 %p153, %f1694, 0f7F800000; + @%p153 bra $L__BB4_98; + + selp.f32 %f1759, 0fFF800000, 0f7F800000, %p10; + +$L__BB4_98: + mov.f32 %f1632, 0f00000000; + cvt.rn.f32.s32 %f1631, %r523; + sub.f32 %f1630, %f1631, %f1776; + add.f32 %f1629, %f1630, 0fBF000000; + mov.f32 %f1628, 0f3102E308; + mov.f32 %f1627, 0fBF317218; + mov.f32 %f1626, 0f35BFBE8E; + mov.f32 %f1625, 0f3F317200; + mov.f32 %f1624, 0f3DAAAABD; + mov.f32 %f1623, 0f3C4CAF63; + mov.f32 %f1622, 0f3B18F0FE; + mov.f32 %f1621, 0f32A57060; + mov.f32 %f1620, 0f4B400001; + mov.f32 %f1619, 0f437C0000; + mov.f32 %f1618, 0f3BBB989D; + mov.f32 %f1617, 0f3FB8AA3B; + mov.f32 %f1616, 0f40000000; + mul.f32 %f780, %f1759, 0fBF000000; + setp.eq.f32 %p154, %f132, 0f3F800000; + selp.f32 %f781, 0fBF000000, %f780, %p154; + fma.rn.f32 %f784, %f781, %f1618, %f358; + cvt.sat.f32.f32 %f787, %f784; + fma.rm.f32 %f789, %f787, %f1619, %f1620; + add.f32 %f790, %f789, 0fCB40007F; + neg.f32 %f791, %f790; + fma.rn.f32 %f792, %f781, %f1617, %f791; + fma.rn.f32 %f794, %f781, %f1621, %f792; + mov.b32 %r260, %f789; + shl.b32 %r261, %r260, 23; + mov.b32 %f795, %r261; + ex2.approx.ftz.f32 %f796, %f794; + mul.f32 %f144, %f796, %f795; + div.rn.f32 %f145, %f1629, %f325; + abs.f32 %f146, %f145; + setp.lt.f32 %p155, %f146, 0f00800000; + mul.f32 %f797, %f146, 0f4B800000; + selp.f32 %f798, %f797, %f146, %p155; + selp.f32 %f799, 0fC3170000, 0fC2FE0000, %p155; + mov.b32 %r262, %f798; + and.b32 %r263, %r262, 8388607; + or.b32 %r264, %r263, 1065353216; + mov.b32 %f800, %r264; + shr.u32 %r265, %r262, 23; + cvt.rn.f32.u32 %f801, %r265; + add.f32 %f802, %f799, %f801; + setp.gt.f32 %p156, %f800, 0f3FB504F3; + mul.f32 %f803, %f800, 0f3F000000; + add.f32 %f804, %f802, 0f3F800000; + selp.f32 %f805, %f804, %f802, %p156; + selp.f32 %f806, %f803, %f800, %p156; + add.f32 %f807, %f806, 0fBF800000; + add.f32 %f808, %f806, 0f3F800000; + rcp.approx.ftz.f32 %f809, %f808; + add.f32 %f810, %f807, %f807; + mul.f32 %f812, %f810, %f809; + mul.f32 %f813, %f812, %f812; + fma.rn.f32 %f816, %f1622, %f813, %f1623; + fma.rn.f32 %f818, %f816, %f813, %f1624; + mul.rn.f32 %f819, %f818, %f813; + mul.rn.f32 %f820, %f819, %f812; + sub.f32 %f821, %f807, %f812; + add.f32 %f822, %f821, %f821; + neg.f32 %f823, %f812; + fma.rn.f32 %f824, %f823, %f807, %f822; + mul.rn.f32 %f825, %f809, %f824; + add.f32 %f826, %f820, %f812; + sub.f32 %f827, %f812, %f826; + add.f32 %f828, %f820, %f827; + add.f32 %f829, %f825, %f828; + add.f32 %f830, %f826, %f829; + sub.f32 %f831, %f826, %f830; + add.f32 %f832, %f829, %f831; + mul.rn.f32 %f834, %f805, %f1625; + mul.rn.f32 %f836, %f805, %f1626; + add.f32 %f837, %f834, %f830; + sub.f32 %f838, %f834, %f837; + add.f32 %f839, %f830, %f838; + add.f32 %f840, %f832, %f839; add.f32 %f841, %f836, %f840; - add.f32 %f842, %f838, %f841; - add.f32 %f843, %f123, %f842; - add.f32 %f844, %f839, %f843; - sub.f32 %f845, %f839, %f844; - add.f32 %f846, %f843, %f845; - mul.rn.f32 %f848, %f1948, %f844; - neg.f32 %f849, %f848; - fma.rn.f32 %f850, %f1948, %f844, %f849; - fma.rn.f32 %f851, %f1948, %f846, %f850; - mov.f32 %f852, 0f00000000; - fma.rn.f32 %f853, %f852, %f844, %f851; - add.rn.f32 %f854, %f848, %f853; - neg.f32 %f855, %f854; - add.rn.f32 %f856, %f848, %f855; - add.rn.f32 %f857, %f856, %f853; - mov.b32 %r138, %f854; - setp.eq.s32 %p69, %r138, 1118925336; - add.s32 %r139, %r138, -1; - mov.b32 %f858, %r139; - add.f32 %f859, %f857, 0f37000000; - selp.f32 %f860, %f858, %f854, %p69; - selp.f32 %f162, %f859, %f857, %p69; - mul.f32 %f861, %f860, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f862, %f861; - mov.f32 %f863, 0fBF317200; - fma.rn.f32 %f864, %f862, %f863, %f860; - mov.f32 %f865, 0fB5BFBE8E; - fma.rn.f32 %f866, %f862, %f865, %f864; + add.f32 %f842, %f837, %f841; + sub.f32 %f843, %f837, %f842; + add.f32 %f844, %f841, %f843; + mul.rn.f32 %f845, %f1616, %f842; + neg.f32 %f846, %f845; + fma.rn.f32 %f847, %f1616, %f842, %f846; + fma.rn.f32 %f848, %f1616, %f844, %f847; + fma.rn.f32 %f850, %f1632, %f842, %f848; + add.rn.f32 %f851, %f845, %f850; + neg.f32 %f852, %f851; + add.rn.f32 %f853, %f845, %f852; + add.rn.f32 %f854, %f853, %f850; + mov.b32 %r266, %f851; + setp.eq.s32 %p157, %r266, 1118925336; + add.s32 %r267, %r266, -1; + mov.b32 %f855, %r267; + add.f32 %f856, %f854, 0f37000000; + selp.f32 %f147, %f856, %f854, %p157; + selp.f32 %f857, %f855, %f851, %p157; + mul.rn.f32 %f858, %f857, %f1617; + cvt.rzi.f32.f32 %f859, %f858; + abs.f32 %f860, %f859; + setp.gt.f32 %p158, %f860, 0f42FC0000; + mov.b32 %r268, %f859; + and.b32 %r269, %r268, -2147483648; + or.b32 %r270, %r269, 1123811328; + mov.b32 %f861, %r270; + selp.f32 %f862, %f861, %f859, %p158; + fma.rn.f32 %f864, %f862, %f1627, %f857; + fma.rn.f32 %f866, %f862, %f1628, %f864; mul.f32 %f867, %f866, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f868, %f867; - add.f32 %f869, %f862, 0f00000000; - ex2.approx.f32 %f870, %f869; - mul.f32 %f871, %f868, %f870; - setp.lt.f32 %p70, %f860, 0fC2D20000; - selp.f32 %f872, 0f00000000, %f871, %p70; - setp.gt.f32 %p71, %f860, 0f42D20000; - selp.f32 %f2128, 0f7F800000, %f872, %p71; - setp.eq.f32 %p72, %f2128, 0f7F800000; - @%p72 bra BB4_53; - - fma.rn.f32 %f2128, %f2128, %f162, %f2128; - -BB4_53: - setp.geu.f32 %p225, %f116, 0f00000000; - mov.b32 %r140, %f2128; - xor.b32 %r141, %r140, -2147483648; - mov.b32 %f873, %r141; - selp.f32 %f166, %f873, %f2128, %p1; - add.f32 %f874, %f116, %f116; - selp.f32 %f875, %f874, 0f00000000, %p54; - setp.eq.f32 %p74, %f116, 0f00000000; - selp.f32 %f2129, %f875, %f166, %p74; - @%p225 bra BB4_55; - - mov.f32 %f1949, 0f40000000; - cvt.rzi.f32.f32 %f877, %f1949; - setp.neu.f32 %p75, %f877, 0f40000000; - selp.f32 %f2129, 0f7FFFFFFF, %f166, %p75; - -BB4_55: - mov.f32 %f1957, 0f00000000; - mov.f32 %f1956, 0f3DAAAABD; - mov.f32 %f1955, 0f3C4CAF63; - mov.f32 %f1954, 0f3B18F0FE; - mov.f32 %f1953, 0fB5BFBE8E; - mov.f32 %f1952, 0fBF317200; - abs.f32 %f1951, %f116; - mov.f32 %f1950, 0f40000000; - add.f32 %f880, %f1951, 0f40000000; - mov.b32 %r142, %f880; - setp.gt.s32 %p76, %r142, 2139095039; - add.f32 %f881, %f116, 0f40000000; - setp.gtu.f32 %p77, %f1951, 0f7F800000; - selp.f32 %f882, %f881, %f2129, %p77; - selp.f32 %f883, 0fFF800000, 0f7F800000, %p1; - setp.neu.f32 %p78, %f1951, 0f7F800000; - selp.f32 %f884, %f882, %f883, %p78; - selp.f32 %f885, %f884, %f2129, %p76; - mul.f32 %f886, %f885, 0fBF000000; - setp.eq.f32 %p79, %f116, 0f3F800000; - selp.f32 %f887, 0fBF000000, %f886, %p79; - mul.f32 %f888, %f887, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f889, %f888; - fma.rn.f32 %f891, %f889, %f1952, %f887; - fma.rn.f32 %f893, %f889, %f1953, %f891; - mul.f32 %f894, %f893, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f895, %f894; - add.f32 %f896, %f889, 0f00000000; - ex2.approx.f32 %f897, %f896; - mul.f32 %f898, %f895, %f897; - setp.lt.f32 %p80, %f887, 0fC2D20000; - selp.f32 %f899, 0f00000000, %f898, %p80; - setp.gt.f32 %p81, %f887, 0f42D20000; - selp.f32 %f170, 0f7F800000, %f899, %p81; - // inline asm - rcp.approx.ftz.f32 %f878,%f127; - // inline asm - mul.f32 %f900, %f878, %f128; - mul.f32 %f901, %f900, %f900; - fma.rn.f32 %f904, %f1954, %f901, %f1955; - fma.rn.f32 %f906, %f904, %f901, %f1956; - mul.rn.f32 %f907, %f906, %f901; - mul.rn.f32 %f908, %f907, %f900; - sub.f32 %f909, %f126, %f900; - neg.f32 %f910, %f900; - add.f32 %f911, %f909, %f909; - fma.rn.f32 %f912, %f910, %f126, %f911; - mul.rn.f32 %f913, %f878, %f912; - add.f32 %f914, %f908, %f900; - sub.f32 %f915, %f900, %f914; - add.f32 %f916, %f908, %f915; - add.f32 %f917, %f913, %f916; - add.f32 %f918, %f914, %f917; - sub.f32 %f919, %f914, %f918; - add.f32 %f920, %f917, %f919; - add.f32 %f921, %f129, %f918; - sub.f32 %f922, %f129, %f921; - add.f32 %f923, %f918, %f922; - add.f32 %f924, %f920, %f923; - add.f32 %f925, %f130, %f924; - add.f32 %f926, %f921, %f925; - sub.f32 %f927, %f921, %f926; - add.f32 %f928, %f925, %f927; - mul.rn.f32 %f930, %f1950, %f926; - neg.f32 %f931, %f930; - fma.rn.f32 %f932, %f1950, %f926, %f931; - fma.rn.f32 %f933, %f1950, %f928, %f932; - fma.rn.f32 %f935, %f1957, %f926, %f933; - add.rn.f32 %f936, %f930, %f935; - neg.f32 %f937, %f936; - add.rn.f32 %f938, %f930, %f937; - add.rn.f32 %f939, %f938, %f935; - mov.b32 %r143, %f936; - setp.eq.s32 %p82, %r143, 1118925336; - add.s32 %r144, %r143, -1; - mov.b32 %f940, %r144; - add.f32 %f941, %f939, 0f37000000; - selp.f32 %f942, %f940, %f936, %p82; - selp.f32 %f171, %f941, %f939, %p82; - mul.f32 %f943, %f942, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f944, %f943; - fma.rn.f32 %f945, %f944, %f1952, %f942; - fma.rn.f32 %f946, %f944, %f1953, %f945; - mul.f32 %f947, %f946, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f948, %f947; - add.f32 %f949, %f944, 0f00000000; - ex2.approx.f32 %f950, %f949; - mul.f32 %f951, %f948, %f950; - setp.lt.f32 %p83, %f942, 0fC2D20000; - selp.f32 %f952, 0f00000000, %f951, %p83; - setp.gt.f32 %p84, %f942, 0f42D20000; - selp.f32 %f2130, 0f7F800000, %f952, %p84; - setp.eq.f32 %p85, %f2130, 0f7F800000; - @%p85 bra BB4_57; - - fma.rn.f32 %f2130, %f2130, %f171, %f2130; - -BB4_57: - setp.geu.f32 %p226, %f124, 0f00000000; - mov.b32 %r145, %f2130; - xor.b32 %r146, %r145, -2147483648; - mov.b32 %f953, %r146; - selp.f32 %f175, %f953, %f2130, %p2; - add.f32 %f954, %f124, %f124; - selp.f32 %f955, %f954, 0f00000000, %p54; - setp.eq.f32 %p87, %f124, 0f00000000; - selp.f32 %f2131, %f955, %f175, %p87; - @%p226 bra BB4_59; - - mov.f32 %f1958, 0f40000000; - cvt.rzi.f32.f32 %f957, %f1958; - setp.neu.f32 %p88, %f957, 0f40000000; - selp.f32 %f2131, 0f7FFFFFFF, %f175, %p88; - -BB4_59: - cvt.rn.f32.s32 %f1973, %r263; - cvt.rn.f32.s32 %f1972, %r264; - mov.f32 %f1971, 0f35BFBE8E; - mov.f32 %f1970, 0f3F317200; - add.f32 %f1969, %f1973, 0f3F800000; - sub.f32 %f1968, %f1969, %f2152; - sub.f32 %f1967, %f1973, %f2152; - abs.f32 %f1966, %f124; - mov.f32 %f1965, 0f00000000; - mov.f32 %f1964, 0f3DAAAABD; - mov.f32 %f1963, 0f3C4CAF63; - mov.f32 %f1962, 0f3B18F0FE; - mov.f32 %f1961, 0fB5BFBE8E; - mov.f32 %f1960, 0fBF317200; - mov.f32 %f1959, 0f40000000; - add.f32 %f960, %f1966, 0f40000000; - mov.b32 %r147, %f960; - setp.gt.s32 %p89, %r147, 2139095039; - add.f32 %f961, %f124, 0f40000000; - setp.gtu.f32 %p90, %f1966, 0f7F800000; - selp.f32 %f962, %f961, %f2131, %p90; - selp.f32 %f963, 0fFF800000, 0f7F800000, %p2; - setp.neu.f32 %p91, %f1966, 0f7F800000; - selp.f32 %f964, %f962, %f963, %p91; - selp.f32 %f965, %f964, %f2131, %p89; - mul.f32 %f966, %f965, 0fBF000000; - setp.eq.f32 %p92, %f124, 0f3F800000; - selp.f32 %f967, 0fBF000000, %f966, %p92; - mul.f32 %f968, %f967, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f969, %f968; - fma.rn.f32 %f971, %f969, %f1960, %f967; - fma.rn.f32 %f973, %f969, %f1961, %f971; - mul.f32 %f974, %f973, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f975, %f974; - add.f32 %f976, %f969, 0f00000000; - ex2.approx.f32 %f977, %f976; - mul.f32 %f978, %f975, %f977; - setp.lt.f32 %p93, %f967, 0fC2D20000; - selp.f32 %f979, 0f00000000, %f978, %p93; - setp.gt.f32 %p94, %f967, 0f42D20000; - selp.f32 %f980, 0f7F800000, %f979, %p94; - sub.f32 %f981, %f170, %f980; - mul.f32 %f982, %f94, %f981; - mul.f32 %f179, %f158, %f982; - mul.f32 %f983, %f1967, %f980; - mul.f32 %f984, %f1968, %f170; - sub.f32 %f985, %f984, %f983; - mul.f32 %f986, %f985, %f95; - mul.f32 %f180, %f158, %f986; - add.f32 %f987, %f1972, 0f3F800000; - sub.f32 %f181, %f987, %f2151; - div.rn.f32 %f182, %f181, %f407; - abs.f32 %f183, %f182; - setp.lt.f32 %p95, %f183, 0f00800000; - mul.f32 %f988, %f183, 0f4B800000; - selp.f32 %f989, 0fC3170000, 0fC2FE0000, %p95; - selp.f32 %f990, %f988, %f183, %p95; - mov.b32 %r148, %f990; - and.b32 %r149, %r148, 8388607; - or.b32 %r150, %r149, 1065353216; - mov.b32 %f991, %r150; - shr.u32 %r151, %r148, 23; - cvt.rn.f32.u32 %f992, %r151; - add.f32 %f993, %f989, %f992; - setp.gt.f32 %p96, %f991, 0f3FB504F3; - mul.f32 %f994, %f991, 0f3F000000; - add.f32 %f995, %f993, 0f3F800000; - selp.f32 %f996, %f994, %f991, %p96; - selp.f32 %f997, %f995, %f993, %p96; - add.f32 %f998, %f996, 0fBF800000; - add.f32 %f959, %f996, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f958,%f959; - // inline asm - add.f32 %f999, %f998, %f998; - mul.f32 %f1000, %f958, %f999; - mul.f32 %f1001, %f1000, %f1000; - fma.rn.f32 %f1004, %f1962, %f1001, %f1963; - fma.rn.f32 %f1006, %f1004, %f1001, %f1964; - mul.rn.f32 %f1007, %f1006, %f1001; - mul.rn.f32 %f1008, %f1007, %f1000; - sub.f32 %f1009, %f998, %f1000; - neg.f32 %f1010, %f1000; - add.f32 %f1011, %f1009, %f1009; - fma.rn.f32 %f1012, %f1010, %f998, %f1011; - mul.rn.f32 %f1013, %f958, %f1012; - add.f32 %f1014, %f1008, %f1000; - sub.f32 %f1015, %f1000, %f1014; - add.f32 %f1016, %f1008, %f1015; - add.f32 %f1017, %f1013, %f1016; - add.f32 %f1018, %f1014, %f1017; - sub.f32 %f1019, %f1014, %f1018; - add.f32 %f1020, %f1017, %f1019; - mul.rn.f32 %f1022, %f997, %f1970; - mul.rn.f32 %f1024, %f997, %f1971; - add.f32 %f1025, %f1022, %f1018; - sub.f32 %f1026, %f1022, %f1025; - add.f32 %f1027, %f1018, %f1026; - add.f32 %f1028, %f1020, %f1027; - add.f32 %f1029, %f1024, %f1028; - add.f32 %f1030, %f1025, %f1029; - sub.f32 %f1031, %f1025, %f1030; - add.f32 %f1032, %f1029, %f1031; - mul.rn.f32 %f1034, %f1959, %f1030; - neg.f32 %f1035, %f1034; - fma.rn.f32 %f1036, %f1959, %f1030, %f1035; - fma.rn.f32 %f1037, %f1959, %f1032, %f1036; - fma.rn.f32 %f1039, %f1965, %f1030, %f1037; - add.rn.f32 %f1040, %f1034, %f1039; - neg.f32 %f1041, %f1040; - add.rn.f32 %f1042, %f1034, %f1041; - add.rn.f32 %f1043, %f1042, %f1039; - mov.b32 %r152, %f1040; - setp.eq.s32 %p97, %r152, 1118925336; - add.s32 %r153, %r152, -1; - mov.b32 %f1044, %r153; - add.f32 %f1045, %f1043, 0f37000000; - selp.f32 %f1046, %f1044, %f1040, %p97; - selp.f32 %f184, %f1045, %f1043, %p97; - mul.f32 %f1047, %f1046, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1048, %f1047; - fma.rn.f32 %f1049, %f1048, %f1960, %f1046; - fma.rn.f32 %f1050, %f1048, %f1961, %f1049; - mul.f32 %f1051, %f1050, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1052, %f1051; - add.f32 %f1053, %f1048, 0f00000000; - ex2.approx.f32 %f1054, %f1053; - mul.f32 %f1055, %f1052, %f1054; - setp.lt.f32 %p98, %f1046, 0fC2D20000; - selp.f32 %f1056, 0f00000000, %f1055, %p98; - setp.gt.f32 %p99, %f1046, 0f42D20000; - selp.f32 %f2132, 0f7F800000, %f1056, %p99; - setp.eq.f32 %p100, %f2132, 0f7F800000; - @%p100 bra BB4_61; - - fma.rn.f32 %f2132, %f2132, %f184, %f2132; - -BB4_61: - setp.lt.f32 %p101, %f182, 0f00000000; - and.pred %p5, %p101, %p54; - mov.b32 %r154, %f2132; - xor.b32 %r155, %r154, -2147483648; - mov.b32 %f1057, %r155; - selp.f32 %f2134, %f1057, %f2132, %p5; - setp.eq.f32 %p103, %f182, 0f00000000; - @%p103 bra BB4_64; - bra.uni BB4_62; - -BB4_64: - add.f32 %f1060, %f182, %f182; - selp.f32 %f2134, %f1060, 0f00000000, %p54; - bra.uni BB4_65; - -BB4_62: - setp.geu.f32 %p104, %f182, 0f00000000; - @%p104 bra BB4_65; - - mov.f32 %f1981, 0f40000000; - cvt.rzi.f32.f32 %f1059, %f1981; - setp.neu.f32 %p105, %f1059, 0f40000000; - selp.f32 %f2134, 0f7FFFFFFF, %f2134, %p105; - -BB4_65: - abs.f32 %f1921, %f182; - add.f32 %f1061, %f1921, 0f40000000; - mov.b32 %r156, %f1061; - setp.lt.s32 %p107, %r156, 2139095040; - @%p107 bra BB4_70; - - abs.f32 %f1979, %f182; - setp.gtu.f32 %p108, %f1979, 0f7F800000; - @%p108 bra BB4_69; - bra.uni BB4_67; - -BB4_69: - add.f32 %f2134, %f182, 0f40000000; - bra.uni BB4_70; - -BB4_67: - abs.f32 %f1980, %f182; - setp.neu.f32 %p109, %f1980, 0f7F800000; - @%p109 bra BB4_70; - - selp.f32 %f2134, 0fFF800000, 0f7F800000, %p5; - -BB4_70: - mov.f32 %f1932, 0f35BFBE8E; - mov.f32 %f1931, 0f3F317200; - mov.f32 %f1930, 0f00000000; - mov.f32 %f1929, 0f3DAAAABD; - mov.f32 %f1928, 0f3C4CAF63; - mov.f32 %f1927, 0f3B18F0FE; - mov.f32 %f1926, 0fB5BFBE8E; - mov.f32 %f1925, 0fBF317200; - mov.f32 %f1924, 0f40000000; - cvt.rn.f32.s32 %f1923, %r264; - sub.f32 %f1922, %f1923, %f2151; - mul.f32 %f1064, %f2134, 0fBF000000; - setp.eq.f32 %p110, %f182, 0f3F800000; - selp.f32 %f1065, 0fBF000000, %f1064, %p110; - mul.f32 %f1066, %f1065, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1067, %f1066; - fma.rn.f32 %f1069, %f1067, %f1925, %f1065; - fma.rn.f32 %f1071, %f1067, %f1926, %f1069; + add.f32 %f868, %f862, 0f4B40007F; + mov.b32 %r271, %f868; + shl.b32 %r272, %r271, 23; + mov.b32 %f869, %r272; + ex2.approx.ftz.f32 %f870, %f867; + mul.f32 %f148, %f870, %f869; + setp.eq.f32 %p159, %f148, 0f7F800000; + mov.f32 %f1760, 0f7F800000; + @%p159 bra $L__BB4_100; + + fma.rn.f32 %f1760, %f148, %f147, %f148; + +$L__BB4_100: + setp.lt.f32 %p160, %f145, 0f00000000; + and.pred %p11, %p160, %p86; + setp.eq.f32 %p162, %f145, 0f00000000; + @%p162 bra $L__BB4_104; + bra.uni $L__BB4_101; + +$L__BB4_104: + add.f32 %f875, %f145, %f145; + selp.f32 %f1762, %f875, 0f00000000, %p86; + bra.uni $L__BB4_105; + +$L__BB4_101: + mov.b32 %r273, %f1760; + xor.b32 %r274, %r273, -2147483648; + mov.b32 %f871, %r274; + selp.f32 %f1762, %f871, %f1760, %p11; + setp.geu.f32 %p163, %f145, 0f00000000; + @%p163 bra $L__BB4_105; + + mov.f32 %f1692, 0f40000000; + cvt.rzi.f32.f32 %f873, %f1692; + setp.eq.f32 %p164, %f873, 0f40000000; + @%p164 bra $L__BB4_105; + + mov.f32 %f1762, 0f7FFFFFFF; + +$L__BB4_105: + abs.f32 %f1696, %f145; + add.f32 %f876, %f1696, 0f40000000; + mov.b32 %r275, %f876; + setp.lt.s32 %p166, %r275, 2139095040; + @%p166 bra $L__BB4_110; + + abs.f32 %f1697, %f145; + setp.gtu.f32 %p167, %f1697, 0f7F800000; + @%p167 bra $L__BB4_109; + bra.uni $L__BB4_107; + +$L__BB4_109: + add.f32 %f1762, %f145, 0f40000000; + bra.uni $L__BB4_110; + +$L__BB4_107: + abs.f32 %f1698, %f145; + setp.neu.f32 %p168, %f1698, 0f7F800000; + @%p168 bra $L__BB4_110; + + selp.f32 %f1762, 0fFF800000, 0f7F800000, %p11; + +$L__BB4_110: + cvt.rn.f32.s32 %f1643, %r523; + add.f32 %f1642, %f1643, 0f3F000000; + sub.f32 %f1641, %f1642, %f1776; + mov.f32 %f1763, 0f00000000; + sub.f32 %f1639, %f1643, %f1776; + add.f32 %f1638, %f1639, 0fBF000000; + mov.f32 %f1637, 0f32A57060; + mov.f32 %f1636, 0f4B400001; + mov.f32 %f1635, 0f437C0000; + mov.f32 %f1634, 0f3BBB989D; + mov.f32 %f1633, 0f3FB8AA3B; + mul.f32 %f878, %f1762, 0fBF000000; + setp.eq.f32 %p169, %f145, 0f3F800000; + selp.f32 %f879, 0fBF000000, %f878, %p169; + fma.rn.f32 %f882, %f879, %f1634, %f358; + cvt.sat.f32.f32 %f885, %f882; + fma.rm.f32 %f887, %f885, %f1635, %f1636; + add.f32 %f888, %f887, 0fCB40007F; + neg.f32 %f889, %f888; + fma.rn.f32 %f890, %f879, %f1633, %f889; + fma.rn.f32 %f892, %f879, %f1637, %f890; + mov.b32 %r276, %f887; + shl.b32 %r277, %r276, 23; + mov.b32 %f893, %r277; + ex2.approx.ftz.f32 %f894, %f892; + mul.f32 %f895, %f894, %f893; + sub.f32 %f896, %f144, %f895; + mul.f32 %f897, %f58, %f896; + mul.f32 %f157, %f101, %f897; + mul.f32 %f898, %f1638, %f895; + mul.f32 %f899, %f1641, %f144; + sub.f32 %f900, %f899, %f898; + cvt.f64.f32 %fd235, %f900; + mul.f64 %fd236, %fd45, %fd235; + cvt.f64.f32 %fd237, %f101; + mul.f64 %fd238, %fd236, %fd237; + cvt.rn.f32.f64 %f158, %fd238; + mul.f32 %f159, %f101, %f113; + setp.leu.f32 %p170, %f114, 0f3C23D70A; + @%p170 bra $L__BB4_112; + + sub.f32 %f901, %f115, %f114; + add.f32 %f902, %f114, %f1773; + div.rn.f32 %f1763, %f901, %f902; + +$L__BB4_112: + mov.f32 %f1764, 0f00000000; + @%p170 bra $L__BB4_127; + + mov.f64 %fd309, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r507}, %fd309; + } + and.b32 %r506, %r507, 2146435072; + setp.eq.s32 %p172, %r506, 1062207488; + add.f32 %f162, %f114, %f1773; + cvt.f64.f32 %fd46, %f162; + { + .reg .b32 %temp; + mov.b64 {%temp, %r68}, %fd46; + } + abs.f64 %fd47, %fd46; + { // callseq 90, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd47; + .param .b64 param1; + st.param.f64 [param1+0], %fd309; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd338, [retval0+0]; + } // callseq 90 + setp.lt.s32 %p173, %r68, 0; + and.pred %p12, %p173, %p172; + not.pred %p174, %p12; + @%p174 bra $L__BB4_115; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r278}, %fd338; + } + xor.b32 %r279, %r278, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r280, %temp}, %fd338; + } + mov.b64 %fd338, {%r280, %r279}; + +$L__BB4_115: + setp.eq.f32 %p175, %f162, 0f00000000; + @%p175 bra $L__BB4_119; + bra.uni $L__BB4_116; + +$L__BB4_119: + setp.lt.s32 %p178, %r61, 0; + mov.u32 %r281, 0; + selp.b32 %r282, %r68, 0, %p172; + or.b32 %r283, %r282, 2146435072; + selp.b32 %r284, %r283, %r282, %p178; + mov.b64 %fd338, {%r281, %r284}; + bra.uni $L__BB4_120; + +$L__BB4_116: + setp.gt.s32 %p176, %r68, -1; + @%p176 bra $L__BB4_120; + + cvt.rzi.f64.f64 %fd241, %fd224; + setp.eq.f64 %p177, %fd241, 0d4000000000000000; + @%p177 bra $L__BB4_120; + + mov.f64 %fd338, 0dFFF8000000000000; + +$L__BB4_120: + add.f64 %fd53, %fd46, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r285}, %fd53; + } + and.b32 %r286, %r285, 2146435072; + setp.ne.s32 %p180, %r286, 2146435072; + mov.f64 %fd339, %fd338; + @%p180 bra $L__BB4_126; + + setp.gtu.f64 %p181, %fd47, 0d7FF0000000000000; + mov.f64 %fd339, %fd53; + @%p181 bra $L__BB4_126; + + setp.eq.s32 %p182, %r63, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r287, %temp}, %fd224; + } + setp.eq.s32 %p183, %r287, 0; + and.pred %p184, %p182, %p183; + @%p184 bra $L__BB4_125; + bra.uni $L__BB4_123; + +$L__BB4_125: + setp.lt.s32 %p190, %r61, 0; + mov.u32 %r292, 0; + setp.gt.f64 %p191, %fd47, 0d3FF0000000000000; + selp.b32 %r293, 2146435072, 0, %p191; + xor.b32 %r294, %r293, 2146435072; + selp.b32 %r295, %r294, %r293, %p190; + setp.eq.f32 %p192, %f162, 0fBF800000; + selp.b32 %r296, 1072693248, %r295, %p192; + mov.b64 %fd339, {%r292, %r296}; + bra.uni $L__BB4_126; + +$L__BB4_123: + { + .reg .b32 %temp; + mov.b64 {%r288, %temp}, %fd46; + } + and.b32 %r289, %r68, 2147483647; + setp.ne.s32 %p185, %r289, 2146435072; + setp.ne.s32 %p186, %r288, 0; + or.pred %p187, %p185, %p186; + mov.f64 %fd339, %fd338; + @%p187 bra $L__BB4_126; + + setp.ne.s32 %p188, %r63, 1071644672; + and.pred %p189, %p188, %p12; + selp.b32 %r290, %r65, %r64, %p189; + mov.u32 %r291, 0; + mov.b64 %fd339, {%r291, %r290}; + +$L__BB4_126: + setp.eq.f32 %p193, %f162, 0f3F800000; + selp.f64 %fd244, 0d3FF0000000000000, %fd339, %p193; + add.f32 %f904, %f115, %f1773; + cvt.f64.f32 %fd245, %f904; + div.rn.f64 %fd246, %fd245, %fd244; + cvt.rn.f32.f64 %f1764, %fd246; + +$L__BB4_127: + mov.f64 %fd308, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r483}, %fd308; + } + and.b32 %r482, %r483, 2146435072; + mov.f32 %f905, 0f47C35000; + min.f32 %f906, %f1764, %f905; + cvt.f64.f32 %fd57, %f906; + min.f32 %f165, %f1763, %f905; + fma.rn.f32 %f1744, %f165, %f129, %f1744; + mul.f32 %f907, %f165, %f130; + cvt.f64.f32 %fd58, %f907; + cvt.f64.f32 %fd59, %f129; + { + .reg .b32 %temp; + mov.b64 {%temp, %r69}, %fd59; + } + abs.f64 %fd60, %fd59; + { // callseq 91, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd60; + .param .b64 param1; + st.param.f64 [param1+0], %fd308; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd340, [retval0+0]; + } // callseq 91 + setp.eq.s32 %p194, %r482, 1062207488; + @%p194 bra $L__BB4_162; + bra.uni $L__BB4_128; + +$L__BB4_162: + setp.gt.s32 %p240, %r69, -1; + @%p240 bra $L__BB4_164; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r333}, %fd340; + } + xor.b32 %r334, %r333, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r335, %temp}, %fd340; + } + mov.b64 %fd340, {%r335, %r334}; + +$L__BB4_164: + setp.eq.f32 %p241, %f129, 0f00000000; + @%p241 bra $L__BB4_168; + bra.uni $L__BB4_165; + +$L__BB4_168: + setp.lt.s32 %p244, %r61, 0; + mov.u32 %r336, 0; + or.b32 %r337, %r69, 2146435072; + selp.b32 %r338, %r337, %r69, %p244; + mov.b64 %fd340, {%r336, %r338}; + bra.uni $L__BB4_169; + +$L__BB4_128: + setp.eq.f32 %p195, %f129, 0f00000000; + @%p195 bra $L__BB4_132; + bra.uni $L__BB4_129; + +$L__BB4_132: + shr.s32 %r505, %r61, 31; + and.b32 %r504, %r505, 2146435072; + mov.u32 %r297, 0; + mov.b64 %fd340, {%r297, %r504}; + bra.uni $L__BB4_133; + +$L__BB4_165: + @%p240 bra $L__BB4_169; + + cvt.rzi.f64.f64 %fd279, %fd224; + setp.eq.f64 %p243, %fd279, 0d4000000000000000; + @%p243 bra $L__BB4_169; + + mov.f64 %fd340, 0dFFF8000000000000; + +$L__BB4_169: + add.f64 %fd94, %fd59, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r339}, %fd94; + } + and.b32 %r340, %r339, 2146435072; + setp.ne.s32 %p245, %r340, 2146435072; + mov.f64 %fd348, %fd340; + @%p245 bra $L__BB4_175; + + setp.gtu.f64 %p246, %fd60, 0d7FF0000000000000; + mov.f64 %fd348, %fd94; + @%p246 bra $L__BB4_175; + + setp.eq.s32 %p247, %r63, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r341, %temp}, %fd224; + } + setp.eq.s32 %p248, %r341, 0; + and.pred %p249, %p247, %p248; + @%p249 bra $L__BB4_174; + bra.uni $L__BB4_172; + +$L__BB4_174: + setp.lt.s32 %p256, %r61, 0; + mov.u32 %r346, 0; + setp.gt.f64 %p257, %fd60, 0d3FF0000000000000; + selp.b32 %r347, 2146435072, 0, %p257; + xor.b32 %r348, %r347, 2146435072; + selp.b32 %r349, %r348, %r347, %p256; + setp.eq.f32 %p258, %f129, 0fBF800000; + selp.b32 %r350, 1072693248, %r349, %p258; + mov.b64 %fd348, {%r346, %r350}; + bra.uni $L__BB4_175; + +$L__BB4_129: + setp.gt.s32 %p196, %r69, -1; + @%p196 bra $L__BB4_133; + + cvt.rzi.f64.f64 %fd249, %fd224; + setp.eq.f64 %p197, %fd249, 0d4000000000000000; + @%p197 bra $L__BB4_133; + + mov.f64 %fd340, 0dFFF8000000000000; + +$L__BB4_133: + add.f64 %fd64, %fd59, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r298}, %fd64; + } + and.b32 %r299, %r298, 2146435072; + setp.ne.s32 %p198, %r299, 2146435072; + mov.f64 %fd341, %fd340; + @%p198 bra $L__BB4_139; + + setp.gtu.f64 %p199, %fd60, 0d7FF0000000000000; + mov.f64 %fd341, %fd64; + @%p199 bra $L__BB4_139; + + setp.eq.s32 %p200, %r63, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r300, %temp}, %fd224; + } + setp.eq.s32 %p201, %r300, 0; + and.pred %p202, %p200, %p201; + @%p202 bra $L__BB4_138; + bra.uni $L__BB4_136; + +$L__BB4_138: + setp.lt.s32 %p206, %r61, 0; + mov.u32 %r304, 0; + setp.gt.f64 %p207, %fd60, 0d3FF0000000000000; + selp.b32 %r305, 2146435072, 0, %p207; + xor.b32 %r306, %r305, 2146435072; + selp.b32 %r307, %r306, %r305, %p206; + setp.eq.f32 %p208, %f129, 0fBF800000; + selp.b32 %r308, 1072693248, %r307, %p208; + mov.b64 %fd341, {%r304, %r308}; + bra.uni $L__BB4_139; + +$L__BB4_172: + { + .reg .b32 %temp; + mov.b64 {%r342, %temp}, %fd59; + } + and.b32 %r343, %r69, 2147483647; + setp.ne.s32 %p250, %r343, 2146435072; + setp.ne.s32 %p251, %r342, 0; + or.pred %p252, %p250, %p251; + mov.f64 %fd348, %fd340; + @%p252 bra $L__BB4_175; + + setp.lt.s32 %p253, %r69, 0; + mov.u32 %r344, 0; + setp.ne.s32 %p254, %r63, 1071644672; + and.pred %p255, %p254, %p253; + selp.b32 %r345, %r65, %r64, %p255; + mov.b64 %fd348, {%r344, %r345}; + +$L__BB4_175: + setp.eq.f32 %p259, %f129, 0f3F800000; + selp.f64 %fd282, 0d3FF0000000000000, %fd348, %p259; + mul.f64 %fd283, %fd282, %fd57; + sub.f64 %fd284, %fd58, %fd283; + cvt.f64.f32 %fd285, %f1748; + add.f64 %fd358, %fd284, %fd285; + cvt.f64.f32 %fd99, %f157; + { + .reg .b32 %temp; + mov.b64 {%temp, %r72}, %fd99; + } + abs.f64 %fd100, %fd99; + { // callseq 94, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd100; + .param .b64 param1; + st.param.f64 [param1+0], %fd224; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd350, [retval0+0]; + } // callseq 94 + setp.gt.s32 %p260, %r72, -1; + @%p260 bra $L__BB4_177; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r351}, %fd350; + } + xor.b32 %r352, %r351, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r353, %temp}, %fd350; + } + mov.b64 %fd350, {%r353, %r352}; + +$L__BB4_177: + setp.eq.f32 %p261, %f157, 0f00000000; + @%p261 bra $L__BB4_181; + bra.uni $L__BB4_178; + +$L__BB4_181: + setp.lt.s32 %p264, %r61, 0; + mov.u32 %r354, 0; + or.b32 %r355, %r72, 2146435072; + selp.b32 %r356, %r355, %r72, %p264; + mov.b64 %fd350, {%r354, %r356}; + bra.uni $L__BB4_182; + +$L__BB4_178: + @%p260 bra $L__BB4_182; + + cvt.rzi.f64.f64 %fd288, %fd224; + setp.eq.f64 %p263, %fd288, 0d4000000000000000; + @%p263 bra $L__BB4_182; + + mov.f64 %fd350, 0dFFF8000000000000; + +$L__BB4_182: + add.f64 %fd106, %fd99, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r357}, %fd106; + } + and.b32 %r358, %r357, 2146435072; + setp.ne.s32 %p265, %r358, 2146435072; + mov.f64 %fd351, %fd350; + @%p265 bra $L__BB4_188; + + setp.gtu.f64 %p266, %fd100, 0d7FF0000000000000; + mov.f64 %fd351, %fd106; + @%p266 bra $L__BB4_188; + + setp.eq.s32 %p267, %r63, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r359, %temp}, %fd224; + } + setp.eq.s32 %p268, %r359, 0; + and.pred %p269, %p267, %p268; + @%p269 bra $L__BB4_187; + bra.uni $L__BB4_185; + +$L__BB4_187: + setp.lt.s32 %p276, %r61, 0; + mov.u32 %r364, 0; + setp.gt.f64 %p277, %fd100, 0d3FF0000000000000; + selp.b32 %r365, 2146435072, 0, %p277; + xor.b32 %r366, %r365, 2146435072; + selp.b32 %r367, %r366, %r365, %p276; + setp.eq.f32 %p278, %f157, 0fBF800000; + selp.b32 %r368, 1072693248, %r367, %p278; + mov.b64 %fd351, {%r364, %r368}; + bra.uni $L__BB4_188; + +$L__BB4_136: + { + .reg .b32 %temp; + mov.b64 {%r301, %temp}, %fd59; + } + and.b32 %r302, %r69, 2147483647; + setp.ne.s32 %p203, %r302, 2146435072; + setp.ne.s32 %p204, %r301, 0; + or.pred %p205, %p203, %p204; + mov.f64 %fd341, %fd340; + @%p205 bra $L__BB4_139; + + mov.u32 %r303, 0; + mov.b64 %fd341, {%r303, %r64}; + +$L__BB4_139: + setp.eq.f32 %p209, %f129, 0f3F800000; + selp.f64 %fd252, 0d3FF0000000000000, %fd341, %p209; + mul.f64 %fd253, %fd252, %fd57; + sub.f64 %fd254, %fd58, %fd253; + cvt.f64.f32 %fd255, %f1748; + add.f64 %fd358, %fd254, %fd255; + cvt.f64.f32 %fd69, %f157; + { + .reg .b32 %temp; + mov.b64 {%temp, %r70}, %fd69; + } + abs.f64 %fd70, %fd69; + setp.eq.f32 %p210, %f157, 0f00000000; + @%p210 bra $L__BB4_143; + bra.uni $L__BB4_140; + +$L__BB4_143: + shr.s32 %r503, %r61, 31; + and.b32 %r502, %r503, 2146435072; + mov.u32 %r309, 0; + mov.b64 %fd342, {%r309, %r502}; + bra.uni $L__BB4_144; + +$L__BB4_140: + { // callseq 92, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd70; + .param .b64 param1; + st.param.f64 [param1+0], %fd224; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd342, [retval0+0]; + } // callseq 92 + setp.gt.s32 %p211, %r70, -1; + @%p211 bra $L__BB4_144; + + cvt.rzi.f64.f64 %fd258, %fd224; + setp.eq.f64 %p212, %fd258, 0d4000000000000000; + @%p212 bra $L__BB4_144; + + mov.f64 %fd342, 0dFFF8000000000000; + +$L__BB4_144: + add.f64 %fd74, %fd69, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r310}, %fd74; + } + and.b32 %r311, %r310, 2146435072; + setp.ne.s32 %p213, %r311, 2146435072; + mov.f64 %fd343, %fd342; + @%p213 bra $L__BB4_150; + + setp.gtu.f64 %p214, %fd70, 0d7FF0000000000000; + mov.f64 %fd343, %fd74; + @%p214 bra $L__BB4_150; + + setp.eq.s32 %p215, %r63, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r312, %temp}, %fd224; + } + setp.eq.s32 %p216, %r312, 0; + and.pred %p217, %p215, %p216; + @%p217 bra $L__BB4_149; + bra.uni $L__BB4_147; + +$L__BB4_149: + setp.lt.s32 %p221, %r61, 0; + mov.u32 %r316, 0; + setp.gt.f64 %p222, %fd70, 0d3FF0000000000000; + selp.b32 %r317, 2146435072, 0, %p222; + xor.b32 %r318, %r317, 2146435072; + selp.b32 %r319, %r318, %r317, %p221; + setp.eq.f32 %p223, %f157, 0fBF800000; + selp.b32 %r320, 1072693248, %r319, %p223; + mov.b64 %fd343, {%r316, %r320}; + bra.uni $L__BB4_150; + +$L__BB4_185: + { + .reg .b32 %temp; + mov.b64 {%r360, %temp}, %fd99; + } + and.b32 %r361, %r72, 2147483647; + setp.ne.s32 %p270, %r361, 2146435072; + setp.ne.s32 %p271, %r360, 0; + or.pred %p272, %p270, %p271; + mov.f64 %fd351, %fd350; + @%p272 bra $L__BB4_188; + + setp.lt.s32 %p273, %r72, 0; + mov.u32 %r362, 0; + setp.ne.s32 %p274, %r63, 1071644672; + and.pred %p275, %p274, %p273; + selp.b32 %r363, %r65, %r64, %p275; + mov.b64 %fd351, {%r362, %r363}; + +$L__BB4_188: + setp.eq.f32 %p279, %f157, 0f3F800000; + selp.f64 %fd291, 0d3FF0000000000000, %fd351, %p279; + mul.f64 %fd292, %fd291, %fd57; + mul.f32 %f910, %f165, %f158; + cvt.f64.f32 %fd293, %f910; + sub.f64 %fd294, %fd293, %fd292; + cvt.f64.f32 %fd295, %f1747; + add.f64 %fd357, %fd294, %fd295; + cvt.f64.f32 %fd111, %f159; + { + .reg .b32 %temp; + mov.b64 {%temp, %r73}, %fd111; + } + abs.f64 %fd112, %fd111; + { // callseq 95, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd112; + .param .b64 param1; + st.param.f64 [param1+0], %fd224; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd353, [retval0+0]; + } // callseq 95 + setp.gt.s32 %p280, %r73, -1; + @%p280 bra $L__BB4_190; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r369}, %fd353; + } + xor.b32 %r370, %r369, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r371, %temp}, %fd353; + } + mov.b64 %fd353, {%r371, %r370}; + +$L__BB4_190: + setp.eq.f32 %p281, %f159, 0f00000000; + @%p281 bra $L__BB4_194; + bra.uni $L__BB4_191; + +$L__BB4_194: + setp.lt.s32 %p284, %r61, 0; + mov.u32 %r372, 0; + or.b32 %r373, %r73, 2146435072; + selp.b32 %r374, %r373, %r73, %p284; + mov.b64 %fd353, {%r372, %r374}; + bra.uni $L__BB4_195; + +$L__BB4_191: + @%p280 bra $L__BB4_195; + + cvt.rzi.f64.f64 %fd298, %fd224; + setp.eq.f64 %p283, %fd298, 0d4000000000000000; + @%p283 bra $L__BB4_195; + + mov.f64 %fd353, 0dFFF8000000000000; + +$L__BB4_195: + add.f64 %fd118, %fd111, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r375}, %fd118; + } + and.b32 %r376, %r375, 2146435072; + setp.ne.s32 %p285, %r376, 2146435072; + mov.f64 %fd354, %fd353; + @%p285 bra $L__BB4_201; + + setp.gtu.f64 %p286, %fd112, 0d7FF0000000000000; + mov.f64 %fd354, %fd118; + @%p286 bra $L__BB4_201; + + setp.eq.s32 %p287, %r63, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r377, %temp}, %fd224; + } + setp.eq.s32 %p288, %r377, 0; + and.pred %p289, %p287, %p288; + @%p289 bra $L__BB4_200; + bra.uni $L__BB4_198; + +$L__BB4_200: + setp.lt.s32 %p296, %r61, 0; + mov.u32 %r382, 0; + setp.gt.f64 %p297, %fd112, 0d3FF0000000000000; + selp.b32 %r383, 2146435072, 0, %p297; + xor.b32 %r384, %r383, 2146435072; + selp.b32 %r385, %r384, %r383, %p296; + setp.eq.f32 %p298, %f159, 0fBF800000; + selp.b32 %r386, 1072693248, %r385, %p298; + mov.b64 %fd354, {%r382, %r386}; + bra.uni $L__BB4_201; + +$L__BB4_147: + { + .reg .b32 %temp; + mov.b64 {%r313, %temp}, %fd69; + } + and.b32 %r314, %r70, 2147483647; + setp.ne.s32 %p218, %r314, 2146435072; + setp.ne.s32 %p219, %r313, 0; + or.pred %p220, %p218, %p219; + mov.f64 %fd343, %fd342; + @%p220 bra $L__BB4_150; + + mov.u32 %r315, 0; + mov.b64 %fd343, {%r315, %r64}; + +$L__BB4_150: + setp.eq.f32 %p224, %f157, 0f3F800000; + selp.f64 %fd261, 0d3FF0000000000000, %fd343, %p224; + mul.f64 %fd262, %fd261, %fd57; + mul.f32 %f908, %f165, %f158; + cvt.f64.f32 %fd263, %f908; + sub.f64 %fd264, %fd263, %fd262; + cvt.f64.f32 %fd265, %f1747; + add.f64 %fd357, %fd264, %fd265; + cvt.f64.f32 %fd79, %f159; + { + .reg .b32 %temp; + mov.b64 {%temp, %r71}, %fd79; + } + abs.f64 %fd80, %fd79; + setp.eq.f32 %p225, %f159, 0f00000000; + @%p225 bra $L__BB4_154; + bra.uni $L__BB4_151; + +$L__BB4_154: + shr.s32 %r501, %r61, 31; + and.b32 %r500, %r501, 2146435072; + mov.u32 %r321, 0; + mov.b64 %fd344, {%r321, %r500}; + bra.uni $L__BB4_155; + +$L__BB4_151: + { // callseq 93, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd80; + .param .b64 param1; + st.param.f64 [param1+0], %fd224; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd344, [retval0+0]; + } // callseq 93 + setp.gt.s32 %p226, %r71, -1; + @%p226 bra $L__BB4_155; + + cvt.rzi.f64.f64 %fd268, %fd224; + setp.eq.f64 %p227, %fd268, 0d4000000000000000; + @%p227 bra $L__BB4_155; + + mov.f64 %fd344, 0dFFF8000000000000; + +$L__BB4_155: + add.f64 %fd84, %fd79, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r322}, %fd84; + } + and.b32 %r323, %r322, 2146435072; + setp.ne.s32 %p228, %r323, 2146435072; + mov.f64 %fd345, %fd344; + @%p228 bra $L__BB4_161; + + setp.gtu.f64 %p229, %fd80, 0d7FF0000000000000; + mov.f64 %fd345, %fd84; + @%p229 bra $L__BB4_161; + + setp.eq.s32 %p230, %r63, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r324, %temp}, %fd224; + } + setp.eq.s32 %p231, %r324, 0; + and.pred %p232, %p230, %p231; + @%p232 bra $L__BB4_160; + bra.uni $L__BB4_158; + +$L__BB4_160: + setp.lt.s32 %p236, %r61, 0; + mov.u32 %r328, 0; + setp.gt.f64 %p237, %fd80, 0d3FF0000000000000; + selp.b32 %r329, 2146435072, 0, %p237; + xor.b32 %r330, %r329, 2146435072; + selp.b32 %r331, %r330, %r329, %p236; + setp.eq.f32 %p238, %f159, 0fBF800000; + selp.b32 %r332, 1072693248, %r331, %p238; + mov.b64 %fd345, {%r328, %r332}; + bra.uni $L__BB4_161; + +$L__BB4_198: + { + .reg .b32 %temp; + mov.b64 {%r378, %temp}, %fd111; + } + and.b32 %r379, %r73, 2147483647; + setp.ne.s32 %p290, %r379, 2146435072; + setp.ne.s32 %p291, %r378, 0; + or.pred %p292, %p290, %p291; + mov.f64 %fd354, %fd353; + @%p292 bra $L__BB4_201; + + setp.lt.s32 %p293, %r73, 0; + mov.u32 %r380, 0; + setp.ne.s32 %p294, %r63, 1071644672; + and.pred %p295, %p294, %p293; + selp.b32 %r381, %r65, %r64, %p295; + mov.b64 %fd354, {%r380, %r381}; + +$L__BB4_201: + mul.f32 %f911, %f165, 0f00000000; + cvt.f64.f32 %fd301, %f911; + setp.eq.f32 %p299, %f159, 0f3F800000; + selp.f64 %fd302, 0d3FF0000000000000, %fd354, %p299; + mul.f64 %fd303, %fd302, %fd57; + sub.f64 %fd304, %fd301, %fd303; + cvt.f64.f32 %fd305, %f1746; + add.f64 %fd356, %fd304, %fd305; + cvt.f64.f32 %fd306, %f1745; + sub.f64 %fd307, %fd301, %fd57; + add.f64 %fd355, %fd307, %fd306; + bra.uni $L__BB4_202; + +$L__BB4_158: + { + .reg .b32 %temp; + mov.b64 {%r325, %temp}, %fd79; + } + and.b32 %r326, %r71, 2147483647; + setp.ne.s32 %p233, %r326, 2146435072; + setp.ne.s32 %p234, %r325, 0; + or.pred %p235, %p233, %p234; + mov.f64 %fd345, %fd344; + @%p235 bra $L__BB4_161; + + mov.u32 %r327, 0; + mov.b64 %fd345, {%r327, %r64}; + +$L__BB4_161: + mul.f32 %f909, %f165, 0f00000000; + cvt.f64.f32 %fd271, %f909; + setp.eq.f32 %p239, %f159, 0f3F800000; + selp.f64 %fd272, 0d3FF0000000000000, %fd345, %p239; + mul.f64 %fd273, %fd272, %fd57; + sub.f64 %fd274, %fd271, %fd273; + cvt.f64.f32 %fd275, %f1746; + add.f64 %fd356, %fd274, %fd275; + cvt.f64.f32 %fd276, %f1745; + sub.f64 %fd277, %fd271, %fd57; + add.f64 %fd355, %fd277, %fd276; + +$L__BB4_202: + cvt.rn.f32.f64 %f1748, %fd358; + cvt.rn.f32.f64 %f1747, %fd357; + cvt.rn.f32.f64 %f1746, %fd356; + cvt.rn.f32.f64 %f1745, %fd355; + fma.rn.f32 %f1743, %f165, %f157, %f1743; + fma.rn.f32 %f1742, %f165, %f159, %f1742; + add.f32 %f1741, %f1741, %f165; + add.s32 %r523, %r523, 1; + setp.lt.s32 %p300, %r523, %r85; + @%p300 bra $L__BB4_56; + + add.s32 %r522, %r522, 1; + setp.lt.s32 %p301, %r522, %r85; + @%p301 bra $L__BB4_55; + +$L__BB4_204: + ld.param.u32 %r484, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_6]; + div.rn.f32 %f912, %f1744, %f1748; + mov.f32 %f913, 0fBF800000; + max.f32 %f914, %f912, %f913; + mov.f32 %f915, 0f3F800000; + min.f32 %f916, %f914, %f915; + sub.f32 %f1777, %f1777, %f916; + div.rn.f32 %f917, %f1743, %f1747; + max.f32 %f918, %f917, %f913; + min.f32 %f919, %f918, %f915; + sub.f32 %f1776, %f1776, %f919; + neg.f32 %f920, %f1775; + div.rn.f32 %f921, %f1742, %f1746; + max.f32 %f922, %f921, %f920; + min.f32 %f923, %f922, %f1775; + sub.f32 %f924, %f1775, %f923; + neg.f32 %f925, %f1774; + div.rn.f32 %f926, %f1741, %f1745; + max.f32 %f927, %f926, %f925; + min.f32 %f928, %f927, %f1774; + sub.f32 %f929, %f1774, %f928; + max.f32 %f1775, %f924, %f915; + mov.f32 %f930, 0f3C23D70A; + max.f32 %f1774, %f929, %f930; + add.s32 %r521, %r521, 1; + setp.lt.s32 %p302, %r521, %r484; + @%p302 bra $L__BB4_53; + +$L__BB4_205: + mov.f32 %f941, 0f00000000; + mov.f32 %f1790, %f941; + mov.f32 %f1791, %f941; + mov.f32 %f1792, %f941; + mov.f32 %f1795, %f941; + mov.f32 %f1793, %f941; + mov.f32 %f1794, %f941; + mov.f32 %f1796, %f941; + mov.f32 %f1797, %f941; + mov.f32 %f1798, %f941; + mov.f32 %f1799, %f941; + mov.f32 %f1817, %f941; + @%p20 bra $L__BB4_260; + + div.rn.f32 %f953, %f1775, 0fC0206C98; + div.rn.f32 %f192, %f953, %f325; + sqrt.rn.f32 %f193, %f34; + mov.f32 %f954, 0f3F800000; + cvt.rzi.f32.f32 %f955, %f954; + add.f32 %f956, %f955, %f955; + mov.f32 %f957, 0f40000000; + sub.f32 %f958, %f957, %f956; + abs.f32 %f194, %f958; + mov.u32 %r387, 0; + setp.eq.f32 %p311, %f194, 0f3F800000; + mov.u32 %r524, %r387; + +$L__BB4_207: + cvt.rn.f32.s32 %f959, %r524; + sub.f32 %f960, %f959, %f1777; + add.f32 %f961, %f960, 0f3F000000; + mul.f32 %f962, %f193, %f961; + abs.f32 %f206, %f962; + setp.ge.f32 %p304, %f206, 0f3F8060FE; + mul.f32 %f963, %f962, %f962; + selp.f32 %f964, %f206, %f963, %p304; + selp.f32 %f965, 0f3789CA3C, 0f38B1E96A, %p304; + selp.f32 %f966, 0fB9F560B9, 0fBA574D20, %p304; + fma.rn.f32 %f967, %f965, %f964, %f966; + selp.f32 %f968, 0f3BAC840B, 0f3BAAD5EA, %p304; + fma.rn.f32 %f969, %f967, %f964, %f968; + selp.f32 %f970, 0fBD0C8162, 0fBCDC1BE7, %p304; + fma.rn.f32 %f971, %f969, %f964, %f970; + selp.f32 %f972, 0f3E1CF906, 0f3DE718AF, %p304; + fma.rn.f32 %f973, %f971, %f964, %f972; + selp.f32 %f974, 0f3F6A937E, 0fBEC093AC, %p304; + fma.rn.f32 %f975, %f973, %f964, %f974; + selp.f32 %f976, 0f3F20D842, 0f3E0375D3, %p304; + fma.rn.f32 %f977, %f975, %f964, %f976; + neg.f32 %f978, %f206; + selp.f32 %f979, %f978, %f962, %p304; + fma.rn.f32 %f207, %f977, %f979, %f979; + mov.b32 %r389, %f962; + and.b32 %r78, %r389, -2147483648; + add.f32 %f980, %f960, 0fBF000000; + mul.f32 %f981, %f193, %f980; + abs.f32 %f208, %f981; + setp.ge.f32 %p305, %f208, 0f3F8060FE; + mul.f32 %f982, %f981, %f981; + selp.f32 %f983, %f208, %f982, %p305; + selp.f32 %f984, 0f3789CA3C, 0f38B1E96A, %p305; + selp.f32 %f985, 0fB9F560B9, 0fBA574D20, %p305; + fma.rn.f32 %f986, %f984, %f983, %f985; + selp.f32 %f987, 0f3BAC840B, 0f3BAAD5EA, %p305; + fma.rn.f32 %f988, %f986, %f983, %f987; + selp.f32 %f989, 0fBD0C8162, 0fBCDC1BE7, %p305; + fma.rn.f32 %f990, %f988, %f983, %f989; + selp.f32 %f991, 0f3E1CF906, 0f3DE718AF, %p305; + fma.rn.f32 %f992, %f990, %f983, %f991; + selp.f32 %f993, 0f3F6A937E, 0fBEC093AC, %p305; + fma.rn.f32 %f994, %f992, %f983, %f993; + selp.f32 %f995, 0f3F20D842, 0f3E0375D3, %p305; + fma.rn.f32 %f996, %f994, %f983, %f995; + neg.f32 %f997, %f208; + selp.f32 %f998, %f997, %f981, %p305; + fma.rn.f32 %f209, %f996, %f998, %f998; + mov.b32 %r390, %f981; + and.b32 %r79, %r390, -2147483648; + add.f32 %f999, %f959, 0f3F000000; + sub.f32 %f1000, %f999, %f1777; + div.rn.f32 %f210, %f1000, %f325; + abs.f32 %f211, %f210; + setp.lt.f32 %p306, %f211, 0f00800000; + mul.f32 %f1001, %f211, 0f4B800000; + selp.f32 %f1002, %f1001, %f211, %p306; + selp.f32 %f1003, 0fC3170000, 0fC2FE0000, %p306; + mov.b32 %r391, %f1002; + and.b32 %r392, %r391, 8388607; + or.b32 %r393, %r392, 1065353216; + mov.b32 %f1004, %r393; + shr.u32 %r394, %r391, 23; + cvt.rn.f32.u32 %f1005, %r394; + add.f32 %f1006, %f1003, %f1005; + setp.gt.f32 %p307, %f1004, 0f3FB504F3; + mul.f32 %f1007, %f1004, 0f3F000000; + add.f32 %f1008, %f1006, 0f3F800000; + selp.f32 %f1009, %f1008, %f1006, %p307; + selp.f32 %f1010, %f1007, %f1004, %p307; + add.f32 %f1011, %f1010, 0fBF800000; + add.f32 %f1012, %f1010, 0f3F800000; + rcp.approx.ftz.f32 %f1013, %f1012; + add.f32 %f1014, %f1011, %f1011; + mul.f32 %f1016, %f1014, %f1013; + mul.f32 %f1017, %f1016, %f1016; + mov.f32 %f1018, 0f3C4CAF63; + mov.f32 %f1019, 0f3B18F0FE; + fma.rn.f32 %f1020, %f1019, %f1017, %f1018; + mov.f32 %f1021, 0f3DAAAABD; + fma.rn.f32 %f1022, %f1020, %f1017, %f1021; + mul.rn.f32 %f1023, %f1022, %f1017; + mul.rn.f32 %f1024, %f1023, %f1016; + sub.f32 %f1025, %f1011, %f1016; + add.f32 %f1026, %f1025, %f1025; + neg.f32 %f1027, %f1016; + fma.rn.f32 %f1028, %f1027, %f1011, %f1026; + mul.rn.f32 %f1029, %f1013, %f1028; + add.f32 %f1030, %f1024, %f1016; + sub.f32 %f1031, %f1016, %f1030; + add.f32 %f1032, %f1024, %f1031; + add.f32 %f1033, %f1029, %f1032; + add.f32 %f1034, %f1030, %f1033; + sub.f32 %f1035, %f1030, %f1034; + add.f32 %f1036, %f1033, %f1035; + mov.f32 %f1037, 0f3F317200; + mul.rn.f32 %f1038, %f1009, %f1037; + mov.f32 %f1039, 0f35BFBE8E; + mul.rn.f32 %f1040, %f1009, %f1039; + add.f32 %f1041, %f1038, %f1034; + sub.f32 %f1042, %f1038, %f1041; + add.f32 %f1043, %f1034, %f1042; + add.f32 %f1044, %f1036, %f1043; + add.f32 %f1045, %f1040, %f1044; + add.f32 %f1046, %f1041, %f1045; + sub.f32 %f1047, %f1041, %f1046; + add.f32 %f1048, %f1045, %f1047; + mul.rn.f32 %f1049, %f957, %f1046; + neg.f32 %f1050, %f1049; + fma.rn.f32 %f1051, %f957, %f1046, %f1050; + fma.rn.f32 %f1052, %f957, %f1048, %f1051; + fma.rn.f32 %f1054, %f941, %f1046, %f1052; + add.rn.f32 %f1055, %f1049, %f1054; + neg.f32 %f1056, %f1055; + add.rn.f32 %f1057, %f1049, %f1056; + add.rn.f32 %f1058, %f1057, %f1054; + mov.b32 %r395, %f1055; + setp.eq.s32 %p308, %r395, 1118925336; + add.s32 %r396, %r395, -1; + mov.b32 %f1059, %r396; + add.f32 %f1060, %f1058, 0f37000000; + selp.f32 %f212, %f1060, %f1058, %p308; + selp.f32 %f1061, %f1059, %f1055, %p308; + mov.f32 %f1062, 0f3FB8AA3B; + mul.rn.f32 %f1063, %f1061, %f1062; + cvt.rzi.f32.f32 %f1064, %f1063; + abs.f32 %f1065, %f1064; + setp.gt.f32 %p309, %f1065, 0f42FC0000; + mov.b32 %r397, %f1064; + and.b32 %r398, %r397, -2147483648; + or.b32 %r399, %r398, 1123811328; + mov.b32 %f1066, %r399; + selp.f32 %f1067, %f1066, %f1064, %p309; + mov.f32 %f1068, 0fBF317218; + fma.rn.f32 %f1069, %f1067, %f1068, %f1061; + mov.f32 %f1070, 0f3102E308; + fma.rn.f32 %f1071, %f1067, %f1070, %f1069; mul.f32 %f1072, %f1071, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1073, %f1072; - add.f32 %f1074, %f1067, 0f00000000; - ex2.approx.f32 %f1075, %f1074; - mul.f32 %f1076, %f1073, %f1075; - setp.lt.f32 %p111, %f1065, 0fC2D20000; - selp.f32 %f1077, 0f00000000, %f1076, %p111; - setp.gt.f32 %p112, %f1065, 0f42D20000; - selp.f32 %f195, 0f7F800000, %f1077, %p112; - div.rn.f32 %f196, %f1922, %f407; - abs.f32 %f197, %f196; - setp.lt.f32 %p113, %f197, 0f00800000; - mul.f32 %f1078, %f197, 0f4B800000; - selp.f32 %f1079, 0fC3170000, 0fC2FE0000, %p113; - selp.f32 %f1080, %f1078, %f197, %p113; - mov.b32 %r157, %f1080; - and.b32 %r158, %r157, 8388607; - or.b32 %r159, %r158, 1065353216; - mov.b32 %f1081, %r159; - shr.u32 %r160, %r157, 23; - cvt.rn.f32.u32 %f1082, %r160; - add.f32 %f1083, %f1079, %f1082; - setp.gt.f32 %p114, %f1081, 0f3FB504F3; + add.f32 %f1073, %f1067, 0f4B40007F; + mov.b32 %r400, %f1073; + shl.b32 %r401, %r400, 23; + mov.b32 %f1074, %r401; + ex2.approx.ftz.f32 %f1075, %f1072; + mul.f32 %f213, %f1075, %f1074; + setp.lt.f32 %p310, %f210, 0f00000000; + and.pred %p13, %p310, %p311; + add.f32 %f1076, %f210, %f210; + selp.f32 %f214, %f1076, 0f00000000, %p311; + add.f32 %f1077, %f211, 0f40000000; + mov.b32 %r80, %f1077; + div.rn.f32 %f215, %f980, %f325; + abs.f32 %f216, %f215; + setp.lt.f32 %p312, %f216, 0f00800000; + mul.f32 %f1078, %f216, 0f4B800000; + selp.f32 %f1079, %f1078, %f216, %p312; + selp.f32 %f1080, 0fC3170000, 0fC2FE0000, %p312; + mov.b32 %r402, %f1079; + and.b32 %r403, %r402, 8388607; + or.b32 %r404, %r403, 1065353216; + mov.b32 %f1081, %r404; + shr.u32 %r405, %r402, 23; + cvt.rn.f32.u32 %f1082, %r405; + add.f32 %f1083, %f1080, %f1082; + setp.gt.f32 %p313, %f1081, 0f3FB504F3; mul.f32 %f1084, %f1081, 0f3F000000; add.f32 %f1085, %f1083, 0f3F800000; - selp.f32 %f1086, %f1084, %f1081, %p114; - selp.f32 %f1087, %f1085, %f1083, %p114; - add.f32 %f1088, %f1086, 0fBF800000; - add.f32 %f1063, %f1086, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1062,%f1063; - // inline asm - add.f32 %f1089, %f1088, %f1088; - mul.f32 %f1090, %f1062, %f1089; - mul.f32 %f1091, %f1090, %f1090; - fma.rn.f32 %f1094, %f1927, %f1091, %f1928; - fma.rn.f32 %f1096, %f1094, %f1091, %f1929; - mul.rn.f32 %f1097, %f1096, %f1091; - mul.rn.f32 %f1098, %f1097, %f1090; - sub.f32 %f1099, %f1088, %f1090; - neg.f32 %f1100, %f1090; - add.f32 %f1101, %f1099, %f1099; - fma.rn.f32 %f1102, %f1100, %f1088, %f1101; - mul.rn.f32 %f1103, %f1062, %f1102; - add.f32 %f1104, %f1098, %f1090; - sub.f32 %f1105, %f1090, %f1104; - add.f32 %f1106, %f1098, %f1105; + selp.f32 %f1086, %f1085, %f1083, %p313; + selp.f32 %f1087, %f1084, %f1081, %p313; + add.f32 %f1088, %f1087, 0fBF800000; + add.f32 %f1089, %f1087, 0f3F800000; + rcp.approx.ftz.f32 %f1090, %f1089; + add.f32 %f1091, %f1088, %f1088; + mul.f32 %f1092, %f1091, %f1090; + mul.f32 %f1093, %f1092, %f1092; + fma.rn.f32 %f1094, %f1019, %f1093, %f1018; + fma.rn.f32 %f1095, %f1094, %f1093, %f1021; + mul.rn.f32 %f1096, %f1095, %f1093; + mul.rn.f32 %f1097, %f1096, %f1092; + sub.f32 %f1098, %f1088, %f1092; + add.f32 %f1099, %f1098, %f1098; + neg.f32 %f1100, %f1092; + fma.rn.f32 %f1101, %f1100, %f1088, %f1099; + mul.rn.f32 %f1102, %f1090, %f1101; + add.f32 %f1103, %f1097, %f1092; + sub.f32 %f1104, %f1092, %f1103; + add.f32 %f1105, %f1097, %f1104; + add.f32 %f1106, %f1102, %f1105; add.f32 %f1107, %f1103, %f1106; - add.f32 %f1108, %f1104, %f1107; - sub.f32 %f1109, %f1104, %f1108; - add.f32 %f1110, %f1107, %f1109; - mul.rn.f32 %f1112, %f1087, %f1931; - mul.rn.f32 %f1114, %f1087, %f1932; - add.f32 %f1115, %f1112, %f1108; - sub.f32 %f1116, %f1112, %f1115; - add.f32 %f1117, %f1108, %f1116; - add.f32 %f1118, %f1110, %f1117; - add.f32 %f1119, %f1114, %f1118; - add.f32 %f1120, %f1115, %f1119; - sub.f32 %f1121, %f1115, %f1120; - add.f32 %f1122, %f1119, %f1121; - mul.rn.f32 %f1124, %f1924, %f1120; - neg.f32 %f1125, %f1124; - fma.rn.f32 %f1126, %f1924, %f1120, %f1125; - fma.rn.f32 %f1127, %f1924, %f1122, %f1126; - fma.rn.f32 %f1129, %f1930, %f1120, %f1127; - add.rn.f32 %f1130, %f1124, %f1129; - neg.f32 %f1131, %f1130; - add.rn.f32 %f1132, %f1124, %f1131; - add.rn.f32 %f1133, %f1132, %f1129; - mov.b32 %r161, %f1130; - setp.eq.s32 %p115, %r161, 1118925336; - add.s32 %r162, %r161, -1; - mov.b32 %f1134, %r162; - add.f32 %f1135, %f1133, 0f37000000; - selp.f32 %f1136, %f1134, %f1130, %p115; - selp.f32 %f198, %f1135, %f1133, %p115; - mul.f32 %f1137, %f1136, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1138, %f1137; - fma.rn.f32 %f1139, %f1138, %f1925, %f1136; - fma.rn.f32 %f1140, %f1138, %f1926, %f1139; - mul.f32 %f1141, %f1140, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1142, %f1141; - add.f32 %f1143, %f1138, 0f00000000; - ex2.approx.f32 %f1144, %f1143; - mul.f32 %f1145, %f1142, %f1144; - setp.lt.f32 %p116, %f1136, 0fC2D20000; - selp.f32 %f1146, 0f00000000, %f1145, %p116; - setp.gt.f32 %p117, %f1136, 0f42D20000; - selp.f32 %f2135, 0f7F800000, %f1146, %p117; - setp.eq.f32 %p118, %f2135, 0f7F800000; - @%p118 bra BB4_72; - - fma.rn.f32 %f2135, %f2135, %f198, %f2135; - -BB4_72: - setp.lt.f32 %p119, %f196, 0f00000000; - and.pred %p6, %p119, %p54; - mov.b32 %r163, %f2135; - xor.b32 %r164, %r163, -2147483648; - mov.b32 %f1147, %r164; - selp.f32 %f2137, %f1147, %f2135, %p6; - setp.eq.f32 %p121, %f196, 0f00000000; - @%p121 bra BB4_75; - bra.uni BB4_73; - -BB4_75: - add.f32 %f1150, %f196, %f196; - selp.f32 %f2137, %f1150, 0f00000000, %p54; - bra.uni BB4_76; - -BB4_73: - setp.geu.f32 %p122, %f196, 0f00000000; - @%p122 bra BB4_76; - - mov.f32 %f1978, 0f40000000; - cvt.rzi.f32.f32 %f1149, %f1978; - setp.neu.f32 %p123, %f1149, 0f40000000; - selp.f32 %f2137, 0f7FFFFFFF, %f2137, %p123; - -BB4_76: - abs.f32 %f1982, %f196; - add.f32 %f1151, %f1982, 0f40000000; - mov.b32 %r165, %f1151; - setp.lt.s32 %p125, %r165, 2139095040; - @%p125 bra BB4_81; - - abs.f32 %f1986, %f196; - setp.gtu.f32 %p126, %f1986, 0f7F800000; - @%p126 bra BB4_80; - bra.uni BB4_78; - -BB4_80: - add.f32 %f2137, %f196, 0f40000000; - bra.uni BB4_81; - -BB4_78: - abs.f32 %f1987, %f196; - setp.neu.f32 %p127, %f1987, 0f7F800000; - @%p127 bra BB4_81; - - selp.f32 %f2137, 0fFF800000, 0f7F800000, %p6; - -BB4_81: - cvt.rn.f32.s32 %f1939, %r264; - add.f32 %f1938, %f1939, 0f3F800000; - sub.f32 %f1937, %f1938, %f2151; - mov.f32 %f2138, 0f00000000; - mov.f32 %f1935, 0fB5BFBE8E; - mov.f32 %f1934, 0fBF317200; - sub.f32 %f1933, %f1939, %f2151; - mul.f32 %f1153, %f2137, 0fBF000000; - setp.eq.f32 %p128, %f196, 0f3F800000; - selp.f32 %f1154, 0fBF000000, %f1153, %p128; - mul.f32 %f1155, %f1154, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1156, %f1155; - fma.rn.f32 %f1158, %f1156, %f1934, %f1154; - fma.rn.f32 %f1160, %f1156, %f1935, %f1158; - mul.f32 %f1161, %f1160, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1162, %f1161; - add.f32 %f1163, %f1156, 0f00000000; - ex2.approx.f32 %f1164, %f1163; - mul.f32 %f1165, %f1162, %f1164; - setp.lt.f32 %p129, %f1154, 0fC2D20000; - selp.f32 %f1166, 0f00000000, %f1165, %p129; - setp.gt.f32 %p130, %f1154, 0f42D20000; - selp.f32 %f1167, 0f7F800000, %f1166, %p130; - sub.f32 %f1168, %f195, %f1167; - mul.f32 %f1169, %f94, %f1168; - mul.f32 %f209, %f145, %f1169; - mul.f32 %f1170, %f1933, %f1167; - mul.f32 %f1171, %f1937, %f195; - sub.f32 %f1172, %f1171, %f1170; - mul.f32 %f1173, %f95, %f1172; - mul.f32 %f210, %f145, %f1173; - mul.f32 %f211, %f145, %f158; - setp.leu.f32 %p131, %f159, 0f3C23D70A; - @%p131 bra BB4_83; - - sub.f32 %f1174, %f160, %f159; - add.f32 %f1175, %f159, %f2148; - div.rn.f32 %f2138, %f1174, %f1175; - -BB4_83: - mov.f32 %f2139, 0f00000000; - @%p131 bra BB4_85; - - add.f32 %f1177, %f159, %f2148; - mul.f32 %f1178, %f1177, %f1177; - add.f32 %f1179, %f160, %f2148; - div.rn.f32 %f2139, %f1179, %f1178; - -BB4_85: - mov.f32 %f1180, 0f47C35000; - min.f32 %f1181, %f2138, %f1180; - fma.rn.f32 %f2119, %f1181, %f179, %f2119; - mul.f32 %f1182, %f1181, %f180; - mul.f32 %f1183, %f179, %f179; - min.f32 %f1184, %f2139, %f1180; - mul.f32 %f1185, %f1184, %f1183; - sub.f32 %f1186, %f1182, %f1185; - add.f32 %f2123, %f1186, %f2123; - fma.rn.f32 %f2118, %f1181, %f209, %f2118; - mul.f32 %f1187, %f1181, %f210; - mul.f32 %f1188, %f209, %f209; - mul.f32 %f1189, %f1184, %f1188; - sub.f32 %f1190, %f1187, %f1189; - add.f32 %f2122, %f1190, %f2122; - fma.rn.f32 %f2117, %f1181, %f211, %f2117; - mul.f32 %f1191, %f1181, 0f00000000; - mul.f32 %f1192, %f211, %f211; - mul.f32 %f1193, %f1184, %f1192; - sub.f32 %f1194, %f1191, %f1193; - add.f32 %f2121, %f1194, %f2121; - add.f32 %f2116, %f2116, %f1181; - sub.f32 %f1195, %f1191, %f1184; - add.f32 %f2120, %f1195, %f2120; - add.s32 %r264, %r264, 1; - setp.lt.s32 %p133, %r264, %r49; - @%p133 bra BB4_39; - - add.s32 %r263, %r263, 1; - setp.lt.s32 %p134, %r263, %r49; - @%p134 bra BB4_38; - -BB4_87: - div.rn.f32 %f1196, %f2119, %f2123; - mov.f32 %f1197, 0fBF800000; - max.f32 %f1198, %f1196, %f1197; - mov.f32 %f1199, 0f3F800000; - min.f32 %f1200, %f1198, %f1199; - sub.f32 %f2152, %f2152, %f1200; - div.rn.f32 %f1201, %f2118, %f2122; - max.f32 %f1202, %f1201, %f1197; - min.f32 %f1203, %f1202, %f1199; - sub.f32 %f2151, %f2151, %f1203; - neg.f32 %f1204, %f2150; - div.rn.f32 %f1205, %f2117, %f2121; - max.f32 %f1206, %f1205, %f1204; - min.f32 %f1207, %f1206, %f2150; - sub.f32 %f1208, %f2150, %f1207; - neg.f32 %f1209, %f2081; - div.rn.f32 %f1210, %f2116, %f2120; - max.f32 %f1211, %f1210, %f1209; - min.f32 %f1212, %f1211, %f2081; - sub.f32 %f1213, %f2081, %f1212; - max.f32 %f2150, %f1208, %f1199; - mov.f32 %f1214, 0f3C23D70A; - max.f32 %f2081, %f1213, %f1214; - add.s32 %r262, %r262, 1; - setp.lt.s32 %p135, %r262, %r51; - @%p135 bra BB4_36; - -BB4_88: - mov.f32 %f2165, 0f00000000; - @%p14 bra BB4_144; - - mov.u32 %r265, 0; - div.rn.f32 %f1217, %f2150, 0fC0206C98; - div.rn.f32 %f242, %f1217, %f407; - mov.f32 %f2165, 0f00000000; - -BB4_90: - mov.u32 %r266, 0; - cvt.rn.f32.s32 %f1218, %r265; - sub.f32 %f1219, %f1218, %f2152; - add.f32 %f1220, %f1219, 0f3F800000; - sqrt.rn.f32 %f244, %f40; - mul.f32 %f245, %f1220, %f244; - abs.f32 %f246, %f245; - mul.f32 %f247, %f245, %f245; - mul.f32 %f248, %f1219, %f244; - abs.f32 %f249, %f248; - add.f32 %f1221, %f1218, 0f3F800000; - sub.f32 %f1222, %f1221, %f2152; - div.rn.f32 %f251, %f1222, %f407; - mov.f32 %f1223, 0f3F800000; - cvt.rzi.f32.f32 %f1224, %f1223; - add.f32 %f1225, %f1224, %f1224; - mov.f32 %f1226, 0f40000000; - sub.f32 %f1227, %f1226, %f1225; - abs.f32 %f252, %f1227; - setp.eq.f32 %p137, %f252, 0f3F800000; - abs.f32 %f253, %f251; - setp.lt.f32 %p138, %f253, 0f00800000; - mul.f32 %f1228, %f253, 0f4B800000; - selp.f32 %f1229, 0fC3170000, 0fC2FE0000, %p138; - selp.f32 %f1230, %f1228, %f253, %p138; - mov.b32 %r168, %f1230; - and.b32 %r169, %r168, 8388607; - or.b32 %r170, %r169, 1065353216; - mov.b32 %f1231, %r170; - shr.u32 %r171, %r168, 23; - cvt.rn.f32.u32 %f1232, %r171; - add.f32 %f1233, %f1229, %f1232; - setp.gt.f32 %p139, %f1231, 0f3FB504F3; - mul.f32 %f1234, %f1231, 0f3F000000; - add.f32 %f1235, %f1233, 0f3F800000; - selp.f32 %f1236, %f1234, %f1231, %p139; - selp.f32 %f1237, %f1235, %f1233, %p139; - add.f32 %f254, %f1236, 0fBF800000; - add.f32 %f255, %f1236, 0f3F800000; - add.f32 %f256, %f254, %f254; - mov.f32 %f1238, 0f3F317200; - mul.rn.f32 %f257, %f1237, %f1238; - mov.f32 %f1239, 0f35BFBE8E; - mul.rn.f32 %f258, %f1237, %f1239; - setp.lt.f32 %p140, %f251, 0f00000000; - and.pred %p7, %p140, %p137; - add.f32 %f1240, %f251, %f251; - selp.f32 %f259, %f1240, 0f00000000, %p137; - div.rn.f32 %f262, %f1219, %f407; - abs.f32 %f263, %f262; - setp.lt.f32 %p141, %f263, 0f00800000; - mul.f32 %f1242, %f263, 0f4B800000; - selp.f32 %f1243, 0fC3170000, 0fC2FE0000, %p141; - selp.f32 %f1244, %f1242, %f263, %p141; - mov.b32 %r172, %f1244; - and.b32 %r173, %r172, 8388607; - or.b32 %r174, %r173, 1065353216; - mov.b32 %f1245, %r174; - shr.u32 %r175, %r172, 23; - cvt.rn.f32.u32 %f1246, %r175; - add.f32 %f1247, %f1243, %f1246; - setp.gt.f32 %p142, %f1245, 0f3FB504F3; - mul.f32 %f1248, %f1245, 0f3F000000; - add.f32 %f1249, %f1247, 0f3F800000; - selp.f32 %f1250, %f1248, %f1245, %p142; - selp.f32 %f1251, %f1249, %f1247, %p142; - add.f32 %f264, %f1250, 0fBF800000; - add.f32 %f265, %f1250, 0f3F800000; - add.f32 %f266, %f264, %f264; - mul.rn.f32 %f267, %f1251, %f1238; - mul.rn.f32 %f268, %f1251, %f1239; - setp.lt.f32 %p143, %f262, 0f00000000; - and.pred %p8, %p143, %p137; - add.f32 %f1252, %f262, %f262; - selp.f32 %f269, %f1252, 0f00000000, %p137; - mov.b32 %r177, %f245; - and.b32 %r39, %r177, -2147483648; - ld.local.v4.f32 {%f2164, %f2163, %f2162, %f2161}, [%rd2]; - ld.local.f32 %f2160, [%rd2+20]; - ld.local.v2.f32 {%f2159, %f2158}, [%rd2+24]; - ld.local.v2.f32 {%f2157, %f2156}, [%rd2+40]; - ld.local.f32 %f2155, [%rd2+60]; - -BB4_91: - setp.ltu.f32 %p144, %f246, 0f3F800000; - @%p144 bra BB4_93; - bra.uni BB4_92; - -BB4_93: - cvt.rn.f32.s32 %f2047, %r265; - sub.f32 %f2046, %f2047, %f2152; - add.f32 %f2045, %f2046, 0f3F800000; - mul.f32 %f2044, %f2045, %f244; - mov.f32 %f1280, 0f3BA0C9F8; - mov.f32 %f1281, 0fBA1268FB; - fma.rn.f32 %f1282, %f1281, %f247, %f1280; - mov.f32 %f1283, 0fBCDABFD4; - fma.rn.f32 %f1284, %f1282, %f247, %f1283; - mov.f32 %f1285, 0f3DE70331; - fma.rn.f32 %f1286, %f1284, %f247, %f1285; - mov.f32 %f1287, 0fBEC09330; - fma.rn.f32 %f1288, %f1286, %f247, %f1287; - mov.f32 %f1289, 0f3F906EBA; - fma.rn.f32 %f1290, %f1288, %f247, %f1289; - mul.f32 %f2166, %f2044, %f1290; - bra.uni BB4_94; - -BB4_92: - mov.f32 %f2003, 0f3F800000; - setp.ltu.f32 %p145, %f246, 0f407AD445; - mov.f32 %f1262, 0f3A03BB71; - mov.f32 %f1263, 0fB7B730FB; - fma.rn.f32 %f1264, %f1263, %f246, %f1262; - mov.f32 %f1265, 0fBBACA3B3; - fma.rn.f32 %f1266, %f1264, %f246, %f1265; - mov.f32 %f1267, 0f3D0A7445; - fma.rn.f32 %f1268, %f1266, %f246, %f1267; - mov.f32 %f1269, 0fBE1B3B75; - fma.rn.f32 %f1270, %f1268, %f246, %f1269; - mov.f32 %f1271, 0fBF6B385A; - fma.rn.f32 %f1272, %f1270, %f246, %f1271; - mov.f32 %f1273, 0fBFD0316E; - fma.rn.f32 %f1274, %f1272, %f246, %f1273; - mov.f32 %f1275, 0fBA031CCE; - fma.rn.f32 %f1276, %f1274, %f246, %f1275; - ex2.approx.ftz.f32 %f1277, %f1276; - sub.f32 %f1279, %f2003, %f1277; - mov.b32 %r178, %f1279; - selp.b32 %r179, %r178, 1065353216, %p145; - or.b32 %r180, %r179, %r39; - mov.b32 %f2166, %r180; - -BB4_94: - setp.ltu.f32 %p146, %f249, 0f3F800000; - @%p146 bra BB4_96; - bra.uni BB4_95; - -BB4_96: - cvt.rn.f32.s32 %f2042, %r265; - sub.f32 %f2041, %f2042, %f2152; - mul.f32 %f2040, %f2041, %f244; - mul.f32 %f2039, %f2040, %f2040; - mov.f32 %f1309, 0f3BA0C9F8; - mov.f32 %f1310, 0fBA1268FB; - fma.rn.f32 %f1311, %f1310, %f2039, %f1309; - mov.f32 %f1312, 0fBCDABFD4; - fma.rn.f32 %f1313, %f1311, %f2039, %f1312; - mov.f32 %f1314, 0f3DE70331; - fma.rn.f32 %f1315, %f1313, %f2039, %f1314; - mov.f32 %f1316, 0fBEC09330; - fma.rn.f32 %f1317, %f1315, %f2039, %f1316; - mov.f32 %f1318, 0f3F906EBA; - fma.rn.f32 %f1319, %f1317, %f2039, %f1318; - mul.f32 %f2167, %f2040, %f1319; - bra.uni BB4_97; - -BB4_95: - cvt.rn.f32.s32 %f2007, %r265; - sub.f32 %f2006, %f2007, %f2152; - mul.f32 %f2005, %f2006, %f244; - mov.b32 %r246, %f2005; - and.b32 %r245, %r246, -2147483648; - mov.f32 %f2004, 0f3F800000; - setp.ltu.f32 %p147, %f249, 0f407AD445; - mov.f32 %f1291, 0f3A03BB71; - mov.f32 %f1292, 0fB7B730FB; - fma.rn.f32 %f1293, %f1292, %f249, %f1291; - mov.f32 %f1294, 0fBBACA3B3; - fma.rn.f32 %f1295, %f1293, %f249, %f1294; - mov.f32 %f1296, 0f3D0A7445; - fma.rn.f32 %f1297, %f1295, %f249, %f1296; - mov.f32 %f1298, 0fBE1B3B75; - fma.rn.f32 %f1299, %f1297, %f249, %f1298; - mov.f32 %f1300, 0fBF6B385A; - fma.rn.f32 %f1301, %f1299, %f249, %f1300; - mov.f32 %f1302, 0fBFD0316E; - fma.rn.f32 %f1303, %f1301, %f249, %f1302; - mov.f32 %f1304, 0fBA031CCE; - fma.rn.f32 %f1305, %f1303, %f249, %f1304; - ex2.approx.ftz.f32 %f1306, %f1305; - sub.f32 %f1308, %f2004, %f1306; - mov.b32 %r181, %f1308; - selp.b32 %r182, %r181, 1065353216, %p147; - or.b32 %r183, %r182, %r245; - mov.b32 %f2167, %r183; - -BB4_97: - sub.f32 %f1320, %f2166, %f2167; - mul.f32 %f299, %f1320, 0f3F000000; - cvt.rn.f32.s32 %f300, %r266; - sub.f32 %f301, %f300, %f2151; - add.f32 %f1321, %f301, 0f3F800000; - mul.f32 %f302, %f1321, %f244; - abs.f32 %f303, %f302; - setp.ltu.f32 %p148, %f303, 0f3F800000; - @%p148 bra BB4_99; - bra.uni BB4_98; - -BB4_99: - mul.f32 %f1340, %f302, %f302; - mov.f32 %f1341, 0f3BA0C9F8; - mov.f32 %f1342, 0fBA1268FB; - fma.rn.f32 %f1343, %f1342, %f1340, %f1341; - mov.f32 %f1344, 0fBCDABFD4; - fma.rn.f32 %f1345, %f1343, %f1340, %f1344; - mov.f32 %f1346, 0f3DE70331; - fma.rn.f32 %f1347, %f1345, %f1340, %f1346; - mov.f32 %f1348, 0fBEC09330; - fma.rn.f32 %f1349, %f1347, %f1340, %f1348; - mov.f32 %f1350, 0f3F906EBA; - fma.rn.f32 %f1351, %f1349, %f1340, %f1350; - mul.f32 %f2168, %f302, %f1351; - bra.uni BB4_100; - -BB4_98: - mov.f32 %f2008, 0f3F800000; - mov.f32 %f1322, 0f3A03BB71; - mov.f32 %f1323, 0fB7B730FB; - fma.rn.f32 %f1324, %f1323, %f303, %f1322; - mov.f32 %f1325, 0fBBACA3B3; - fma.rn.f32 %f1326, %f1324, %f303, %f1325; - mov.f32 %f1327, 0f3D0A7445; - fma.rn.f32 %f1328, %f1326, %f303, %f1327; - mov.f32 %f1329, 0fBE1B3B75; - fma.rn.f32 %f1330, %f1328, %f303, %f1329; - mov.f32 %f1331, 0fBF6B385A; - fma.rn.f32 %f1332, %f1330, %f303, %f1331; - mov.f32 %f1333, 0fBFD0316E; - fma.rn.f32 %f1334, %f1332, %f303, %f1333; - mov.f32 %f1335, 0fBA031CCE; - fma.rn.f32 %f1336, %f1334, %f303, %f1335; - ex2.approx.ftz.f32 %f1337, %f1336; - sub.f32 %f1339, %f2008, %f1337; - mov.b32 %r184, %f1339; - setp.ltu.f32 %p149, %f303, 0f407AD445; - selp.b32 %r185, %r184, 1065353216, %p149; - mov.b32 %r186, %f302; - and.b32 %r187, %r186, -2147483648; - or.b32 %r188, %r185, %r187; - mov.b32 %f2168, %r188; - -BB4_100: - cvt.rn.f32.s32 %f2010, %r266; - sub.f32 %f2009, %f2010, %f2151; - mul.f32 %f307, %f2009, %f244; - abs.f32 %f308, %f307; - setp.ltu.f32 %p150, %f308, 0f3F800000; - @%p150 bra BB4_102; - bra.uni BB4_101; - -BB4_102: - mul.f32 %f1370, %f307, %f307; - mov.f32 %f1371, 0f3BA0C9F8; - mov.f32 %f1372, 0fBA1268FB; - fma.rn.f32 %f1373, %f1372, %f1370, %f1371; - mov.f32 %f1374, 0fBCDABFD4; - fma.rn.f32 %f1375, %f1373, %f1370, %f1374; - mov.f32 %f1376, 0f3DE70331; - fma.rn.f32 %f1377, %f1375, %f1370, %f1376; - mov.f32 %f1378, 0fBEC09330; - fma.rn.f32 %f1379, %f1377, %f1370, %f1378; - mov.f32 %f1380, 0f3F906EBA; - fma.rn.f32 %f1381, %f1379, %f1370, %f1380; - mul.f32 %f2169, %f307, %f1381; - bra.uni BB4_103; - -BB4_101: - mov.f32 %f2011, 0f3F800000; - mov.f32 %f1352, 0f3A03BB71; - mov.f32 %f1353, 0fB7B730FB; - fma.rn.f32 %f1354, %f1353, %f308, %f1352; - mov.f32 %f1355, 0fBBACA3B3; - fma.rn.f32 %f1356, %f1354, %f308, %f1355; - mov.f32 %f1357, 0f3D0A7445; - fma.rn.f32 %f1358, %f1356, %f308, %f1357; - mov.f32 %f1359, 0fBE1B3B75; - fma.rn.f32 %f1360, %f1358, %f308, %f1359; - mov.f32 %f1361, 0fBF6B385A; - fma.rn.f32 %f1362, %f1360, %f308, %f1361; - mov.f32 %f1363, 0fBFD0316E; - fma.rn.f32 %f1364, %f1362, %f308, %f1363; - mov.f32 %f1365, 0fBA031CCE; - fma.rn.f32 %f1366, %f1364, %f308, %f1365; - ex2.approx.ftz.f32 %f1367, %f1366; - sub.f32 %f1369, %f2011, %f1367; - mov.b32 %r189, %f1369; - setp.ltu.f32 %p151, %f308, 0f407AD445; - selp.b32 %r190, %r189, 1065353216, %p151; - mov.b32 %r191, %f307; - and.b32 %r192, %r191, -2147483648; - or.b32 %r193, %r190, %r192; - mov.b32 %f2169, %r193; - -BB4_103: - mov.f32 %f2012, 0f40000000; - sub.f32 %f1384, %f2168, %f2169; - mul.f32 %f312, %f1384, 0f3F000000; - mul.f32 %f1385, %f299, %f2150; - fma.rn.f32 %f313, %f312, %f1385, %f2081; - mad.lo.s32 %r194, %r266, %r49, %r265; - add.s32 %r195, %r194, %r2; - mul.wide.s32 %rd63, %r195, 4; - add.s64 %rd64, %rd1, %rd63; - ld.global.f32 %f314, [%rd64]; - // inline asm - rcp.approx.ftz.f32 %f1382,%f255; - // inline asm - mul.f32 %f1386, %f1382, %f256; - mul.f32 %f1387, %f1386, %f1386; - mov.f32 %f1388, 0f3C4CAF63; - mov.f32 %f1389, 0f3B18F0FE; - fma.rn.f32 %f1390, %f1389, %f1387, %f1388; - mov.f32 %f1391, 0f3DAAAABD; - fma.rn.f32 %f1392, %f1390, %f1387, %f1391; - mul.rn.f32 %f1393, %f1392, %f1387; - mul.rn.f32 %f1394, %f1393, %f1386; - sub.f32 %f1395, %f254, %f1386; - neg.f32 %f1396, %f1386; - add.f32 %f1397, %f1395, %f1395; - fma.rn.f32 %f1398, %f1396, %f254, %f1397; - mul.rn.f32 %f1399, %f1382, %f1398; - add.f32 %f1400, %f1394, %f1386; - sub.f32 %f1401, %f1386, %f1400; - add.f32 %f1402, %f1394, %f1401; - add.f32 %f1403, %f1399, %f1402; - add.f32 %f1404, %f1400, %f1403; - sub.f32 %f1405, %f1400, %f1404; - add.f32 %f1406, %f1403, %f1405; - add.f32 %f1407, %f257, %f1404; - sub.f32 %f1408, %f257, %f1407; - add.f32 %f1409, %f1404, %f1408; - add.f32 %f1410, %f1406, %f1409; - add.f32 %f1411, %f258, %f1410; - add.f32 %f1412, %f1407, %f1411; - sub.f32 %f1413, %f1407, %f1412; - add.f32 %f1414, %f1411, %f1413; - mul.rn.f32 %f1416, %f2012, %f1412; - neg.f32 %f1417, %f1416; - fma.rn.f32 %f1418, %f2012, %f1412, %f1417; - fma.rn.f32 %f1419, %f2012, %f1414, %f1418; - mov.f32 %f1420, 0f00000000; - fma.rn.f32 %f1421, %f1420, %f1412, %f1419; - add.rn.f32 %f1422, %f1416, %f1421; - neg.f32 %f1423, %f1422; - add.rn.f32 %f1424, %f1416, %f1423; - add.rn.f32 %f1425, %f1424, %f1421; - mov.b32 %r196, %f1422; - setp.eq.s32 %p152, %r196, 1118925336; - add.s32 %r197, %r196, -1; - mov.b32 %f1426, %r197; - add.f32 %f1427, %f1425, 0f37000000; - selp.f32 %f1428, %f1426, %f1422, %p152; - selp.f32 %f315, %f1427, %f1425, %p152; - mul.f32 %f1429, %f1428, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1430, %f1429; - mov.f32 %f1431, 0fBF317200; - fma.rn.f32 %f1432, %f1430, %f1431, %f1428; - mov.f32 %f1433, 0fB5BFBE8E; - fma.rn.f32 %f1434, %f1430, %f1433, %f1432; - mul.f32 %f1435, %f1434, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1436, %f1435; - add.f32 %f1437, %f1430, 0f00000000; - ex2.approx.f32 %f1438, %f1437; - mul.f32 %f1439, %f1436, %f1438; - setp.lt.f32 %p153, %f1428, 0fC2D20000; - selp.f32 %f1440, 0f00000000, %f1439, %p153; - setp.gt.f32 %p154, %f1428, 0f42D20000; - selp.f32 %f2170, 0f7F800000, %f1440, %p154; - setp.eq.f32 %p155, %f2170, 0f7F800000; - @%p155 bra BB4_105; - - fma.rn.f32 %f2170, %f2170, %f315, %f2170; - -BB4_105: - setp.geu.f32 %p227, %f251, 0f00000000; - mov.b32 %r198, %f2170; - xor.b32 %r199, %r198, -2147483648; - mov.b32 %f1441, %r199; - selp.f32 %f319, %f1441, %f2170, %p7; - setp.eq.f32 %p156, %f251, 0f00000000; - selp.f32 %f2171, %f259, %f319, %p156; - @%p227 bra BB4_107; - - mov.f32 %f2013, 0f40000000; - cvt.rzi.f32.f32 %f1443, %f2013; - setp.neu.f32 %p157, %f1443, 0f40000000; - selp.f32 %f2171, 0f7FFFFFFF, %f319, %p157; - -BB4_107: - abs.f32 %f2024, %f251; - mov.f32 %f2023, 0f00000000; - mov.f32 %f2022, 0f3DAAAABD; - mov.f32 %f2021, 0f3C4CAF63; - mov.f32 %f2020, 0f3B18F0FE; - mov.f32 %f2019, 0fB5BFBE8E; - mov.f32 %f2018, 0fBF317200; - add.f32 %f2017, %f2024, 0f40000000; - mov.b32 %r247, %f2017; - selp.f32 %f2016, 0fFF800000, 0f7F800000, %p7; - add.f32 %f2015, %f251, 0f40000000; - mov.f32 %f2014, 0f40000000; - setp.gtu.f32 %p158, %f2024, 0f7F800000; - selp.f32 %f1446, %f2015, %f2171, %p158; - setp.neu.f32 %p159, %f2024, 0f7F800000; - selp.f32 %f1447, %f1446, %f2016, %p159; - setp.gt.s32 %p160, %r247, 2139095039; - selp.f32 %f1448, %f1447, %f2171, %p160; - mul.f32 %f1449, %f1448, 0fBF000000; - setp.eq.f32 %p161, %f251, 0f3F800000; - selp.f32 %f1450, 0fBF000000, %f1449, %p161; - mul.f32 %f1451, %f1450, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1452, %f1451; - fma.rn.f32 %f1454, %f1452, %f2018, %f1450; - fma.rn.f32 %f1456, %f1452, %f2019, %f1454; - mul.f32 %f1457, %f1456, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1458, %f1457; - add.f32 %f1459, %f1452, 0f00000000; - ex2.approx.f32 %f1460, %f1459; - mul.f32 %f1461, %f1458, %f1460; - setp.lt.f32 %p162, %f1450, 0fC2D20000; - selp.f32 %f1462, 0f00000000, %f1461, %p162; - setp.gt.f32 %p163, %f1450, 0f42D20000; - selp.f32 %f323, 0f7F800000, %f1462, %p163; - // inline asm - rcp.approx.ftz.f32 %f1444,%f265; - // inline asm - mul.f32 %f1463, %f1444, %f266; - mul.f32 %f1464, %f1463, %f1463; - fma.rn.f32 %f1467, %f2020, %f1464, %f2021; - fma.rn.f32 %f1469, %f1467, %f1464, %f2022; - mul.rn.f32 %f1470, %f1469, %f1464; - mul.rn.f32 %f1471, %f1470, %f1463; - sub.f32 %f1472, %f264, %f1463; - neg.f32 %f1473, %f1463; - add.f32 %f1474, %f1472, %f1472; - fma.rn.f32 %f1475, %f1473, %f264, %f1474; - mul.rn.f32 %f1476, %f1444, %f1475; - add.f32 %f1477, %f1471, %f1463; - sub.f32 %f1478, %f1463, %f1477; - add.f32 %f1479, %f1471, %f1478; - add.f32 %f1480, %f1476, %f1479; - add.f32 %f1481, %f1477, %f1480; - sub.f32 %f1482, %f1477, %f1481; - add.f32 %f1483, %f1480, %f1482; - add.f32 %f1484, %f267, %f1481; - sub.f32 %f1485, %f267, %f1484; - add.f32 %f1486, %f1481, %f1485; - add.f32 %f1487, %f1483, %f1486; - add.f32 %f1488, %f268, %f1487; - add.f32 %f1489, %f1484, %f1488; - sub.f32 %f1490, %f1484, %f1489; - add.f32 %f1491, %f1488, %f1490; - mul.rn.f32 %f1493, %f2014, %f1489; - neg.f32 %f1494, %f1493; - fma.rn.f32 %f1495, %f2014, %f1489, %f1494; - fma.rn.f32 %f1496, %f2014, %f1491, %f1495; - fma.rn.f32 %f1498, %f2023, %f1489, %f1496; - add.rn.f32 %f1499, %f1493, %f1498; - neg.f32 %f1500, %f1499; - add.rn.f32 %f1501, %f1493, %f1500; - add.rn.f32 %f1502, %f1501, %f1498; - mov.b32 %r200, %f1499; - setp.eq.s32 %p164, %r200, 1118925336; - add.s32 %r201, %r200, -1; - mov.b32 %f1503, %r201; - add.f32 %f1504, %f1502, 0f37000000; - selp.f32 %f1505, %f1503, %f1499, %p164; - selp.f32 %f324, %f1504, %f1502, %p164; - mul.f32 %f1506, %f1505, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1507, %f1506; - fma.rn.f32 %f1508, %f1507, %f2018, %f1505; - fma.rn.f32 %f1509, %f1507, %f2019, %f1508; - mul.f32 %f1510, %f1509, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1511, %f1510; - add.f32 %f1512, %f1507, 0f00000000; - ex2.approx.f32 %f1513, %f1512; - mul.f32 %f1514, %f1511, %f1513; - setp.lt.f32 %p165, %f1505, 0fC2D20000; - selp.f32 %f1515, 0f00000000, %f1514, %p165; - setp.gt.f32 %p166, %f1505, 0f42D20000; - selp.f32 %f2172, 0f7F800000, %f1515, %p166; - setp.eq.f32 %p167, %f2172, 0f7F800000; - @%p167 bra BB4_109; - - fma.rn.f32 %f2172, %f2172, %f324, %f2172; - -BB4_109: - setp.geu.f32 %p228, %f262, 0f00000000; - mov.b32 %r202, %f2172; - xor.b32 %r203, %r202, -2147483648; - mov.b32 %f1516, %r203; - selp.f32 %f328, %f1516, %f2172, %p8; - setp.eq.f32 %p168, %f262, 0f00000000; - selp.f32 %f2173, %f269, %f328, %p168; - @%p228 bra BB4_111; - - mov.f32 %f2025, 0f40000000; - cvt.rzi.f32.f32 %f1518, %f2025; - setp.neu.f32 %p169, %f1518, 0f40000000; - selp.f32 %f2173, 0f7FFFFFFF, %f328, %p169; - -BB4_111: - abs.f32 %f2038, %f262; - mov.f32 %f2037, 0f35BFBE8E; - mov.f32 %f2036, 0f3F317200; - add.f32 %f2035, %f2038, 0f40000000; - mov.b32 %r248, %f2035; - selp.f32 %f2034, 0fFF800000, 0f7F800000, %p8; - add.f32 %f2033, %f262, 0f40000000; - mov.f32 %f2032, 0f00000000; - mov.f32 %f2031, 0f3DAAAABD; - mov.f32 %f2030, 0f3C4CAF63; - mov.f32 %f2029, 0f3B18F0FE; - mov.f32 %f2028, 0fB5BFBE8E; - mov.f32 %f2027, 0fBF317200; - mov.f32 %f2026, 0f40000000; - setp.gtu.f32 %p170, %f2038, 0f7F800000; - selp.f32 %f1521, %f2033, %f2173, %p170; - setp.neu.f32 %p171, %f2038, 0f7F800000; - selp.f32 %f1522, %f1521, %f2034, %p171; - setp.gt.s32 %p172, %r248, 2139095039; - selp.f32 %f1523, %f1522, %f2173, %p172; - mul.f32 %f1524, %f1523, 0fBF000000; - setp.eq.f32 %p173, %f262, 0f3F800000; - selp.f32 %f1525, 0fBF000000, %f1524, %p173; - mul.f32 %f1526, %f1525, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1527, %f1526; - fma.rn.f32 %f1529, %f1527, %f2027, %f1525; - fma.rn.f32 %f1531, %f1527, %f2028, %f1529; - mul.f32 %f1532, %f1531, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1533, %f1532; - add.f32 %f1534, %f1527, 0f00000000; - ex2.approx.f32 %f1535, %f1534; - mul.f32 %f1536, %f1533, %f1535; - setp.lt.f32 %p174, %f1525, 0fC2D20000; - selp.f32 %f1537, 0f00000000, %f1536, %p174; - setp.gt.f32 %p175, %f1525, 0f42D20000; - selp.f32 %f1538, 0f7F800000, %f1537, %p175; - sub.f32 %f1539, %f323, %f1538; - mul.f32 %f1540, %f242, %f1539; - mul.f32 %f332, %f312, %f1540; - add.f32 %f1541, %f300, 0f3F800000; - sub.f32 %f1542, %f1541, %f2151; - div.rn.f32 %f333, %f1542, %f407; - abs.f32 %f334, %f333; - setp.lt.f32 %p176, %f334, 0f00800000; - mul.f32 %f1543, %f334, 0f4B800000; - selp.f32 %f1544, 0fC3170000, 0fC2FE0000, %p176; - selp.f32 %f1545, %f1543, %f334, %p176; - mov.b32 %r204, %f1545; - and.b32 %r205, %r204, 8388607; - or.b32 %r206, %r205, 1065353216; - mov.b32 %f1546, %r206; - shr.u32 %r207, %r204, 23; - cvt.rn.f32.u32 %f1547, %r207; - add.f32 %f1548, %f1544, %f1547; - setp.gt.f32 %p177, %f1546, 0f3FB504F3; - mul.f32 %f1549, %f1546, 0f3F000000; - add.f32 %f1550, %f1548, 0f3F800000; - selp.f32 %f1551, %f1549, %f1546, %p177; - selp.f32 %f1552, %f1550, %f1548, %p177; - add.f32 %f1553, %f1551, 0fBF800000; - add.f32 %f1520, %f1551, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1519,%f1520; - // inline asm - add.f32 %f1554, %f1553, %f1553; - mul.f32 %f1555, %f1519, %f1554; - mul.f32 %f1556, %f1555, %f1555; - fma.rn.f32 %f1559, %f2029, %f1556, %f2030; - fma.rn.f32 %f1561, %f1559, %f1556, %f2031; - mul.rn.f32 %f1562, %f1561, %f1556; - mul.rn.f32 %f1563, %f1562, %f1555; - sub.f32 %f1564, %f1553, %f1555; - neg.f32 %f1565, %f1555; - add.f32 %f1566, %f1564, %f1564; - fma.rn.f32 %f1567, %f1565, %f1553, %f1566; - mul.rn.f32 %f1568, %f1519, %f1567; - add.f32 %f1569, %f1563, %f1555; - sub.f32 %f1570, %f1555, %f1569; - add.f32 %f1571, %f1563, %f1570; - add.f32 %f1572, %f1568, %f1571; - add.f32 %f1573, %f1569, %f1572; - sub.f32 %f1574, %f1569, %f1573; - add.f32 %f1575, %f1572, %f1574; - mul.rn.f32 %f1577, %f1552, %f2036; - mul.rn.f32 %f1579, %f1552, %f2037; - add.f32 %f1580, %f1577, %f1573; - sub.f32 %f1581, %f1577, %f1580; - add.f32 %f1582, %f1573, %f1581; - add.f32 %f1583, %f1575, %f1582; - add.f32 %f1584, %f1579, %f1583; - add.f32 %f1585, %f1580, %f1584; - sub.f32 %f1586, %f1580, %f1585; - add.f32 %f1587, %f1584, %f1586; - mul.rn.f32 %f1589, %f2026, %f1585; - neg.f32 %f1590, %f1589; - fma.rn.f32 %f1591, %f2026, %f1585, %f1590; - fma.rn.f32 %f1592, %f2026, %f1587, %f1591; - fma.rn.f32 %f1594, %f2032, %f1585, %f1592; - add.rn.f32 %f1595, %f1589, %f1594; - neg.f32 %f1596, %f1595; - add.rn.f32 %f1597, %f1589, %f1596; - add.rn.f32 %f1598, %f1597, %f1594; - mov.b32 %r208, %f1595; - setp.eq.s32 %p178, %r208, 1118925336; - add.s32 %r209, %r208, -1; - mov.b32 %f1599, %r209; - add.f32 %f1600, %f1598, 0f37000000; - selp.f32 %f1601, %f1599, %f1595, %p178; - selp.f32 %f335, %f1600, %f1598, %p178; - mul.f32 %f1602, %f1601, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1603, %f1602; - fma.rn.f32 %f1604, %f1603, %f2027, %f1601; - fma.rn.f32 %f1605, %f1603, %f2028, %f1604; - mul.f32 %f1606, %f1605, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1607, %f1606; - add.f32 %f1608, %f1603, 0f00000000; - ex2.approx.f32 %f1609, %f1608; - mul.f32 %f1610, %f1607, %f1609; - setp.lt.f32 %p179, %f1601, 0fC2D20000; - selp.f32 %f1611, 0f00000000, %f1610, %p179; - setp.gt.f32 %p180, %f1601, 0f42D20000; - selp.f32 %f2174, 0f7F800000, %f1611, %p180; - setp.eq.f32 %p181, %f2174, 0f7F800000; - @%p181 bra BB4_113; - - fma.rn.f32 %f2174, %f2174, %f335, %f2174; - -BB4_113: - setp.lt.f32 %p182, %f333, 0f00000000; - and.pred %p11, %p182, %p137; - mov.b32 %r210, %f2174; - xor.b32 %r211, %r210, -2147483648; - mov.b32 %f1612, %r211; - selp.f32 %f2176, %f1612, %f2174, %p11; - setp.eq.f32 %p184, %f333, 0f00000000; - @%p184 bra BB4_116; - bra.uni BB4_114; - -BB4_116: - add.f32 %f1615, %f333, %f333; - selp.f32 %f2176, %f1615, 0f00000000, %p137; - bra.uni BB4_117; - -BB4_114: - setp.geu.f32 %p185, %f333, 0f00000000; - @%p185 bra BB4_117; - - mov.f32 %f2051, 0f40000000; - cvt.rzi.f32.f32 %f1614, %f2051; - setp.neu.f32 %p186, %f1614, 0f40000000; - selp.f32 %f2176, 0f7FFFFFFF, %f2176, %p186; - -BB4_117: - abs.f32 %f1988, %f333; - add.f32 %f1616, %f1988, 0f40000000; - mov.b32 %r212, %f1616; - setp.lt.s32 %p188, %r212, 2139095040; - @%p188 bra BB4_122; - - abs.f32 %f2049, %f333; - setp.gtu.f32 %p189, %f2049, 0f7F800000; - @%p189 bra BB4_121; - bra.uni BB4_119; - -BB4_121: - add.f32 %f2176, %f333, 0f40000000; - bra.uni BB4_122; - -BB4_119: - abs.f32 %f2050, %f333; - setp.neu.f32 %p190, %f2050, 0f7F800000; - @%p190 bra BB4_122; - - selp.f32 %f2176, 0fFF800000, 0f7F800000, %p11; - -BB4_122: - mov.f32 %f1999, 0f35BFBE8E; - mov.f32 %f1998, 0f3F317200; - mov.f32 %f1997, 0f00000000; - mov.f32 %f1996, 0f3DAAAABD; - mov.f32 %f1995, 0f3C4CAF63; - mov.f32 %f1994, 0f3B18F0FE; - mov.f32 %f1993, 0fB5BFBE8E; - mov.f32 %f1992, 0fBF317200; - mov.f32 %f1991, 0f40000000; - cvt.rn.f32.s32 %f1990, %r266; - sub.f32 %f1989, %f1990, %f2151; - mul.f32 %f1619, %f2176, 0fBF000000; - setp.eq.f32 %p191, %f333, 0f3F800000; - selp.f32 %f1620, 0fBF000000, %f1619, %p191; - mul.f32 %f1621, %f1620, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1622, %f1621; - fma.rn.f32 %f1624, %f1622, %f1992, %f1620; - fma.rn.f32 %f1626, %f1622, %f1993, %f1624; - mul.f32 %f1627, %f1626, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1628, %f1627; - add.f32 %f1629, %f1622, 0f00000000; - ex2.approx.f32 %f1630, %f1629; - mul.f32 %f1631, %f1628, %f1630; - setp.lt.f32 %p192, %f1620, 0fC2D20000; - selp.f32 %f1632, 0f00000000, %f1631, %p192; - setp.gt.f32 %p193, %f1620, 0f42D20000; - selp.f32 %f346, 0f7F800000, %f1632, %p193; - div.rn.f32 %f347, %f1989, %f407; - abs.f32 %f348, %f347; - setp.lt.f32 %p194, %f348, 0f00800000; - mul.f32 %f1633, %f348, 0f4B800000; - selp.f32 %f1634, 0fC3170000, 0fC2FE0000, %p194; - selp.f32 %f1635, %f1633, %f348, %p194; - mov.b32 %r213, %f1635; - and.b32 %r214, %r213, 8388607; - or.b32 %r215, %r214, 1065353216; - mov.b32 %f1636, %r215; - shr.u32 %r216, %r213, 23; - cvt.rn.f32.u32 %f1637, %r216; - add.f32 %f1638, %f1634, %f1637; - setp.gt.f32 %p195, %f1636, 0f3FB504F3; - mul.f32 %f1639, %f1636, 0f3F000000; - add.f32 %f1640, %f1638, 0f3F800000; - selp.f32 %f1641, %f1639, %f1636, %p195; - selp.f32 %f1642, %f1640, %f1638, %p195; - add.f32 %f1643, %f1641, 0fBF800000; - add.f32 %f1618, %f1641, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1617,%f1618; - // inline asm - add.f32 %f1644, %f1643, %f1643; - mul.f32 %f1645, %f1617, %f1644; - mul.f32 %f1646, %f1645, %f1645; - fma.rn.f32 %f1649, %f1994, %f1646, %f1995; - fma.rn.f32 %f1651, %f1649, %f1646, %f1996; - mul.rn.f32 %f1652, %f1651, %f1646; - mul.rn.f32 %f1653, %f1652, %f1645; - sub.f32 %f1654, %f1643, %f1645; - neg.f32 %f1655, %f1645; - add.f32 %f1656, %f1654, %f1654; - fma.rn.f32 %f1657, %f1655, %f1643, %f1656; - mul.rn.f32 %f1658, %f1617, %f1657; - add.f32 %f1659, %f1653, %f1645; - sub.f32 %f1660, %f1645, %f1659; - add.f32 %f1661, %f1653, %f1660; - add.f32 %f1662, %f1658, %f1661; - add.f32 %f1663, %f1659, %f1662; - sub.f32 %f1664, %f1659, %f1663; - add.f32 %f1665, %f1662, %f1664; - mul.rn.f32 %f1667, %f1642, %f1998; - mul.rn.f32 %f1669, %f1642, %f1999; - add.f32 %f1670, %f1667, %f1663; - sub.f32 %f1671, %f1667, %f1670; - add.f32 %f1672, %f1663, %f1671; - add.f32 %f1673, %f1665, %f1672; - add.f32 %f1674, %f1669, %f1673; - add.f32 %f1675, %f1670, %f1674; - sub.f32 %f1676, %f1670, %f1675; - add.f32 %f1677, %f1674, %f1676; - mul.rn.f32 %f1679, %f1991, %f1675; - neg.f32 %f1680, %f1679; - fma.rn.f32 %f1681, %f1991, %f1675, %f1680; - fma.rn.f32 %f1682, %f1991, %f1677, %f1681; - fma.rn.f32 %f1684, %f1997, %f1675, %f1682; - add.rn.f32 %f1685, %f1679, %f1684; - neg.f32 %f1686, %f1685; - add.rn.f32 %f1687, %f1679, %f1686; - add.rn.f32 %f1688, %f1687, %f1684; - mov.b32 %r217, %f1685; - setp.eq.s32 %p196, %r217, 1118925336; - add.s32 %r218, %r217, -1; - mov.b32 %f1689, %r218; - add.f32 %f1690, %f1688, 0f37000000; - selp.f32 %f1691, %f1689, %f1685, %p196; - selp.f32 %f349, %f1690, %f1688, %p196; - mul.f32 %f1692, %f1691, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1693, %f1692; - fma.rn.f32 %f1694, %f1693, %f1992, %f1691; - fma.rn.f32 %f1695, %f1693, %f1993, %f1694; - mul.f32 %f1696, %f1695, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1697, %f1696; - add.f32 %f1698, %f1693, 0f00000000; - ex2.approx.f32 %f1699, %f1698; - mul.f32 %f1700, %f1697, %f1699; - setp.lt.f32 %p197, %f1691, 0fC2D20000; - selp.f32 %f1701, 0f00000000, %f1700, %p197; - setp.gt.f32 %p198, %f1691, 0f42D20000; - selp.f32 %f2177, 0f7F800000, %f1701, %p198; - setp.eq.f32 %p199, %f2177, 0f7F800000; - @%p199 bra BB4_124; - - fma.rn.f32 %f2177, %f2177, %f349, %f2177; - -BB4_124: - setp.lt.f32 %p200, %f347, 0f00000000; - and.pred %p12, %p200, %p137; - mov.b32 %r219, %f2177; - xor.b32 %r220, %r219, -2147483648; - mov.b32 %f1702, %r220; - selp.f32 %f2179, %f1702, %f2177, %p12; - setp.eq.f32 %p202, %f347, 0f00000000; - @%p202 bra BB4_127; - bra.uni BB4_125; - -BB4_127: - add.f32 %f1705, %f347, %f347; - selp.f32 %f2179, %f1705, 0f00000000, %p137; - bra.uni BB4_128; - -BB4_125: - setp.geu.f32 %p203, %f347, 0f00000000; - @%p203 bra BB4_128; - - mov.f32 %f2048, 0f40000000; - cvt.rzi.f32.f32 %f1704, %f2048; - setp.neu.f32 %p204, %f1704, 0f40000000; - selp.f32 %f2179, 0f7FFFFFFF, %f2179, %p204; - -BB4_128: - abs.f32 %f2052, %f347; - add.f32 %f1706, %f2052, 0f40000000; - mov.b32 %r221, %f1706; - setp.lt.s32 %p206, %r221, 2139095040; - @%p206 bra BB4_133; - - abs.f32 %f2053, %f347; - setp.gtu.f32 %p207, %f2053, 0f7F800000; - @%p207 bra BB4_132; - bra.uni BB4_130; - -BB4_132: - add.f32 %f2179, %f347, 0f40000000; - bra.uni BB4_133; - -BB4_130: - abs.f32 %f2054, %f347; - setp.neu.f32 %p208, %f2054, 0f7F800000; - @%p208 bra BB4_133; - - selp.f32 %f2179, 0fFF800000, 0f7F800000, %p12; - -BB4_133: - mov.f32 %f2001, 0fB5BFBE8E; - mov.f32 %f2000, 0fBF317200; - mul.f32 %f1707, %f2179, 0fBF000000; - setp.eq.f32 %p209, %f347, 0f3F800000; - selp.f32 %f1708, 0fBF000000, %f1707, %p209; - mul.f32 %f1709, %f1708, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1710, %f1709; - fma.rn.f32 %f1712, %f1710, %f2000, %f1708; - fma.rn.f32 %f1714, %f1710, %f2001, %f1712; - mul.f32 %f1715, %f1714, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1716, %f1715; - add.f32 %f1717, %f1710, 0f00000000; - ex2.approx.f32 %f1718, %f1717; - mul.f32 %f1719, %f1716, %f1718; - setp.lt.f32 %p210, %f1708, 0fC2D20000; - selp.f32 %f1720, 0f00000000, %f1719, %p210; - setp.gt.f32 %p211, %f1708, 0f42D20000; - selp.f32 %f1721, 0f7F800000, %f1720, %p211; - sub.f32 %f1722, %f346, %f1721; - mul.f32 %f1723, %f242, %f1722; - mul.f32 %f1724, %f299, %f1723; - mul.f32 %f1725, %f332, %f332; - add.f32 %f360, %f2148, %f313; - div.rn.f32 %f1726, %f1725, %f360; - add.f32 %f2164, %f1726, %f2164; - mul.f32 %f1727, %f1724, %f332; - div.rn.f32 %f1728, %f1727, %f360; - add.f32 %f2163, %f1728, %f2163; - mul.f32 %f1729, %f299, %f312; - mul.f32 %f1730, %f1729, %f332; - div.rn.f32 %f1731, %f1730, %f360; - add.f32 %f2162, %f1731, %f2162; - div.rn.f32 %f1732, %f332, %f360; - add.f32 %f2161, %f1732, %f2161; - mul.f32 %f1733, %f1724, %f1724; - div.rn.f32 %f1734, %f1733, %f360; - add.f32 %f2160, %f1734, %f2160; - mul.f32 %f1735, %f1729, %f1724; - div.rn.f32 %f1736, %f1735, %f360; - add.f32 %f2159, %f1736, %f2159; - div.rn.f32 %f1737, %f1724, %f360; - add.f32 %f2158, %f1737, %f2158; - mul.f32 %f1738, %f1729, %f1729; - div.rn.f32 %f1739, %f1738, %f360; - add.f32 %f2157, %f1739, %f2157; - div.rn.f32 %f1740, %f1729, %f360; - add.f32 %f2156, %f1740, %f2156; - rcp.rn.f32 %f1741, %f360; - add.f32 %f2155, %f1741, %f2155; - setp.leu.f32 %p212, %f360, 0f00000000; - @%p212 bra BB4_142; - - add.f32 %f371, %f2148, %f314; - setp.gt.f32 %p213, %f371, 0f00000000; - @%p213 bra BB4_136; - bra.uni BB4_135; - -BB4_136: - setp.lt.f32 %p214, %f360, 0f00800000; - mul.f32 %f1743, %f360, 0f4B000000; - selp.f32 %f373, %f1743, %f360, %p214; - selp.f32 %f1744, 0fC1B80000, 0f00000000, %p214; - mov.b32 %r222, %f373; - add.s32 %r223, %r222, -1059760811; - and.b32 %r224, %r223, -8388608; - sub.s32 %r225, %r222, %r224; - mov.b32 %f1745, %r225; - cvt.rn.f32.s32 %f1746, %r224; - mov.f32 %f1747, 0f34000000; - fma.rn.f32 %f1748, %f1746, %f1747, %f1744; - add.f32 %f1749, %f1745, 0fBF800000; - mov.f32 %f1750, 0f3E1039F6; - mov.f32 %f1751, 0fBE055027; - fma.rn.f32 %f1752, %f1751, %f1749, %f1750; - mov.f32 %f1753, 0fBDF8CDCC; - fma.rn.f32 %f1754, %f1752, %f1749, %f1753; - mov.f32 %f1755, 0f3E0F2955; - fma.rn.f32 %f1756, %f1754, %f1749, %f1755; - mov.f32 %f1757, 0fBE2AD8B9; - fma.rn.f32 %f1758, %f1756, %f1749, %f1757; - mov.f32 %f1759, 0f3E4CED0B; - fma.rn.f32 %f1760, %f1758, %f1749, %f1759; - mov.f32 %f1761, 0fBE7FFF22; - fma.rn.f32 %f1762, %f1760, %f1749, %f1761; - mov.f32 %f1763, 0f3EAAAA78; - fma.rn.f32 %f1764, %f1762, %f1749, %f1763; - mov.f32 %f1765, 0fBF000000; - fma.rn.f32 %f1766, %f1764, %f1749, %f1765; - mul.f32 %f1767, %f1749, %f1766; - fma.rn.f32 %f1768, %f1767, %f1749, %f1749; - mov.f32 %f1769, 0f3F317218; - fma.rn.f32 %f2180, %f1748, %f1769, %f1768; - setp.lt.u32 %p215, %r222, 2139095040; - @%p215 bra BB4_138; - - mov.f32 %f1770, 0f7F800000; - fma.rn.f32 %f2180, %f373, %f1770, %f1770; - -BB4_138: - setp.eq.f32 %p216, %f373, 0f00000000; - selp.f32 %f1771, 0fFF800000, %f2180, %p216; - mul.f32 %f1772, %f371, %f1771; - sub.f32 %f377, %f1772, %f313; - mul.f32 %f1773, %f371, 0f4B000000; - setp.lt.f32 %p217, %f371, 0f00800000; - selp.f32 %f378, %f1773, %f371, %p217; - selp.f32 %f1774, 0fC1B80000, 0f00000000, %p217; - mov.b32 %r226, %f378; - add.s32 %r227, %r226, -1059760811; - and.b32 %r228, %r227, -8388608; - sub.s32 %r229, %r226, %r228; - mov.b32 %f1775, %r229; - cvt.rn.f32.s32 %f1776, %r228; - fma.rn.f32 %f1778, %f1776, %f1747, %f1774; - add.f32 %f1779, %f1775, 0fBF800000; - fma.rn.f32 %f1782, %f1751, %f1779, %f1750; - fma.rn.f32 %f1784, %f1782, %f1779, %f1753; - fma.rn.f32 %f1786, %f1784, %f1779, %f1755; - fma.rn.f32 %f1788, %f1786, %f1779, %f1757; - fma.rn.f32 %f1790, %f1788, %f1779, %f1759; - fma.rn.f32 %f1792, %f1790, %f1779, %f1761; - fma.rn.f32 %f1794, %f1792, %f1779, %f1763; - fma.rn.f32 %f1796, %f1794, %f1779, %f1765; - mul.f32 %f1797, %f1779, %f1796; - fma.rn.f32 %f1798, %f1797, %f1779, %f1779; - fma.rn.f32 %f2181, %f1778, %f1769, %f1798; - setp.lt.u32 %p218, %r226, 2139095040; - @%p218 bra BB4_140; - - mov.f32 %f1800, 0f7F800000; - fma.rn.f32 %f2181, %f378, %f1800, %f1800; - -BB4_140: - setp.eq.f32 %p219, %f378, 0f00000000; - selp.f32 %f1801, 0fFF800000, %f2181, %p219; - mul.f32 %f1802, %f371, %f1801; - sub.f32 %f1803, %f377, %f1802; - add.f32 %f2182, %f314, %f1803; - bra.uni BB4_141; - -BB4_135: - neg.f32 %f1742, %f313; - sub.f32 %f2182, %f1742, %f2148; - -BB4_141: - add.f32 %f2165, %f2165, %f2182; - -BB4_142: - add.s32 %r266, %r266, 1; - setp.lt.s32 %p220, %r266, %r49; - @%p220 bra BB4_91; - - st.local.v4.f32 [%rd2], {%f2164, %f2163, %f2162, %f2161}; - st.local.v4.f32 [%rd2+16], {%f2163, %f2160, %f2159, %f2158}; - st.local.v4.f32 [%rd2+32], {%f2162, %f2159, %f2157, %f2156}; - st.local.v4.f32 [%rd2+48], {%f2161, %f2158, %f2156, %f2155}; - add.s32 %r265, %r265, 1; - setp.lt.s32 %p221, %r265, %r49; - @%p221 bra BB4_90; - -BB4_144: - mov.f32 %f2185, 0f00000000; - ld.local.v4.f32 {%f1805, %f1806, %f1807, %f1808}, [%rd2]; - rcp.rn.f32 %f387, %f1805; - mul.f32 %f388, %f387, %f1806; - st.local.f32 [%rd2+4], %f388; - mul.f32 %f389, %f387, %f1807; - mul.f32 %f390, %f387, %f1808; - st.local.v2.f32 [%rd2+8], {%f389, %f390}; - ld.local.v4.f32 {%f1813, %f1814, %f1815, %f1816}, [%rd2+16]; - ld.local.f32 %f1821, [%rd2+4]; - fma.rn.f32 %f1822, %f1821, %f1813, 0f00000000; - sub.f32 %f1823, %f1814, %f1822; - ld.local.f32 %f391, [%rd2+16]; - st.local.f32 [%rd2+20], %f1823; - fma.rn.f32 %f1824, %f389, %f391, 0f00000000; - rcp.rn.f32 %f392, %f1823; - sub.f32 %f1825, %f1815, %f1824; - mul.f32 %f393, %f392, %f1825; - fma.rn.f32 %f1826, %f390, %f391, 0f00000000; - sub.f32 %f1827, %f1816, %f1826; - mul.f32 %f394, %f392, %f1827; - st.local.v2.f32 [%rd2+24], {%f393, %f394}; - ld.local.v2.f32 {%f1828, %f1829}, [%rd2+32]; - ld.local.f32 %f1832, [%rd2+4]; - fma.rn.f32 %f1833, %f1832, %f1828, 0f00000000; - sub.f32 %f395, %f1829, %f1833; - st.local.f32 [%rd2+36], %f395; - add.s64 %rd87, %rd2, 32; - add.s64 %rd86, %rd2, 8; - mov.u32 %r267, -1; - -BB4_145: - ld.local.f32 %f1834, [%rd87]; - ld.local.f32 %f1835, [%rd86]; - fma.rn.f32 %f2185, %f1835, %f1834, %f2185; - add.s64 %rd87, %rd87, 4; - add.s64 %rd86, %rd86, 16; - add.s32 %r267, %r267, 1; - setp.lt.s32 %p222, %r267, 1; - @%p222 bra BB4_145; - - ld.local.v4.f32 {%f1837, %f1838, %f1839, %f1840}, [%rd2+32]; - fma.rn.f32 %f1841, %f390, %f1837, 0f00000000; - fma.rn.f32 %f1842, %f394, %f395, %f1841; - sub.f32 %f1844, %f1839, %f2185; - rcp.rn.f32 %f399, %f1844; - sub.f32 %f1846, %f1840, %f1842; - mul.f32 %f400, %f399, %f1846; - ld.local.f32 %f1847, [%rd2+4]; - st.local.v2.f32 [%rd2+40], {%f1844, %f400}; - ld.local.v2.f32 {%f1848, %f1849}, [%rd2+48]; - fma.rn.f32 %f1852, %f1847, %f1848, 0f00000000; - sub.f32 %f401, %f1849, %f1852; - st.local.f32 [%rd2+52], %f401; - add.s64 %rd89, %rd2, 48; - add.s64 %rd88, %rd2, 8; - mov.f32 %f2186, 0f00000000; - mov.u32 %r268, -1; - -BB4_147: - ld.local.f32 %f1853, [%rd89]; - ld.local.f32 %f1854, [%rd88]; - fma.rn.f32 %f2186, %f1854, %f1853, %f2186; - add.s64 %rd89, %rd89, 4; - add.s64 %rd88, %rd88, 16; - add.s32 %r268, %r268, 1; - setp.lt.s32 %p223, %r268, 1; - @%p223 bra BB4_147; - - ld.local.f32 %f1856, [%rd2+56]; - sub.f32 %f404, %f1856, %f2186; - st.local.f32 [%rd2+56], %f404; - add.s64 %rd91, %rd2, 48; - add.s64 %rd90, %rd2, 12; - mov.f32 %f2187, 0f00000000; - mov.u32 %r269, -1; - -BB4_149: - ld.local.f32 %f1857, [%rd91]; - ld.local.f32 %f1858, [%rd90]; - fma.rn.f32 %f2187, %f1858, %f1857, %f2187; - add.s64 %rd91, %rd91, 4; - add.s64 %rd90, %rd90, 16; - add.s32 %r269, %r269, 1; - setp.lt.s32 %p224, %r269, 2; - @%p224 bra BB4_149; - - mov.u32 %r243, %tid.x; - mov.u32 %r242, %ctaid.x; - mov.u32 %r241, %ntid.x; - mad.lo.s32 %r240, %r241, %r242, %r243; - ld.param.u64 %rd85, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_9]; - ld.param.u64 %rd84, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_8]; - ld.param.u32 %r238, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_10]; - ld.param.u64 %rd83, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_7]; - ld.local.v4.f32 {%f1859, %f1860, %f1861, %f1862}, [%rd2+48]; - sub.f32 %f1865, %f1862, %f2187; - st.local.f32 [%rd2+60], %f1865; - add.f32 %f1866, %f388, 0f00000000; - mov.f32 %f1867, 0f00000000; - sub.f32 %f1868, %f1867, %f1866; - add.f32 %f1869, %f389, 0f00000000; - fma.rn.f32 %f1870, %f393, %f1868, %f1869; - sub.f32 %f1871, %f1867, %f1870; - add.f32 %f1872, %f390, 0f00000000; - fma.rn.f32 %f1873, %f394, %f1868, %f1872; - fma.rn.f32 %f1874, %f400, %f1871, %f1873; - sub.f32 %f1875, %f1867, %f1874; - div.rn.f32 %f1876, %f1875, %f1865; - fma.rn.f32 %f1877, %f404, %f1876, 0f00000000; - sub.f32 %f1878, %f1871, %f1877; - mul.f32 %f1879, %f399, %f1878; - fma.rn.f32 %f1880, %f395, %f1879, 0f00000000; - fma.rn.f32 %f1881, %f401, %f1876, %f1880; - sub.f32 %f1882, %f1868, %f1881; - mul.f32 %f1883, %f392, %f1882; - fma.rn.f32 %f1884, %f391, %f1883, 0f00000000; - fma.rn.f32 %f1885, %f1837, %f1879, %f1884; - fma.rn.f32 %f1886, %f1859, %f1876, %f1885; - mov.f32 %f1887, 0f3F800000; - sub.f32 %f1888, %f1887, %f1886; - mul.f32 %f1889, %f387, %f1888; - fma.rn.f32 %f1890, %f388, 0f00000000, 0f00000000; - sub.f32 %f1891, %f1887, %f1890; - fma.rn.f32 %f1892, %f389, 0f00000000, 0f00000000; - fma.rn.f32 %f1893, %f393, %f1891, %f1892; - sub.f32 %f1894, %f1867, %f1893; - fma.rn.f32 %f1895, %f390, 0f00000000, 0f00000000; - fma.rn.f32 %f1896, %f394, %f1891, %f1895; - fma.rn.f32 %f1897, %f400, %f1894, %f1896; - sub.f32 %f1898, %f1867, %f1897; - div.rn.f32 %f1899, %f1898, %f1865; - fma.rn.f32 %f1900, %f404, %f1899, 0f00000000; - sub.f32 %f1901, %f1894, %f1900; - mul.f32 %f1902, %f399, %f1901; - fma.rn.f32 %f1903, %f395, %f1902, 0f00000000; - fma.rn.f32 %f1904, %f401, %f1899, %f1903; - sub.f32 %f1905, %f1891, %f1904; - mul.f32 %f1906, %f392, %f1905; - sub.f32 %f1907, %f1867, %f1890; - fma.rn.f32 %f1908, %f393, %f1907, %f1892; - sub.f32 %f1909, %f1887, %f1908; - fma.rn.f32 %f1910, %f394, %f1907, %f1895; - fma.rn.f32 %f1911, %f400, %f1909, %f1910; - sub.f32 %f1912, %f1867, %f1911; - div.rn.f32 %f1913, %f1912, %f1865; - fma.rn.f32 %f1914, %f404, %f1913, 0f00000000; - sub.f32 %f1915, %f1909, %f1914; - mul.f32 %f1916, %f399, %f1915; - sub.f32 %f1917, %f1867, %f1908; - fma.rn.f32 %f1918, %f400, %f1917, %f1910; - sub.f32 %f1919, %f1887, %f1918; - div.rn.f32 %f1920, %f1919, %f1865; - cvta.to.global.u64 %rd69, %rd83; - mul.wide.s32 %rd70, %r240, 4; - add.s64 %rd71, %rd69, %rd70; - st.global.f32 [%rd71], %f2152; - shl.b32 %r237, %r238, 2; - cvt.s64.s32 %rd72, %r237; - add.s64 %rd73, %rd71, %rd72; - st.global.f32 [%rd73], %f2151; - add.s64 %rd74, %rd73, %rd72; - st.global.f32 [%rd74], %f2150; - add.s64 %rd75, %rd74, %rd72; - st.global.f32 [%rd75], %f2081; - cvta.to.global.u64 %rd76, %rd84; - add.s64 %rd77, %rd76, %rd70; - st.global.f32 [%rd77], %f1889; - add.s64 %rd78, %rd77, %rd72; - st.global.f32 [%rd78], %f1906; - add.s64 %rd79, %rd78, %rd72; - st.global.f32 [%rd79], %f1916; - add.s64 %rd80, %rd79, %rd72; - st.global.f32 [%rd80], %f1920; - cvta.to.global.u64 %rd81, %rd85; - add.s64 %rd82, %rd81, %rd70; - st.global.f32 [%rd82], %f2165; - -BB4_151: + sub.f32 %f1108, %f1103, %f1107; + add.f32 %f1109, %f1106, %f1108; + mul.rn.f32 %f1110, %f1086, %f1037; + mul.rn.f32 %f1111, %f1086, %f1039; + add.f32 %f1112, %f1110, %f1107; + sub.f32 %f1113, %f1110, %f1112; + add.f32 %f1114, %f1107, %f1113; + add.f32 %f1115, %f1109, %f1114; + add.f32 %f1116, %f1111, %f1115; + add.f32 %f1117, %f1112, %f1116; + sub.f32 %f1118, %f1112, %f1117; + add.f32 %f1119, %f1116, %f1118; + mul.rn.f32 %f1120, %f957, %f1117; + neg.f32 %f1121, %f1120; + fma.rn.f32 %f1122, %f957, %f1117, %f1121; + fma.rn.f32 %f1123, %f957, %f1119, %f1122; + fma.rn.f32 %f1124, %f941, %f1117, %f1123; + add.rn.f32 %f1125, %f1120, %f1124; + neg.f32 %f1126, %f1125; + add.rn.f32 %f1127, %f1120, %f1126; + add.rn.f32 %f1128, %f1127, %f1124; + mov.b32 %r406, %f1125; + setp.eq.s32 %p314, %r406, 1118925336; + add.s32 %r407, %r406, -1; + mov.b32 %f1129, %r407; + add.f32 %f1130, %f1128, 0f37000000; + selp.f32 %f217, %f1130, %f1128, %p314; + selp.f32 %f1131, %f1129, %f1125, %p314; + mul.rn.f32 %f1132, %f1131, %f1062; + cvt.rzi.f32.f32 %f1133, %f1132; + abs.f32 %f1134, %f1133; + setp.gt.f32 %p315, %f1134, 0f42FC0000; + mov.b32 %r408, %f1133; + and.b32 %r409, %r408, -2147483648; + or.b32 %r410, %r409, 1123811328; + mov.b32 %f1135, %r410; + selp.f32 %f1136, %f1135, %f1133, %p315; + fma.rn.f32 %f1137, %f1136, %f1068, %f1131; + fma.rn.f32 %f1138, %f1136, %f1070, %f1137; + mul.f32 %f1139, %f1138, 0f3FB8AA3B; + add.f32 %f1140, %f1136, 0f4B40007F; + mov.b32 %r411, %f1140; + shl.b32 %r412, %r411, 23; + mov.b32 %f1141, %r412; + ex2.approx.ftz.f32 %f1142, %f1139; + mul.f32 %f218, %f1142, %f1141; + add.f32 %f219, %f210, 0f40000000; + setp.lt.f32 %p316, %f215, 0f00000000; + and.pred %p14, %p316, %p311; + selp.f32 %f220, 0fFF800000, 0f7F800000, %p13; + add.f32 %f1143, %f215, %f215; + selp.f32 %f221, %f1143, 0f00000000, %p311; + add.f32 %f1144, %f216, 0f40000000; + mov.b32 %r81, %f1144; + add.f32 %f222, %f215, 0f40000000; + selp.f32 %f223, 0fFF800000, 0f7F800000, %p14; + setp.geu.f32 %p15, %f210, 0f00000000; + setp.geu.f32 %p16, %f215, 0f00000000; + mov.u32 %r525, %r387; + +$L__BB4_208: + setp.ltu.f32 %p317, %f206, 0f3F8060FE; + mov.f32 %f1801, %f207; + @%p317 bra $L__BB4_210; + + ex2.approx.ftz.f32 %f1145, %f207; + sub.f32 %f1147, %f954, %f1145; + mov.b32 %r413, %f1147; + or.b32 %r414, %r78, %r413; + mov.b32 %f1801, %r414; + +$L__BB4_210: + setp.ltu.f32 %p318, %f208, 0f3F8060FE; + mov.f32 %f1802, %f209; + @%p318 bra $L__BB4_212; + + ex2.approx.ftz.f32 %f1148, %f209; + sub.f32 %f1150, %f954, %f1148; + mov.b32 %r415, %f1150; + or.b32 %r416, %r79, %r415; + mov.b32 %f1802, %r416; + +$L__BB4_212: + sub.f32 %f1151, %f1801, %f1802; + mul.f32 %f239, %f1151, 0f3F000000; + cvt.rn.f32.s32 %f240, %r525; + sub.f32 %f241, %f240, %f1776; + add.f32 %f1152, %f241, 0f3F000000; + mul.f32 %f242, %f193, %f1152; + abs.f32 %f1153, %f242; + setp.ltu.f32 %p319, %f1153, 0f3F8060FE; + setp.ge.f32 %p320, %f1153, 0f3F8060FE; + mul.f32 %f1154, %f242, %f242; + selp.f32 %f1155, %f1153, %f1154, %p320; + selp.f32 %f1156, 0f3789CA3C, 0f38B1E96A, %p320; + selp.f32 %f1157, 0fB9F560B9, 0fBA574D20, %p320; + fma.rn.f32 %f1158, %f1156, %f1155, %f1157; + selp.f32 %f1159, 0f3BAC840B, 0f3BAAD5EA, %p320; + fma.rn.f32 %f1160, %f1158, %f1155, %f1159; + selp.f32 %f1161, 0fBD0C8162, 0fBCDC1BE7, %p320; + fma.rn.f32 %f1162, %f1160, %f1155, %f1161; + selp.f32 %f1163, 0f3E1CF906, 0f3DE718AF, %p320; + fma.rn.f32 %f1164, %f1162, %f1155, %f1163; + selp.f32 %f1165, 0f3F6A937E, 0fBEC093AC, %p320; + fma.rn.f32 %f1166, %f1164, %f1155, %f1165; + selp.f32 %f1167, 0f3F20D842, 0f3E0375D3, %p320; + fma.rn.f32 %f1168, %f1166, %f1155, %f1167; + neg.f32 %f1169, %f1153; + selp.f32 %f1170, %f1169, %f242, %p320; + fma.rn.f32 %f1803, %f1168, %f1170, %f1170; + @%p319 bra $L__BB4_214; + + ex2.approx.ftz.f32 %f1171, %f1803; + sub.f32 %f1173, %f954, %f1171; + mov.b32 %r417, %f1173; + mov.b32 %r418, %f242; + and.b32 %r419, %r418, -2147483648; + or.b32 %r420, %r419, %r417; + mov.b32 %f1803, %r420; + +$L__BB4_214: + add.f32 %f246, %f241, 0fBF000000; + mul.f32 %f247, %f193, %f246; + abs.f32 %f1174, %f247; + setp.ltu.f32 %p321, %f1174, 0f3F8060FE; + setp.ge.f32 %p322, %f1174, 0f3F8060FE; + mul.f32 %f1175, %f247, %f247; + selp.f32 %f1176, %f1174, %f1175, %p322; + selp.f32 %f1177, 0f3789CA3C, 0f38B1E96A, %p322; + selp.f32 %f1178, 0fB9F560B9, 0fBA574D20, %p322; + fma.rn.f32 %f1179, %f1177, %f1176, %f1178; + selp.f32 %f1180, 0f3BAC840B, 0f3BAAD5EA, %p322; + fma.rn.f32 %f1181, %f1179, %f1176, %f1180; + selp.f32 %f1182, 0fBD0C8162, 0fBCDC1BE7, %p322; + fma.rn.f32 %f1183, %f1181, %f1176, %f1182; + selp.f32 %f1184, 0f3E1CF906, 0f3DE718AF, %p322; + fma.rn.f32 %f1185, %f1183, %f1176, %f1184; + selp.f32 %f1186, 0f3F6A937E, 0fBEC093AC, %p322; + fma.rn.f32 %f1187, %f1185, %f1176, %f1186; + selp.f32 %f1188, 0f3F20D842, 0f3E0375D3, %p322; + fma.rn.f32 %f1189, %f1187, %f1176, %f1188; + neg.f32 %f1190, %f1174; + selp.f32 %f1191, %f1190, %f247, %p322; + fma.rn.f32 %f1804, %f1189, %f1191, %f1191; + @%p321 bra $L__BB4_216; + + ex2.approx.ftz.f32 %f1192, %f1804; + sub.f32 %f1194, %f954, %f1192; + mov.b32 %r421, %f1194; + mov.b32 %r422, %f247; + and.b32 %r423, %r422, -2147483648; + or.b32 %r424, %r423, %r421; + mov.b32 %f1804, %r424; + +$L__BB4_216: + sub.f32 %f1196, %f1803, %f1804; + mul.f32 %f251, %f1196, 0f3F000000; + mul.f32 %f1197, %f239, %f1775; + fma.rn.f32 %f252, %f251, %f1197, %f1774; + mad.lo.s32 %r425, %r525, %r85, %r524; + add.s32 %r426, %r425, %r2; + mul.wide.s32 %rd34, %r426, 4; + add.s64 %rd35, %rd1, %rd34; + ld.global.f32 %f253, [%rd35]; + setp.eq.f32 %p323, %f213, 0f7F800000; + mov.f32 %f1805, 0f7F800000; + @%p323 bra $L__BB4_218; + + fma.rn.f32 %f1805, %f213, %f212, %f213; + +$L__BB4_218: + mov.b32 %r427, %f1805; + xor.b32 %r428, %r427, -2147483648; + mov.b32 %f1198, %r428; + selp.f32 %f256, %f1198, %f1805, %p13; + setp.eq.f32 %p324, %f210, 0f00000000; + selp.f32 %f1806, %f214, %f256, %p324; + @%p15 bra $L__BB4_221; + + cvt.rzi.f32.f32 %f1200, %f957; + setp.eq.f32 %p325, %f1200, 0f40000000; + mov.f32 %f1806, %f256; + @%p325 bra $L__BB4_221; + + mov.f32 %f1806, 0f7FFFFFFF; + +$L__BB4_221: + setp.eq.f32 %p326, %f218, 0f7F800000; + mov.f32 %f1807, 0f7F800000; + @%p326 bra $L__BB4_223; + + fma.rn.f32 %f1807, %f218, %f217, %f218; + +$L__BB4_223: + mov.b32 %r429, %f1807; + xor.b32 %r430, %r429, -2147483648; + mov.b32 %f1203, %r430; + selp.f32 %f261, %f1203, %f1807, %p14; + setp.eq.f32 %p327, %f215, 0f00000000; + selp.f32 %f1808, %f221, %f261, %p327; + @%p16 bra $L__BB4_226; + + cvt.rzi.f32.f32 %f1205, %f957; + setp.eq.f32 %p328, %f1205, 0f40000000; + mov.f32 %f1808, %f261; + @%p328 bra $L__BB4_226; + + mov.f32 %f1808, 0f7FFFFFFF; + +$L__BB4_226: + setp.gtu.f32 %p329, %f211, 0f7F800000; + mov.f32 %f1809, 0f7F800000; + selp.f32 %f1208, %f219, %f1806, %p329; + setp.neu.f32 %p330, %f211, 0f7F800000; + selp.f32 %f1209, %f1208, %f220, %p330; + setp.gt.s32 %p331, %r80, 2139095039; + selp.f32 %f1210, %f1209, %f1806, %p331; + mul.f32 %f1211, %f1210, 0fBF000000; + setp.eq.f32 %p332, %f210, 0f3F800000; + selp.f32 %f1212, 0fBF000000, %f1211, %p332; + mov.f32 %f1214, 0f3BBB989D; + fma.rn.f32 %f1215, %f1212, %f1214, %f358; + mov.f32 %f1217, 0f437C0000; + cvt.sat.f32.f32 %f1218, %f1215; + mov.f32 %f1219, 0f4B400001; + fma.rm.f32 %f1220, %f1218, %f1217, %f1219; + setp.gtu.f32 %p333, %f216, 0f7F800000; + selp.f32 %f1221, %f222, %f1808, %p333; + setp.neu.f32 %p334, %f216, 0f7F800000; + selp.f32 %f1222, %f1221, %f223, %p334; + setp.gt.s32 %p335, %r81, 2139095039; + selp.f32 %f1223, %f1222, %f1808, %p335; + mul.f32 %f1224, %f1223, 0fBF000000; + setp.eq.f32 %p336, %f215, 0f3F800000; + selp.f32 %f1225, 0fBF000000, %f1224, %p336; + fma.rn.f32 %f1226, %f1225, %f1214, %f358; + cvt.sat.f32.f32 %f1227, %f1226; + fma.rm.f32 %f1228, %f1227, %f1217, %f1219; + add.f32 %f1229, %f1228, 0fCB40007F; + neg.f32 %f1230, %f1229; + fma.rn.f32 %f1231, %f1225, %f1062, %f1230; + mov.f32 %f1232, 0f32A57060; + fma.rn.f32 %f1233, %f1225, %f1232, %f1231; + mov.b32 %r431, %f1228; + shl.b32 %r432, %r431, 23; + mov.b32 %f1234, %r432; + ex2.approx.ftz.f32 %f1235, %f1233; + mul.f32 %f1236, %f1235, %f1234; + mov.b32 %r433, %f1220; + shl.b32 %r434, %r433, 23; + mov.b32 %f1237, %r434; + add.f32 %f1238, %f1220, 0fCB40007F; + neg.f32 %f1239, %f1238; + fma.rn.f32 %f1240, %f1212, %f1062, %f1239; + fma.rn.f32 %f1241, %f1212, %f1232, %f1240; + ex2.approx.ftz.f32 %f1242, %f1241; + mul.f32 %f1243, %f1242, %f1237; + sub.f32 %f1244, %f1243, %f1236; + mul.f32 %f1245, %f192, %f1244; + mul.f32 %f264, %f251, %f1245; + add.f32 %f1246, %f240, 0f3F000000; + sub.f32 %f1247, %f1246, %f1776; + div.rn.f32 %f265, %f1247, %f325; + abs.f32 %f266, %f265; + setp.lt.f32 %p337, %f266, 0f00800000; + mul.f32 %f1248, %f266, 0f4B800000; + selp.f32 %f1249, %f1248, %f266, %p337; + selp.f32 %f1250, 0fC3170000, 0fC2FE0000, %p337; + mov.b32 %r435, %f1249; + and.b32 %r436, %r435, 8388607; + or.b32 %r437, %r436, 1065353216; + mov.b32 %f1251, %r437; + shr.u32 %r438, %r435, 23; + cvt.rn.f32.u32 %f1252, %r438; + add.f32 %f1253, %f1250, %f1252; + setp.gt.f32 %p338, %f1251, 0f3FB504F3; + mul.f32 %f1254, %f1251, 0f3F000000; + add.f32 %f1255, %f1253, 0f3F800000; + selp.f32 %f1256, %f1255, %f1253, %p338; + selp.f32 %f1257, %f1254, %f1251, %p338; + add.f32 %f1258, %f1257, 0fBF800000; + add.f32 %f1259, %f1257, 0f3F800000; + rcp.approx.ftz.f32 %f1260, %f1259; + add.f32 %f1261, %f1258, %f1258; + mul.f32 %f1263, %f1261, %f1260; + mul.f32 %f1264, %f1263, %f1263; + fma.rn.f32 %f1267, %f1019, %f1264, %f1018; + fma.rn.f32 %f1269, %f1267, %f1264, %f1021; + mul.rn.f32 %f1270, %f1269, %f1264; + mul.rn.f32 %f1271, %f1270, %f1263; + sub.f32 %f1272, %f1258, %f1263; + add.f32 %f1273, %f1272, %f1272; + neg.f32 %f1274, %f1263; + fma.rn.f32 %f1275, %f1274, %f1258, %f1273; + mul.rn.f32 %f1276, %f1260, %f1275; + add.f32 %f1277, %f1271, %f1263; + sub.f32 %f1278, %f1263, %f1277; + add.f32 %f1279, %f1271, %f1278; + add.f32 %f1280, %f1276, %f1279; + add.f32 %f1281, %f1277, %f1280; + sub.f32 %f1282, %f1277, %f1281; + add.f32 %f1283, %f1280, %f1282; + mul.rn.f32 %f1285, %f1256, %f1037; + mul.rn.f32 %f1287, %f1256, %f1039; + add.f32 %f1288, %f1285, %f1281; + sub.f32 %f1289, %f1285, %f1288; + add.f32 %f1290, %f1281, %f1289; + add.f32 %f1291, %f1283, %f1290; + add.f32 %f1292, %f1287, %f1291; + add.f32 %f1293, %f1288, %f1292; + sub.f32 %f1294, %f1288, %f1293; + add.f32 %f1295, %f1292, %f1294; + mul.rn.f32 %f1296, %f957, %f1293; + neg.f32 %f1297, %f1296; + fma.rn.f32 %f1298, %f957, %f1293, %f1297; + fma.rn.f32 %f1299, %f957, %f1295, %f1298; + mov.f32 %f1300, 0f00000000; + fma.rn.f32 %f1301, %f1300, %f1293, %f1299; + add.rn.f32 %f1302, %f1296, %f1301; + neg.f32 %f1303, %f1302; + add.rn.f32 %f1304, %f1296, %f1303; + add.rn.f32 %f1305, %f1304, %f1301; + mov.b32 %r439, %f1302; + setp.eq.s32 %p339, %r439, 1118925336; + add.s32 %r440, %r439, -1; + mov.b32 %f1306, %r440; + add.f32 %f1307, %f1305, 0f37000000; + selp.f32 %f267, %f1307, %f1305, %p339; + selp.f32 %f1308, %f1306, %f1302, %p339; + mul.rn.f32 %f1309, %f1308, %f1062; + cvt.rzi.f32.f32 %f1310, %f1309; + abs.f32 %f1311, %f1310; + setp.gt.f32 %p340, %f1311, 0f42FC0000; + mov.b32 %r441, %f1310; + and.b32 %r442, %r441, -2147483648; + or.b32 %r443, %r442, 1123811328; + mov.b32 %f1312, %r443; + selp.f32 %f1313, %f1312, %f1310, %p340; + fma.rn.f32 %f1315, %f1313, %f1068, %f1308; + fma.rn.f32 %f1317, %f1313, %f1070, %f1315; + mul.f32 %f1318, %f1317, 0f3FB8AA3B; + add.f32 %f1319, %f1313, 0f4B40007F; + mov.b32 %r444, %f1319; + shl.b32 %r445, %r444, 23; + mov.b32 %f1320, %r445; + ex2.approx.ftz.f32 %f1321, %f1318; + mul.f32 %f268, %f1321, %f1320; + setp.eq.f32 %p341, %f268, 0f7F800000; + @%p341 bra $L__BB4_228; + + fma.rn.f32 %f1809, %f268, %f267, %f268; + +$L__BB4_228: + setp.lt.f32 %p342, %f265, 0f00000000; + and.pred %p17, %p342, %p311; + setp.eq.f32 %p344, %f265, 0f00000000; + @%p344 bra $L__BB4_232; + bra.uni $L__BB4_229; + +$L__BB4_232: + add.f32 %f1326, %f265, %f265; + selp.f32 %f1811, %f1326, 0f00000000, %p311; + bra.uni $L__BB4_233; + +$L__BB4_229: + mov.b32 %r446, %f1809; + xor.b32 %r447, %r446, -2147483648; + mov.b32 %f1322, %r447; + selp.f32 %f1811, %f1322, %f1809, %p17; + setp.geu.f32 %p345, %f265, 0f00000000; + @%p345 bra $L__BB4_233; + + cvt.rzi.f32.f32 %f1324, %f957; + setp.eq.f32 %p346, %f1324, 0f40000000; + @%p346 bra $L__BB4_233; + + mov.f32 %f1811, 0f7FFFFFFF; + +$L__BB4_233: + add.f32 %f1327, %f266, 0f40000000; + mov.b32 %r448, %f1327; + setp.lt.s32 %p348, %r448, 2139095040; + @%p348 bra $L__BB4_238; + + setp.gtu.f32 %p349, %f266, 0f7F800000; + @%p349 bra $L__BB4_237; + bra.uni $L__BB4_235; + +$L__BB4_237: + add.f32 %f1811, %f265, 0f40000000; + bra.uni $L__BB4_238; + +$L__BB4_235: + setp.neu.f32 %p350, %f266, 0f7F800000; + @%p350 bra $L__BB4_238; + + selp.f32 %f1811, 0fFF800000, 0f7F800000, %p17; + +$L__BB4_238: + mul.f32 %f1329, %f1811, 0fBF000000; + setp.eq.f32 %p351, %f265, 0f3F800000; + selp.f32 %f1330, 0fBF000000, %f1329, %p351; + fma.rn.f32 %f1333, %f1330, %f1214, %f358; + cvt.sat.f32.f32 %f1336, %f1333; + fma.rm.f32 %f1338, %f1336, %f1217, %f1219; + add.f32 %f1339, %f1338, 0fCB40007F; + neg.f32 %f1340, %f1339; + fma.rn.f32 %f1341, %f1330, %f1062, %f1340; + fma.rn.f32 %f1343, %f1330, %f1232, %f1341; + mov.b32 %r449, %f1338; + shl.b32 %r450, %r449, 23; + mov.b32 %f1344, %r450; + ex2.approx.ftz.f32 %f1345, %f1343; + mul.f32 %f277, %f1345, %f1344; + div.rn.f32 %f278, %f246, %f325; + abs.f32 %f279, %f278; + setp.lt.f32 %p352, %f279, 0f00800000; + mul.f32 %f1346, %f279, 0f4B800000; + selp.f32 %f1347, %f1346, %f279, %p352; + selp.f32 %f1348, 0fC3170000, 0fC2FE0000, %p352; + mov.b32 %r451, %f1347; + and.b32 %r452, %r451, 8388607; + or.b32 %r453, %r452, 1065353216; + mov.b32 %f1349, %r453; + shr.u32 %r454, %r451, 23; + cvt.rn.f32.u32 %f1350, %r454; + add.f32 %f1351, %f1348, %f1350; + setp.gt.f32 %p353, %f1349, 0f3FB504F3; + mul.f32 %f1352, %f1349, 0f3F000000; + add.f32 %f1353, %f1351, 0f3F800000; + selp.f32 %f1354, %f1353, %f1351, %p353; + selp.f32 %f1355, %f1352, %f1349, %p353; + add.f32 %f1356, %f1355, 0fBF800000; + add.f32 %f1357, %f1355, 0f3F800000; + rcp.approx.ftz.f32 %f1358, %f1357; + add.f32 %f1359, %f1356, %f1356; + mul.f32 %f1361, %f1359, %f1358; + mul.f32 %f1362, %f1361, %f1361; + fma.rn.f32 %f1365, %f1019, %f1362, %f1018; + fma.rn.f32 %f1367, %f1365, %f1362, %f1021; + mul.rn.f32 %f1368, %f1367, %f1362; + mul.rn.f32 %f1369, %f1368, %f1361; + sub.f32 %f1370, %f1356, %f1361; + add.f32 %f1371, %f1370, %f1370; + neg.f32 %f1372, %f1361; + fma.rn.f32 %f1373, %f1372, %f1356, %f1371; + mul.rn.f32 %f1374, %f1358, %f1373; + add.f32 %f1375, %f1369, %f1361; + sub.f32 %f1376, %f1361, %f1375; + add.f32 %f1377, %f1369, %f1376; + add.f32 %f1378, %f1374, %f1377; + add.f32 %f1379, %f1375, %f1378; + sub.f32 %f1380, %f1375, %f1379; + add.f32 %f1381, %f1378, %f1380; + mul.rn.f32 %f1383, %f1354, %f1037; + mul.rn.f32 %f1385, %f1354, %f1039; + add.f32 %f1386, %f1383, %f1379; + sub.f32 %f1387, %f1383, %f1386; + add.f32 %f1388, %f1379, %f1387; + add.f32 %f1389, %f1381, %f1388; + add.f32 %f1390, %f1385, %f1389; + add.f32 %f1391, %f1386, %f1390; + sub.f32 %f1392, %f1386, %f1391; + add.f32 %f1393, %f1390, %f1392; + mul.rn.f32 %f1394, %f957, %f1391; + neg.f32 %f1395, %f1394; + fma.rn.f32 %f1396, %f957, %f1391, %f1395; + fma.rn.f32 %f1397, %f957, %f1393, %f1396; + fma.rn.f32 %f1399, %f1300, %f1391, %f1397; + add.rn.f32 %f1400, %f1394, %f1399; + neg.f32 %f1401, %f1400; + add.rn.f32 %f1402, %f1394, %f1401; + add.rn.f32 %f1403, %f1402, %f1399; + mov.b32 %r455, %f1400; + setp.eq.s32 %p354, %r455, 1118925336; + add.s32 %r456, %r455, -1; + mov.b32 %f1404, %r456; + add.f32 %f1405, %f1403, 0f37000000; + selp.f32 %f280, %f1405, %f1403, %p354; + selp.f32 %f1406, %f1404, %f1400, %p354; + mul.rn.f32 %f1407, %f1406, %f1062; + cvt.rzi.f32.f32 %f1408, %f1407; + abs.f32 %f1409, %f1408; + setp.gt.f32 %p355, %f1409, 0f42FC0000; + mov.b32 %r457, %f1408; + and.b32 %r458, %r457, -2147483648; + or.b32 %r459, %r458, 1123811328; + mov.b32 %f1410, %r459; + selp.f32 %f1411, %f1410, %f1408, %p355; + fma.rn.f32 %f1413, %f1411, %f1068, %f1406; + fma.rn.f32 %f1415, %f1411, %f1070, %f1413; + mul.f32 %f1416, %f1415, 0f3FB8AA3B; + add.f32 %f1417, %f1411, 0f4B40007F; + mov.b32 %r460, %f1417; + shl.b32 %r461, %r460, 23; + mov.b32 %f1418, %r461; + ex2.approx.ftz.f32 %f1419, %f1416; + mul.f32 %f281, %f1419, %f1418; + setp.eq.f32 %p356, %f281, 0f7F800000; + mov.f32 %f1812, 0f7F800000; + @%p356 bra $L__BB4_240; + + fma.rn.f32 %f1812, %f281, %f280, %f281; + +$L__BB4_240: + setp.lt.f32 %p357, %f278, 0f00000000; + and.pred %p18, %p357, %p311; + setp.eq.f32 %p359, %f278, 0f00000000; + @%p359 bra $L__BB4_244; + bra.uni $L__BB4_241; + +$L__BB4_244: + add.f32 %f1424, %f278, %f278; + selp.f32 %f1814, %f1424, 0f00000000, %p311; + bra.uni $L__BB4_245; + +$L__BB4_241: + mov.b32 %r462, %f1812; + xor.b32 %r463, %r462, -2147483648; + mov.b32 %f1420, %r463; + selp.f32 %f1814, %f1420, %f1812, %p18; + setp.geu.f32 %p360, %f278, 0f00000000; + @%p360 bra $L__BB4_245; + + cvt.rzi.f32.f32 %f1422, %f957; + setp.eq.f32 %p361, %f1422, 0f40000000; + @%p361 bra $L__BB4_245; + + mov.f32 %f1814, 0f7FFFFFFF; + +$L__BB4_245: + add.f32 %f1425, %f279, 0f40000000; + mov.b32 %r464, %f1425; + setp.lt.s32 %p363, %r464, 2139095040; + @%p363 bra $L__BB4_250; + + setp.gtu.f32 %p364, %f279, 0f7F800000; + @%p364 bra $L__BB4_249; + bra.uni $L__BB4_247; + +$L__BB4_249: + add.f32 %f1814, %f278, 0f40000000; + bra.uni $L__BB4_250; + +$L__BB4_247: + setp.neu.f32 %p365, %f279, 0f7F800000; + @%p365 bra $L__BB4_250; + + selp.f32 %f1814, 0fFF800000, 0f7F800000, %p18; + +$L__BB4_250: + mul.f32 %f1426, %f1814, 0fBF000000; + setp.eq.f32 %p366, %f278, 0f3F800000; + selp.f32 %f1427, 0fBF000000, %f1426, %p366; + fma.rn.f32 %f1430, %f1427, %f1214, %f358; + cvt.sat.f32.f32 %f1433, %f1430; + fma.rm.f32 %f1435, %f1433, %f1217, %f1219; + add.f32 %f1436, %f1435, 0fCB40007F; + neg.f32 %f1437, %f1436; + fma.rn.f32 %f1438, %f1427, %f1062, %f1437; + fma.rn.f32 %f1440, %f1427, %f1232, %f1438; + mov.b32 %r465, %f1435; + shl.b32 %r466, %r465, 23; + mov.b32 %f1441, %r466; + ex2.approx.ftz.f32 %f1442, %f1440; + mul.f32 %f1443, %f1442, %f1441; + sub.f32 %f1444, %f277, %f1443; + mul.f32 %f1445, %f192, %f1444; + mul.f32 %f1446, %f239, %f1445; + mul.f32 %f1447, %f264, %f264; + add.f32 %f290, %f1773, %f252; + div.rn.f32 %f1448, %f1447, %f290; + add.f32 %f1796, %f1796, %f1448; + mul.f32 %f1449, %f1446, %f264; + div.rn.f32 %f1450, %f1449, %f290; + add.f32 %f1795, %f1795, %f1450; + mul.f32 %f1451, %f239, %f251; + mul.f32 %f1452, %f1451, %f264; + div.rn.f32 %f1453, %f1452, %f290; + add.f32 %f1794, %f1794, %f1453; + div.rn.f32 %f1454, %f264, %f290; + add.f32 %f1793, %f1793, %f1454; + mul.f32 %f1455, %f1446, %f1446; + div.rn.f32 %f1456, %f1455, %f290; + add.f32 %f1792, %f1792, %f1456; + mul.f32 %f1457, %f1451, %f1446; + div.rn.f32 %f1458, %f1457, %f290; + add.f32 %f1791, %f1791, %f1458; + div.rn.f32 %f1459, %f1446, %f290; + add.f32 %f1790, %f1790, %f1459; + mul.f32 %f1460, %f1451, %f1451; + div.rn.f32 %f1461, %f1460, %f290; + add.f32 %f1797, %f1797, %f1461; + div.rn.f32 %f1462, %f1451, %f290; + add.f32 %f1798, %f1798, %f1462; + rcp.rn.f32 %f1463, %f290; + add.f32 %f1799, %f1799, %f1463; + setp.leu.f32 %p367, %f290, 0f00000000; + @%p367 bra $L__BB4_258; + + add.f32 %f301, %f1773, %f253; + setp.gt.f32 %p368, %f301, 0f00000000; + @%p368 bra $L__BB4_253; + bra.uni $L__BB4_252; + +$L__BB4_253: + setp.lt.f32 %p369, %f290, 0f00800000; + mul.f32 %f1466, %f290, 0f4B000000; + selp.f32 %f303, %f1466, %f290, %p369; + selp.f32 %f1467, 0fC1B80000, 0f00000000, %p369; + mov.b32 %r467, %f303; + add.s32 %r468, %r467, -1059760811; + and.b32 %r469, %r468, -8388608; + sub.s32 %r470, %r467, %r469; + mov.b32 %f1468, %r470; + cvt.rn.f32.s32 %f1469, %r469; + mov.f32 %f1470, 0f34000000; + fma.rn.f32 %f1471, %f1469, %f1470, %f1467; + add.f32 %f1472, %f1468, 0fBF800000; + mov.f32 %f1473, 0f3E1039F6; + mov.f32 %f1474, 0fBE055027; + fma.rn.f32 %f1475, %f1474, %f1472, %f1473; + mov.f32 %f1476, 0fBDF8CDCC; + fma.rn.f32 %f1477, %f1475, %f1472, %f1476; + mov.f32 %f1478, 0f3E0F2955; + fma.rn.f32 %f1479, %f1477, %f1472, %f1478; + mov.f32 %f1480, 0fBE2AD8B9; + fma.rn.f32 %f1481, %f1479, %f1472, %f1480; + mov.f32 %f1482, 0f3E4CED0B; + fma.rn.f32 %f1483, %f1481, %f1472, %f1482; + mov.f32 %f1484, 0fBE7FFF22; + fma.rn.f32 %f1485, %f1483, %f1472, %f1484; + mov.f32 %f1486, 0f3EAAAA78; + fma.rn.f32 %f1487, %f1485, %f1472, %f1486; + mov.f32 %f1488, 0fBF000000; + fma.rn.f32 %f1489, %f1487, %f1472, %f1488; + mul.f32 %f1490, %f1472, %f1489; + fma.rn.f32 %f1491, %f1490, %f1472, %f1472; + mov.f32 %f1492, 0f3F317218; + fma.rn.f32 %f1815, %f1471, %f1492, %f1491; + setp.lt.u32 %p370, %r467, 2139095040; + @%p370 bra $L__BB4_255; + + mov.f32 %f1493, 0f7F800000; + fma.rn.f32 %f1815, %f303, %f1493, %f1493; + +$L__BB4_255: + setp.eq.f32 %p371, %f303, 0f00000000; + selp.f32 %f1494, 0fFF800000, %f1815, %p371; + mul.f32 %f1495, %f301, %f1494; + sub.f32 %f307, %f1495, %f252; + mul.f32 %f1496, %f301, 0f4B000000; + setp.lt.f32 %p372, %f301, 0f00800000; + selp.f32 %f308, %f1496, %f301, %p372; + selp.f32 %f1497, 0fC1B80000, 0f00000000, %p372; + mov.b32 %r471, %f308; + add.s32 %r472, %r471, -1059760811; + and.b32 %r473, %r472, -8388608; + sub.s32 %r474, %r471, %r473; + mov.b32 %f1498, %r474; + cvt.rn.f32.s32 %f1499, %r473; + fma.rn.f32 %f1501, %f1499, %f1470, %f1497; + add.f32 %f1502, %f1498, 0fBF800000; + fma.rn.f32 %f1505, %f1474, %f1502, %f1473; + fma.rn.f32 %f1507, %f1505, %f1502, %f1476; + fma.rn.f32 %f1509, %f1507, %f1502, %f1478; + fma.rn.f32 %f1511, %f1509, %f1502, %f1480; + fma.rn.f32 %f1513, %f1511, %f1502, %f1482; + fma.rn.f32 %f1515, %f1513, %f1502, %f1484; + fma.rn.f32 %f1517, %f1515, %f1502, %f1486; + fma.rn.f32 %f1519, %f1517, %f1502, %f1488; + mul.f32 %f1520, %f1502, %f1519; + fma.rn.f32 %f1521, %f1520, %f1502, %f1502; + fma.rn.f32 %f1816, %f1501, %f1492, %f1521; + setp.lt.u32 %p373, %r471, 2139095040; + @%p373 bra $L__BB4_257; + + mov.f32 %f1523, 0f7F800000; + fma.rn.f32 %f1816, %f308, %f1523, %f1523; + +$L__BB4_257: + setp.eq.f32 %p374, %f308, 0f00000000; + selp.f32 %f1524, 0fFF800000, %f1816, %p374; + mul.f32 %f1525, %f301, %f1524; + sub.f32 %f1526, %f307, %f1525; + add.f32 %f1527, %f253, %f1526; + add.f32 %f1817, %f1817, %f1527; + bra.uni $L__BB4_258; + +$L__BB4_252: + neg.f32 %f1464, %f252; + sub.f32 %f1465, %f1464, %f1773; + add.f32 %f1817, %f1817, %f1465; + +$L__BB4_258: + add.s32 %r525, %r525, 1; + setp.lt.s32 %p375, %r525, %r85; + @%p375 bra $L__BB4_208; + + add.s32 %r524, %r524, 1; + setp.lt.s32 %p376, %r524, %r85; + @%p376 bra $L__BB4_207; + +$L__BB4_260: + ld.param.u64 %rd54, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_9]; + ld.param.u64 %rd53, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_8]; + ld.param.u32 %r485, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_10]; + ld.param.u64 %rd52, [_Z24kernel_MLEFit_SCMOSXYNB_PKfS0_S0_fiiiPfS1_S1_i_param_7]; + rcp.rn.f32 %f1528, %f1796; + mov.f32 %f1529, 0f3F800000; + mul.f32 %f1530, %f1528, %f1795; + mul.f32 %f1531, %f1528, %f1794; + mul.f32 %f1532, %f1528, %f1793; + fma.rn.f32 %f1533, %f1530, %f1795, 0f00000000; + sub.f32 %f1535, %f1792, %f1533; + fma.rn.f32 %f1536, %f1531, %f1795, 0f00000000; + rcp.rn.f32 %f1537, %f1535; + sub.f32 %f1538, %f1791, %f1536; + mul.f32 %f1539, %f1537, %f1538; + fma.rn.f32 %f1540, %f1532, %f1795, 0f00000000; + sub.f32 %f1541, %f1790, %f1540; + mul.f32 %f1542, %f1537, %f1541; + fma.rn.f32 %f1543, %f1530, %f1794, 0f00000000; + sub.f32 %f1544, %f1791, %f1543; + fma.rn.f32 %f1545, %f1531, %f1794, 0f00000000; + fma.rn.f32 %f1546, %f1539, %f1544, %f1545; + sub.f32 %f1547, %f1797, %f1546; + fma.rn.f32 %f1548, %f1532, %f1794, 0f00000000; + fma.rn.f32 %f1549, %f1542, %f1544, %f1548; + rcp.rn.f32 %f1550, %f1547; + sub.f32 %f1551, %f1798, %f1549; + mul.f32 %f1552, %f1550, %f1551; + fma.rn.f32 %f1553, %f1530, %f1793, 0f00000000; + sub.f32 %f1554, %f1790, %f1553; + fma.rn.f32 %f1555, %f1531, %f1793, 0f00000000; + fma.rn.f32 %f1556, %f1539, %f1554, %f1555; + sub.f32 %f1557, %f1798, %f1556; + fma.rn.f32 %f1558, %f1532, %f1793, 0f00000000; + fma.rn.f32 %f1559, %f1542, %f1554, %f1558; + fma.rn.f32 %f1560, %f1552, %f1557, %f1559; + sub.f32 %f1561, %f1799, %f1560; + add.f32 %f1562, %f1530, 0f00000000; + sub.f32 %f1563, %f941, %f1562; + add.f32 %f1564, %f1531, 0f00000000; + fma.rn.f32 %f1565, %f1539, %f1563, %f1564; + sub.f32 %f1566, %f941, %f1565; + add.f32 %f1567, %f1532, 0f00000000; + fma.rn.f32 %f1568, %f1542, %f1563, %f1567; + fma.rn.f32 %f1569, %f1552, %f1566, %f1568; + sub.f32 %f1570, %f941, %f1569; + div.rn.f32 %f1571, %f1570, %f1561; + fma.rn.f32 %f1572, %f1557, %f1571, 0f00000000; + sub.f32 %f1573, %f1566, %f1572; + mul.f32 %f1574, %f1550, %f1573; + fma.rn.f32 %f1575, %f1544, %f1574, 0f00000000; + fma.rn.f32 %f1576, %f1554, %f1571, %f1575; + sub.f32 %f1577, %f1563, %f1576; + mul.f32 %f1578, %f1537, %f1577; + fma.rn.f32 %f1579, %f1795, %f1578, 0f00000000; + fma.rn.f32 %f1580, %f1794, %f1574, %f1579; + fma.rn.f32 %f1581, %f1793, %f1571, %f1580; + sub.f32 %f1582, %f1529, %f1581; + mul.f32 %f1583, %f1528, %f1582; + fma.rn.f32 %f1584, %f1530, 0f00000000, 0f00000000; + sub.f32 %f1585, %f1529, %f1584; + fma.rn.f32 %f1586, %f1531, 0f00000000, 0f00000000; + fma.rn.f32 %f1587, %f1539, %f1585, %f1586; + sub.f32 %f1588, %f941, %f1587; + fma.rn.f32 %f1589, %f1532, 0f00000000, 0f00000000; + fma.rn.f32 %f1590, %f1542, %f1585, %f1589; + fma.rn.f32 %f1591, %f1552, %f1588, %f1590; + sub.f32 %f1592, %f941, %f1591; + div.rn.f32 %f1593, %f1592, %f1561; + fma.rn.f32 %f1594, %f1557, %f1593, 0f00000000; + sub.f32 %f1595, %f1588, %f1594; + mul.f32 %f1596, %f1550, %f1595; + fma.rn.f32 %f1597, %f1544, %f1596, 0f00000000; + fma.rn.f32 %f1598, %f1554, %f1593, %f1597; + sub.f32 %f1599, %f1585, %f1598; + mul.f32 %f1600, %f1537, %f1599; + sub.f32 %f1601, %f941, %f1584; + fma.rn.f32 %f1602, %f1539, %f1601, %f1586; + sub.f32 %f1603, %f1529, %f1602; + fma.rn.f32 %f1604, %f1542, %f1601, %f1589; + fma.rn.f32 %f1605, %f1552, %f1603, %f1604; + sub.f32 %f1606, %f941, %f1605; + div.rn.f32 %f1607, %f1606, %f1561; + fma.rn.f32 %f1608, %f1557, %f1607, 0f00000000; + sub.f32 %f1609, %f1603, %f1608; + mul.f32 %f1610, %f1550, %f1609; + sub.f32 %f1611, %f941, %f1602; + fma.rn.f32 %f1612, %f1552, %f1611, %f1604; + sub.f32 %f1613, %f1529, %f1612; + div.rn.f32 %f1614, %f1613, %f1561; + cvta.to.global.u64 %rd36, %rd52; + mul.wide.s32 %rd37, %r1, 4; + add.s64 %rd38, %rd36, %rd37; + st.global.f32 [%rd38], %f1777; + add.s32 %r479, %r1, %r485; + mul.wide.s32 %rd39, %r485, 4; + add.s64 %rd40, %rd38, %rd39; + st.global.f32 [%rd40], %f1776; + add.s32 %r480, %r479, %r485; + mul.wide.s32 %rd41, %r480, 4; + add.s64 %rd42, %rd36, %rd41; + st.global.f32 [%rd42], %f1775; + add.s32 %r481, %r480, %r485; + mul.wide.s32 %rd43, %r481, 4; + add.s64 %rd44, %rd36, %rd43; + st.global.f32 [%rd44], %f1774; + cvta.to.global.u64 %rd45, %rd53; + add.s64 %rd46, %rd45, %rd37; + st.global.f32 [%rd46], %f1583; + add.s64 %rd47, %rd46, %rd39; + st.global.f32 [%rd47], %f1600; + add.s64 %rd48, %rd45, %rd41; + st.global.f32 [%rd48], %f1610; + add.s64 %rd49, %rd45, %rd43; + st.global.f32 [%rd49], %f1614; + cvta.to.global.u64 %rd50, %rd54; + add.s64 %rd51, %rd50, %rd37; + st.global.f32 [%rd51], %f1817; + +$L__BB4_261: ret; -} +} // .globl _Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i .visible .entry _Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i( .param .u64 _Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_0, @@ -17697,907 +29094,1630 @@ BB4_151: .param .u32 _Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_10 ) { - .local .align 4 .b8 __local_depot5[100]; - .reg .b64 %SP; - .reg .b64 %SPL; - .reg .pred %p<371>; - .reg .f32 %f<3156>; - .reg .b32 %r<326>; - .reg .b64 %rd<115>; - - - mov.u64 %SPL, __local_depot5; - ld.param.u64 %rd44, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_0]; - ld.param.u64 %rd45, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_1]; - ld.param.u64 %rd46, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_2]; - ld.param.f32 %f3098, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_3]; - ld.param.u32 %r63, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_4]; - ld.param.u32 %r64, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_5]; - ld.param.u32 %r65, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_6]; - ld.param.u32 %r66, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_10]; - cvta.to.global.u64 %rd1, %rd44; - add.u64 %rd2, %SPL, 0; - mov.u32 %r67, %ntid.x; - mov.u32 %r68, %ctaid.x; - mov.u32 %r69, %tid.x; - mad.lo.s32 %r1, %r67, %r68, %r69; - setp.ge.s32 %p13, %r1, %r66; - @%p13 bra BB5_219; - - mov.u32 %r70, 0; - mov.u64 %rd102, %rd2; - mov.u32 %r302, %r70; - -BB5_2: - st.local.u32 [%rd102], %r70; - add.s64 %rd102, %rd102, 4; - add.s32 %r302, %r302, 1; - setp.lt.u32 %p14, %r302, 25; - @%p14 bra BB5_2; - - mul.lo.s32 %r72, %r63, %r63; - mul.lo.s32 %r4, %r72, %r1; - mov.f32 %f572, 0f00000000; - setp.lt.s32 %p15, %r63, 1; - mov.f32 %f1, %f572; - mov.f32 %f2, %f572; - mov.f32 %f3, %f572; - @%p15 bra BB5_17; - - and.b32 %r5, %r63, 3; - shl.b32 %r6, %r63, 2; - mov.f32 %f575, 0f00000000; - mov.u32 %r73, 0; - mov.u32 %r303, %r73; - mov.f32 %f1, %f575; - mov.f32 %f2, %f575; - mov.f32 %f3, %f575; - -BB5_5: - cvt.rn.f32.s32 %f4, %r303; - setp.eq.s32 %p16, %r5, 0; - @%p16 bra BB5_6; - - setp.eq.s32 %p17, %r5, 1; - @%p17 bra BB5_8; - bra.uni BB5_9; - -BB5_8: - mov.u32 %r305, %r73; - bra.uni BB5_13; - -BB5_6: - mov.u32 %r307, %r73; - mov.f32 %f3002, %f1; - mov.f32 %f3003, %f2; - mov.f32 %f3004, %f3; - mov.f32 %f1, %f575; - mov.f32 %f2, %f575; - mov.f32 %f3, %f575; - bra.uni BB5_14; - -BB5_9: - setp.eq.s32 %p18, %r5, 2; - @%p18 bra BB5_10; - bra.uni BB5_11; - -BB5_10: - mov.u32 %r304, %r73; - bra.uni BB5_12; - -BB5_11: - add.s32 %r78, %r303, %r4; - mul.wide.s32 %rd52, %r78, 4; - add.s64 %rd53, %rd1, %rd52; - ld.global.f32 %f579, [%rd53]; - fma.rn.f32 %f3, %f4, %f579, %f3; - fma.rn.f32 %f2, %f579, 0f00000000, %f2; - add.f32 %f1, %f1, %f579; - mov.u32 %r304, 1; - -BB5_12: - neg.s32 %r79, %r304; - and.b32 %r80, %r79, %r63; - add.s32 %r81, %r80, %r303; - add.s32 %r82, %r81, %r4; - mul.wide.s32 %rd54, %r82, 4; - add.s64 %rd55, %rd1, %rd54; - ld.global.f32 %f580, [%rd55]; - fma.rn.f32 %f3, %f4, %f580, %f3; - cvt.rn.f32.s32 %f581, %r304; - fma.rn.f32 %f2, %f581, %f580, %f2; - add.f32 %f1, %f1, %f580; - add.s32 %r305, %r304, 1; - -BB5_13: - mad.lo.s32 %r83, %r305, %r63, %r303; - add.s32 %r84, %r83, %r4; - mul.wide.s32 %rd56, %r84, 4; - add.s64 %rd57, %rd1, %rd56; - ld.global.f32 %f582, [%rd57]; - fma.rn.f32 %f3004, %f4, %f582, %f3; - cvt.rn.f32.s32 %f583, %r305; - fma.rn.f32 %f3003, %f583, %f582, %f2; - add.f32 %f3002, %f1, %f582; - add.s32 %r307, %r305, 1; - mov.f32 %f1, %f3002; - mov.f32 %f2, %f3003; - mov.f32 %f3, %f3004; - -BB5_14: - setp.lt.u32 %p19, %r63, 4; - @%p19 bra BB5_16; - -BB5_15: - mad.lo.s32 %r85, %r307, %r63, %r303; - add.s32 %r86, %r85, %r4; - mul.wide.s32 %rd58, %r86, 4; - add.s64 %rd59, %rd1, %rd58; - ld.global.f32 %f584, [%rd59]; - fma.rn.f32 %f585, %f4, %f584, %f3004; - cvt.rn.f32.s32 %f586, %r307; - fma.rn.f32 %f587, %f586, %f584, %f3003; - add.f32 %f588, %f3002, %f584; - cvt.s64.s32 %rd60, %r6; - add.s64 %rd61, %rd59, %rd60; - ld.global.f32 %f589, [%rd61]; - fma.rn.f32 %f590, %f4, %f589, %f585; - add.s32 %r87, %r307, 1; - cvt.rn.f32.s32 %f591, %r87; - fma.rn.f32 %f592, %f591, %f589, %f587; - add.f32 %f593, %f588, %f589; - add.s64 %rd62, %rd61, %rd60; - ld.global.f32 %f594, [%rd62]; - fma.rn.f32 %f595, %f4, %f594, %f590; - add.s32 %r88, %r307, 2; - cvt.rn.f32.s32 %f596, %r88; - fma.rn.f32 %f597, %f596, %f594, %f592; - add.f32 %f598, %f593, %f594; - add.s64 %rd63, %rd62, %rd60; - ld.global.f32 %f599, [%rd63]; - fma.rn.f32 %f3004, %f4, %f599, %f595; - add.s32 %r89, %r307, 3; - cvt.rn.f32.s32 %f600, %r89; - fma.rn.f32 %f3003, %f600, %f599, %f597; - add.f32 %f3002, %f598, %f599; - add.s32 %r307, %r307, 4; - setp.lt.s32 %p20, %r307, %r63; - mov.f32 %f1, %f3002; - mov.f32 %f2, %f3003; - mov.f32 %f3, %f3004; - @%p20 bra BB5_15; - -BB5_16: - add.s32 %r303, %r303, 1; - setp.lt.s32 %p21, %r303, %r63; - @%p21 bra BB5_5; - -BB5_17: - div.rn.f32 %f3102, %f3, %f1; - div.rn.f32 %f3101, %f2, %f1; - mov.f32 %f603, 0f3F000000; - div.rn.f32 %f604, %f603, %f3098; - div.rn.f32 %f40, %f604, %f3098; - mov.f32 %f3013, 0f51BA43B7; - mov.f32 %f3014, %f572; - @%p15 bra BB5_36; - - and.b32 %r16, %r63, 3; - mov.f32 %f3014, 0f00000000; - mov.u32 %r90, 0; - mov.f32 %f3013, 0f51BA43B7; - mov.u32 %r308, %r90; - -BB5_19: - mov.u32 %r309, %r90; - -BB5_20: - cvt.rn.f32.s32 %f609, %r309; - mul.f32 %f610, %f609, %f609; - mul.f32 %f45, %f40, %f610; - neg.f32 %f46, %f45; - mul.f32 %f47, %f45, 0fBFB8AA3B; - mov.f32 %f608, 0f00000000; - mov.f32 %f3031, %f608; - mov.f32 %f3032, %f608; - mov.u32 %r310, %r90; - -BB5_21: - sub.s32 %r94, %r310, %r308; - cvt.rn.f32.s32 %f50, %r94; - mul.lo.s32 %r20, %r310, %r63; - setp.eq.s32 %p23, %r16, 0; - @%p23 bra BB5_22; - - setp.eq.s32 %p24, %r16, 1; - @%p24 bra BB5_26; - bra.uni BB5_24; - -BB5_26: - mul.f32 %f625, %f50, %f50; - mul.f32 %f3022, %f40, %f625; - neg.f32 %f626, %f3022; - mul.f32 %f627, %f3022, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f628, %f627; - mov.f32 %f629, 0fBF317200; - fma.rn.f32 %f630, %f628, %f629, %f626; - mov.f32 %f631, 0fB5BFBE8E; - fma.rn.f32 %f632, %f628, %f631, %f630; - mul.f32 %f633, %f632, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f634, %f633; - add.f32 %f635, %f628, 0f00000000; - ex2.approx.f32 %f636, %f635; - mul.f32 %f3021, %f634, %f636; - mov.u32 %r312, 0; - bra.uni BB5_29; - -BB5_22: - mov.f32 %f3025, %f3031; - mov.f32 %f3026, %f3032; - mov.u32 %r314, %r90; - mov.f32 %f3031, %f608; - mov.f32 %f3032, %f608; - bra.uni BB5_30; - -BB5_24: - setp.ne.s32 %p25, %r16, 2; - @%p25 bra BB5_27; - - mul.f32 %f613, %f50, %f50; - mul.f32 %f3022, %f40, %f613; - neg.f32 %f614, %f3022; - mul.f32 %f615, %f3022, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f616, %f615; - mov.f32 %f617, 0fBF317200; - fma.rn.f32 %f618, %f616, %f617, %f614; - mov.f32 %f619, 0fB5BFBE8E; - fma.rn.f32 %f620, %f616, %f619, %f618; - mul.f32 %f621, %f620, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f622, %f621; - add.f32 %f623, %f616, 0f00000000; - ex2.approx.f32 %f624, %f623; - mul.f32 %f3021, %f622, %f624; - mov.u32 %r311, 0; - bra.uni BB5_28; - -BB5_27: - setp.lt.f32 %p26, %f45, 0fC2D20000; - mul.f32 %f637, %f50, %f50; - mul.f32 %f3022, %f40, %f637; - neg.f32 %f638, %f3022; - mul.f32 %f639, %f3022, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f640, %f639; - mov.f32 %f641, 0fBF317200; - fma.rn.f32 %f642, %f640, %f641, %f638; - mov.f32 %f643, 0fB5BFBE8E; - fma.rn.f32 %f644, %f640, %f643, %f642; - mul.f32 %f645, %f644, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f646, %f645; - add.f32 %f647, %f640, 0f00000000; - ex2.approx.f32 %f648, %f647; - mul.f32 %f3021, %f646, %f648; - setp.gt.f32 %p27, %f3022, 0f42D20000; - selp.f32 %f649, 0f00000000, %f3021, %p27; - setp.lt.f32 %p28, %f3022, 0fC2D20000; - selp.f32 %f650, 0f7F800000, %f649, %p28; - cvt.rzi.f32.f32 %f651, %f47; - fma.rn.f32 %f652, %f651, %f641, %f46; - fma.rn.f32 %f653, %f651, %f643, %f652; - mul.f32 %f654, %f653, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f655, %f654; - add.f32 %f656, %f651, 0f00000000; - ex2.approx.f32 %f657, %f656; - mul.f32 %f658, %f655, %f657; - setp.gt.f32 %p29, %f45, 0f42D20000; - selp.f32 %f659, 0f00000000, %f658, %p29; - selp.f32 %f660, 0f7F800000, %f659, %p26; - mul.f32 %f661, %f650, %f660; - add.s32 %r98, %r20, %r4; - mul.wide.s32 %rd64, %r98, 4; - add.s64 %rd65, %rd1, %rd64; - ld.global.f32 %f662, [%rd65]; - fma.rn.f32 %f3032, %f662, %f661, %f3032; - add.f32 %f3031, %f3031, %f661; - mov.u32 %r311, 1; - -BB5_28: - sub.s32 %r99, %r309, %r311; - cvt.rn.f32.s32 %f663, %r99; - mul.f32 %f664, %f663, %f663; - setp.gt.f32 %p30, %f3022, 0f42D20000; - selp.f32 %f665, 0f00000000, %f3021, %p30; - setp.lt.f32 %p31, %f3022, 0fC2D20000; - selp.f32 %f666, 0f7F800000, %f665, %p31; - mul.f32 %f667, %f40, %f664; - neg.f32 %f668, %f667; - mul.f32 %f669, %f667, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f670, %f669; - mov.f32 %f671, 0fBF317200; - fma.rn.f32 %f672, %f670, %f671, %f668; - mov.f32 %f673, 0fB5BFBE8E; - fma.rn.f32 %f674, %f670, %f673, %f672; - mul.f32 %f675, %f674, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f676, %f675; - add.f32 %f677, %f670, 0f00000000; - ex2.approx.f32 %f678, %f677; - mul.f32 %f679, %f676, %f678; - setp.gt.f32 %p32, %f667, 0f42D20000; - selp.f32 %f680, 0f00000000, %f679, %p32; - setp.lt.f32 %p33, %f667, 0fC2D20000; - selp.f32 %f681, 0f7F800000, %f680, %p33; - mul.f32 %f682, %f666, %f681; - add.s32 %r100, %r311, %r20; - add.s32 %r101, %r100, %r4; - mul.wide.s32 %rd66, %r101, 4; - add.s64 %rd67, %rd1, %rd66; - ld.global.f32 %f683, [%rd67]; - fma.rn.f32 %f3032, %f683, %f682, %f3032; - add.f32 %f3031, %f3031, %f682; - add.s32 %r312, %r311, 1; - -BB5_29: - sub.s32 %r102, %r309, %r312; - cvt.rn.f32.s32 %f684, %r102; - mul.f32 %f685, %f684, %f684; - setp.gt.f32 %p34, %f3022, 0f42D20000; - selp.f32 %f686, 0f00000000, %f3021, %p34; - setp.lt.f32 %p35, %f3022, 0fC2D20000; - selp.f32 %f687, 0f7F800000, %f686, %p35; - mul.f32 %f688, %f40, %f685; - neg.f32 %f689, %f688; - mul.f32 %f690, %f688, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f691, %f690; - mov.f32 %f692, 0fBF317200; - fma.rn.f32 %f693, %f691, %f692, %f689; - mov.f32 %f694, 0fB5BFBE8E; - fma.rn.f32 %f695, %f691, %f694, %f693; - mul.f32 %f696, %f695, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f697, %f696; - add.f32 %f698, %f691, 0f00000000; - ex2.approx.f32 %f699, %f698; - mul.f32 %f700, %f697, %f699; - setp.gt.f32 %p36, %f688, 0f42D20000; - selp.f32 %f701, 0f00000000, %f700, %p36; - setp.lt.f32 %p37, %f688, 0fC2D20000; - selp.f32 %f702, 0f7F800000, %f701, %p37; - mul.f32 %f703, %f687, %f702; - add.s32 %r103, %r312, %r20; - add.s32 %r104, %r103, %r4; - mul.wide.s32 %rd68, %r104, 4; - add.s64 %rd69, %rd1, %rd68; - ld.global.f32 %f704, [%rd69]; - fma.rn.f32 %f3026, %f704, %f703, %f3032; - add.f32 %f3025, %f3031, %f703; - add.s32 %r314, %r312, 1; - mov.f32 %f3031, %f3025; - mov.f32 %f3032, %f3026; - -BB5_30: - setp.lt.u32 %p38, %r63, 4; - @%p38 bra BB5_33; - - mul.f32 %f705, %f50, %f50; - mul.f32 %f706, %f40, %f705; + .reg .pred %p<680>; + .reg .f32 %f<2967>; + .reg .b32 %r<789>; + .reg .f64 %fd<558>; + .reg .b64 %rd<60>; + + + ld.param.u64 %rd7, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_0]; + ld.param.u64 %rd8, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_1]; + ld.param.u64 %rd9, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_2]; + ld.param.f32 %f2886, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_3]; + ld.param.u32 %r102, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_4]; + ld.param.u32 %r105, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_10]; + cvta.to.global.u64 %rd1, %rd7; + mov.u32 %r106, %ntid.x; + mov.u32 %r107, %ctaid.x; + mov.u32 %r108, %tid.x; + mad.lo.s32 %r1, %r107, %r106, %r108; + setp.ge.s32 %p39, %r1, %r105; + @%p39 bra $L__BB5_427; + + mul.lo.s32 %r109, %r102, %r102; + mul.lo.s32 %r2, %r109, %r1; + setp.lt.s32 %p40, %r102, 1; + mov.f32 %f2816, 0f00000000; + mov.f32 %f2807, %f2816; + mov.f32 %f2808, %f2816; + mov.f32 %f2809, %f2816; + @%p40 bra $L__BB5_11; + + add.s32 %r3, %r102, -1; + and.b32 %r4, %r102, 3; + sub.s32 %r5, %r102, %r4; + shl.b32 %r6, %r102, 2; + mov.u32 %r110, 0; + setp.lt.u32 %p41, %r3, 3; + setp.eq.s32 %p43, %r4, 0; + setp.eq.s32 %p44, %r4, 1; + setp.eq.s32 %p45, %r4, 2; + cvt.s64.s32 %rd15, %r6; + mov.u32 %r774, %r110; + +$L__BB5_3: + cvt.rn.f32.s32 %f4, %r774; + mov.u32 %r777, %r110; + @%p41 bra $L__BB5_6; + + mov.u32 %r777, %r110; + mov.u32 %r776, %r5; + +$L__BB5_5: + mad.lo.s32 %r113, %r777, %r102, %r774; + add.s32 %r114, %r113, %r2; + mul.wide.s32 %rd13, %r114, 4; + add.s64 %rd14, %rd1, %rd13; + ld.global.f32 %f475, [%rd14]; + fma.rn.f32 %f476, %f475, %f4, %f2807; + cvt.rn.f32.s32 %f477, %r777; + fma.rn.f32 %f478, %f475, %f477, %f2808; + add.f32 %f479, %f2809, %f475; + add.s64 %rd16, %rd14, %rd15; + ld.global.f32 %f480, [%rd16]; + fma.rn.f32 %f481, %f480, %f4, %f476; + add.s32 %r115, %r777, 1; + cvt.rn.f32.s32 %f482, %r115; + fma.rn.f32 %f483, %f480, %f482, %f478; + add.f32 %f484, %f479, %f480; + add.s64 %rd17, %rd16, %rd15; + ld.global.f32 %f485, [%rd17]; + fma.rn.f32 %f486, %f485, %f4, %f481; + add.s32 %r116, %r777, 2; + cvt.rn.f32.s32 %f487, %r116; + fma.rn.f32 %f488, %f485, %f487, %f483; + add.f32 %f489, %f484, %f485; + add.s64 %rd18, %rd17, %rd15; + ld.global.f32 %f490, [%rd18]; + fma.rn.f32 %f2807, %f490, %f4, %f486; + add.s32 %r117, %r777, 3; + cvt.rn.f32.s32 %f491, %r117; + fma.rn.f32 %f2808, %f490, %f491, %f488; + add.f32 %f2809, %f489, %f490; + add.s32 %r777, %r777, 4; + add.s32 %r776, %r776, -4; + setp.ne.s32 %p42, %r776, 0; + @%p42 bra $L__BB5_5; + +$L__BB5_6: + @%p43 bra $L__BB5_10; + + mad.lo.s32 %r13, %r777, %r102, %r774; + add.s32 %r118, %r13, %r2; + mul.wide.s32 %rd19, %r118, 4; + add.s64 %rd20, %rd1, %rd19; + ld.global.f32 %f492, [%rd20]; + fma.rn.f32 %f2807, %f492, %f4, %f2807; + cvt.rn.f32.s32 %f493, %r777; + fma.rn.f32 %f2808, %f492, %f493, %f2808; + add.f32 %f2809, %f2809, %f492; + @%p44 bra $L__BB5_10; + + add.s32 %r14, %r13, %r102; + add.s32 %r119, %r14, %r2; + mul.wide.s32 %rd21, %r119, 4; + add.s64 %rd22, %rd1, %rd21; + ld.global.f32 %f494, [%rd22]; + fma.rn.f32 %f2807, %f494, %f4, %f2807; + add.s32 %r120, %r777, 1; + cvt.rn.f32.s32 %f495, %r120; + fma.rn.f32 %f2808, %f494, %f495, %f2808; + add.f32 %f2809, %f2809, %f494; + @%p45 bra $L__BB5_10; + + add.s32 %r121, %r777, 2; + add.s32 %r122, %r14, %r102; + add.s32 %r123, %r122, %r2; + mul.wide.s32 %rd23, %r123, 4; + add.s64 %rd24, %rd1, %rd23; + ld.global.f32 %f496, [%rd24]; + fma.rn.f32 %f2807, %f496, %f4, %f2807; + cvt.rn.f32.s32 %f497, %r121; + fma.rn.f32 %f2808, %f496, %f497, %f2808; + add.f32 %f2809, %f2809, %f496; + +$L__BB5_10: + add.s32 %r774, %r774, 1; + setp.lt.s32 %p46, %r774, %r102; + @%p46 bra $L__BB5_3; + +$L__BB5_11: + div.rn.f32 %f2890, %f2807, %f2809; + div.rn.f32 %f2889, %f2808, %f2809; + mov.f32 %f2887, 0f51BA43B7; + @%p40 bra $L__BB5_51; + + mov.f32 %f502, 0f3F000000; + div.rn.f32 %f503, %f502, %f2886; + div.rn.f32 %f504, %f503, %f2886; + cvt.f64.f32 %fd1, %f504; + mov.f64 %fd215, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r16}, %fd215; + } + and.b32 %r17, %r16, 2146435072; + and.b32 %r18, %r16, 2147483647; + setp.gt.s32 %p48, %r16, -1; + selp.b32 %r19, 2146435072, 0, %p48; + mov.u32 %r124, 0; + or.b32 %r20, %r19, -2147483648; + mul.wide.s32 %rd25, %r2, 4; + add.s64 %rd2, %rd1, %rd25; + setp.eq.s32 %p50, %r17, 1062207488; + setp.lt.s32 %p51, %r16, 0; + setp.ne.s32 %p56, %r18, 1071644672; + setp.eq.s32 %p83, %r18, 2146435072; + mov.u32 %r778, %r124; + +$L__BB5_13: + mov.u32 %r779, %r124; + +$L__BB5_14: + mov.u32 %r127, 1; + sub.s32 %r24, %r127, %r779; + mov.f32 %f2819, 0f00000000; + mov.f32 %f2820, %f2819; + mov.u32 %r780, %r124; + +$L__BB5_15: + add.s32 %r782, %r779, -1; + sub.s32 %r26, %r780, %r778; + cvt.rn.f32.s32 %f507, %r26; + cvt.f64.f32 %fd2, %f507; + { + .reg .b32 %temp; + mov.b64 {%temp, %r27}, %fd2; + } + abs.f64 %fd216, %fd2; + { // callseq 96, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd216; + .param .b64 param1; + st.param.f64 [param1+0], %fd215; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd3, [retval0+0]; + } // callseq 96 + setp.lt.s32 %p49, %r27, 0; + and.pred %p1, %p49, %p50; + selp.b32 %r129, %r27, 0, %p50; + or.b32 %r130, %r129, 2146435072; + selp.b32 %r28, %r130, %r129, %p51; + add.f64 %fd4, %fd2, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r131}, %fd4; + } + and.b32 %r29, %r131, 2146435072; + setp.ne.s32 %p52, %r29, 2146435072; + setp.gtu.f64 %p53, %fd216, 0d7FF0000000000000; + setp.gt.f64 %p54, %fd216, 0d3FF0000000000000; + selp.b32 %r132, 2146435072, 0, %p54; + xor.b32 %r133, %r132, 2146435072; + selp.b32 %r134, %r133, %r132, %p51; + setp.eq.s32 %p55, %r26, -1; + selp.b32 %r30, 1072693248, %r134, %p55; + and.b32 %r31, %r27, 2147483647; + and.pred %p57, %p56, %p1; + selp.b32 %r32, %r20, %r19, %p57; + or.pred %p2, %p52, %p53; + mul.lo.s32 %r135, %r102, %r780; + mul.wide.s32 %rd26, %r135, 4; + add.s64 %rd59, %rd2, %rd26; + mov.u32 %r781, %r24; + mov.u32 %r783, %r124; + +$L__BB5_16: + not.pred %p58, %p1; + mov.f64 %fd502, %fd3; + @%p58 bra $L__BB5_18; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r136}, %fd3; + } + xor.b32 %r137, %r136, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r138, %temp}, %fd3; + } + mov.b64 %fd502, {%r138, %r137}; + +$L__BB5_18: + setp.eq.s32 %p59, %r26, 0; + @%p59 bra $L__BB5_22; + + setp.gt.s32 %p60, %r27, -1; + @%p60 bra $L__BB5_23; + + cvt.rzi.f64.f64 %fd219, %fd215; + setp.eq.f64 %p61, %fd219, 0d4000000000000000; + @%p61 bra $L__BB5_23; + + mov.f64 %fd502, 0dFFF8000000000000; + bra.uni $L__BB5_23; + +$L__BB5_22: + mov.u32 %r139, 0; + mov.b64 %fd502, {%r139, %r28}; + +$L__BB5_23: + selp.f64 %fd503, %fd502, %fd4, %p52; + @%p2 bra $L__BB5_28; + + { + .reg .b32 %temp; + mov.b64 {%r140, %temp}, %fd215; + } + setp.eq.s32 %p64, %r140, 0; + and.pred %p65, %p83, %p64; + @%p65 bra $L__BB5_27; + bra.uni $L__BB5_25; + +$L__BB5_27: + mov.u32 %r143, 0; + mov.b64 %fd503, {%r143, %r30}; + bra.uni $L__BB5_28; + +$L__BB5_25: + setp.ne.s32 %p66, %r31, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r141, %temp}, %fd2; + } + setp.ne.s32 %p67, %r141, 0; + or.pred %p68, %p66, %p67; + mov.f64 %fd503, %fd502; + @%p68 bra $L__BB5_28; + + mov.u32 %r142, 0; + mov.b64 %fd503, {%r142, %r32}; + +$L__BB5_28: + setp.eq.s32 %p69, %r26, 1; + selp.f64 %fd222, 0d3FF0000000000000, %fd503, %p69; + mov.f64 %fd223, 0d3FF0000000000000; + mul.f64 %fd13, %fd222, %fd1; + neg.f64 %fd224, %fd13; + mov.f64 %fd225, 0d4338000000000000; + mov.f64 %fd226, 0d3FF71547652B82FE; + fma.rn.f64 %fd227, %fd224, %fd226, %fd225; + { + .reg .b32 %temp; + mov.b64 {%r36, %temp}, %fd227; + } + mov.f64 %fd228, 0dC338000000000000; + add.rn.f64 %fd229, %fd227, %fd228; + mov.f64 %fd230, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd231, %fd229, %fd230, %fd224; + mov.f64 %fd232, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd233, %fd229, %fd232, %fd231; + mov.f64 %fd234, 0d3E928AF3FCA213EA; + mov.f64 %fd235, 0d3E5ADE1569CE2BDF; + fma.rn.f64 %fd236, %fd235, %fd233, %fd234; + mov.f64 %fd237, 0d3EC71DEE62401315; + fma.rn.f64 %fd238, %fd236, %fd233, %fd237; + mov.f64 %fd239, 0d3EFA01997C89EB71; + fma.rn.f64 %fd240, %fd238, %fd233, %fd239; + mov.f64 %fd241, 0d3F2A01A014761F65; + fma.rn.f64 %fd242, %fd240, %fd233, %fd241; + mov.f64 %fd243, 0d3F56C16C1852B7AF; + fma.rn.f64 %fd244, %fd242, %fd233, %fd243; + mov.f64 %fd245, 0d3F81111111122322; + fma.rn.f64 %fd246, %fd244, %fd233, %fd245; + mov.f64 %fd247, 0d3FA55555555502A1; + fma.rn.f64 %fd248, %fd246, %fd233, %fd247; + mov.f64 %fd249, 0d3FC5555555555511; + fma.rn.f64 %fd250, %fd248, %fd233, %fd249; + mov.f64 %fd251, 0d3FE000000000000B; + fma.rn.f64 %fd252, %fd250, %fd233, %fd251; + fma.rn.f64 %fd253, %fd252, %fd233, %fd223; + fma.rn.f64 %fd254, %fd253, %fd233, %fd223; + { + .reg .b32 %temp; + mov.b64 {%r37, %temp}, %fd254; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r38}, %fd254; + } + shl.b32 %r144, %r36, 20; + add.s32 %r145, %r38, %r144; + mov.b64 %fd504, {%r37, %r145}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r146}, %fd224; + } + mov.b32 %f508, %r146; + abs.f32 %f42, %f508; + setp.lt.f32 %p70, %f42, 0f4086232B; + @%p70 bra $L__BB5_31; + + setp.gt.f64 %p71, %fd13, 0d8000000000000000; + mov.f64 %fd255, 0d7FF0000000000000; + sub.f64 %fd256, %fd255, %fd13; + selp.f64 %fd504, 0d0000000000000000, %fd256, %p71; + setp.geu.f32 %p72, %f42, 0f40874800; + @%p72 bra $L__BB5_31; + + shr.u32 %r147, %r36, 31; + add.s32 %r148, %r36, %r147; + shr.s32 %r149, %r148, 1; + shl.b32 %r150, %r149, 20; + add.s32 %r151, %r38, %r150; + mov.b64 %fd257, {%r37, %r151}; + sub.s32 %r152, %r36, %r149; + shl.b32 %r153, %r152, 20; + add.s32 %r154, %r153, 1072693248; + mov.u32 %r155, 0; + mov.b64 %fd258, {%r155, %r154}; + mul.f64 %fd504, %fd257, %fd258; + +$L__BB5_31: + add.s32 %r156, %r782, 1; + cvt.rn.f32.s32 %f509, %r156; + cvt.f64.f32 %fd18, %f509; + { + .reg .b32 %temp; + mov.b64 {%temp, %r39}, %fd18; + } + abs.f64 %fd19, %fd18; + { // callseq 97, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd19; + .param .b64 param1; + st.param.f64 [param1+0], %fd215; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd506, [retval0+0]; + } // callseq 97 + setp.lt.s32 %p73, %r39, 0; + and.pred %p3, %p73, %p50; + not.pred %p75, %p3; + @%p75 bra $L__BB5_33; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r157}, %fd506; + } + xor.b32 %r158, %r157, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r159, %temp}, %fd506; + } + mov.b64 %fd506, {%r159, %r158}; + +$L__BB5_33: + setp.eq.s32 %p76, %r781, 1; + @%p76 bra $L__BB5_37; + bra.uni $L__BB5_34; + +$L__BB5_37: + mov.u32 %r160, 0; + selp.b32 %r161, %r39, 0, %p50; + or.b32 %r162, %r161, 2146435072; + selp.b32 %r163, %r162, %r161, %p51; + mov.b64 %fd506, {%r160, %r163}; + bra.uni $L__BB5_38; + +$L__BB5_34: + setp.gt.s32 %p77, %r39, -1; + @%p77 bra $L__BB5_38; + + cvt.rzi.f64.f64 %fd261, %fd215; + setp.eq.f64 %p78, %fd261, 0d4000000000000000; + @%p78 bra $L__BB5_38; + + mov.f64 %fd506, 0dFFF8000000000000; + +$L__BB5_38: + add.f64 %fd25, %fd18, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r164}, %fd25; + } + and.b32 %r165, %r164, 2146435072; + setp.ne.s32 %p81, %r165, 2146435072; + mov.f64 %fd507, %fd506; + @%p81 bra $L__BB5_44; + + setp.gtu.f64 %p82, %fd19, 0d7FF0000000000000; + mov.f64 %fd507, %fd25; + @%p82 bra $L__BB5_44; + + { + .reg .b32 %temp; + mov.b64 {%r166, %temp}, %fd215; + } + setp.eq.s32 %p84, %r166, 0; + and.pred %p85, %p83, %p84; + @%p85 bra $L__BB5_43; + bra.uni $L__BB5_41; + +$L__BB5_43: + mov.u32 %r171, 0; + setp.gt.f64 %p92, %fd19, 0d3FF0000000000000; + selp.b32 %r172, 2146435072, 0, %p92; + xor.b32 %r173, %r172, 2146435072; + selp.b32 %r174, %r173, %r172, %p51; + setp.eq.s32 %p93, %r782, -2; + selp.b32 %r175, 1072693248, %r174, %p93; + mov.b64 %fd507, {%r171, %r175}; + bra.uni $L__BB5_44; + +$L__BB5_41: + { + .reg .b32 %temp; + mov.b64 {%r167, %temp}, %fd18; + } + and.b32 %r168, %r39, 2147483647; + setp.ne.s32 %p86, %r168, 2146435072; + setp.ne.s32 %p87, %r167, 0; + or.pred %p88, %p86, %p87; + mov.f64 %fd507, %fd506; + @%p88 bra $L__BB5_44; + + and.pred %p90, %p56, %p3; + selp.b32 %r169, %r20, %r19, %p90; + mov.u32 %r170, 0; + mov.b64 %fd507, {%r170, %r169}; + +$L__BB5_44: + mov.f64 %fd500, 0d3FF0000000000000; + mov.f64 %fd499, 0d3FE000000000000B; + mov.f64 %fd498, 0d3FC5555555555511; + mov.f64 %fd497, 0d3FA55555555502A1; + mov.f64 %fd496, 0d3F81111111122322; + mov.f64 %fd495, 0d3F56C16C1852B7AF; + mov.f64 %fd494, 0d3F2A01A014761F65; + mov.f64 %fd493, 0d3EFA01997C89EB71; + mov.f64 %fd492, 0d3EC71DEE62401315; + mov.f64 %fd491, 0d3E928AF3FCA213EA; + mov.f64 %fd490, 0d3E5ADE1569CE2BDF; + mov.f64 %fd489, 0dBC7ABC9E3B39803F; + mov.f64 %fd488, 0dBFE62E42FEFA39EF; + mov.f64 %fd487, 0dC338000000000000; + mov.f64 %fd486, 0d4338000000000000; + mov.f64 %fd485, 0d3FF71547652B82FE; + setp.eq.s32 %p94, %r782, 0; + selp.f64 %fd264, 0d3FF0000000000000, %fd507, %p94; + mul.f64 %fd29, %fd264, %fd1; + neg.f64 %fd266, %fd29; + fma.rn.f64 %fd269, %fd266, %fd485, %fd486; + { + .reg .b32 %temp; + mov.b64 {%r40, %temp}, %fd269; + } + add.rn.f64 %fd271, %fd269, %fd487; + fma.rn.f64 %fd273, %fd271, %fd488, %fd266; + fma.rn.f64 %fd275, %fd271, %fd489, %fd273; + fma.rn.f64 %fd278, %fd490, %fd275, %fd491; + fma.rn.f64 %fd280, %fd278, %fd275, %fd492; + fma.rn.f64 %fd282, %fd280, %fd275, %fd493; + fma.rn.f64 %fd284, %fd282, %fd275, %fd494; + fma.rn.f64 %fd286, %fd284, %fd275, %fd495; + fma.rn.f64 %fd288, %fd286, %fd275, %fd496; + fma.rn.f64 %fd290, %fd288, %fd275, %fd497; + fma.rn.f64 %fd292, %fd290, %fd275, %fd498; + fma.rn.f64 %fd294, %fd292, %fd275, %fd499; + fma.rn.f64 %fd295, %fd294, %fd275, %fd500; + fma.rn.f64 %fd296, %fd295, %fd275, %fd500; + { + .reg .b32 %temp; + mov.b64 {%r41, %temp}, %fd296; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r42}, %fd296; + } + shl.b32 %r176, %r40, 20; + add.s32 %r177, %r42, %r176; + mov.b64 %fd508, {%r41, %r177}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r178}, %fd266; + } + mov.b32 %f510, %r178; + abs.f32 %f43, %f510; + setp.lt.f32 %p95, %f43, 0f4086232B; + @%p95 bra $L__BB5_47; + + setp.gt.f64 %p96, %fd29, 0d8000000000000000; + mov.f64 %fd297, 0d7FF0000000000000; + sub.f64 %fd298, %fd297, %fd29; + selp.f64 %fd508, 0d0000000000000000, %fd298, %p96; + setp.geu.f32 %p97, %f43, 0f40874800; + @%p97 bra $L__BB5_47; + + shr.u32 %r179, %r40, 31; + add.s32 %r180, %r40, %r179; + shr.s32 %r181, %r180, 1; + shl.b32 %r182, %r181, 20; + add.s32 %r183, %r42, %r182; + mov.b64 %fd299, {%r41, %r183}; + sub.s32 %r184, %r40, %r181; + shl.b32 %r185, %r184, 20; + add.s32 %r186, %r185, 1072693248; + mov.u32 %r187, 0; + mov.b64 %fd300, {%r187, %r186}; + mul.f64 %fd508, %fd299, %fd300; + +$L__BB5_47: + ld.global.f32 %f511, [%rd59]; + cvt.f64.f32 %fd301, %f511; + mul.f64 %fd302, %fd504, %fd508; + cvt.f64.f32 %fd303, %f2820; + fma.rn.f64 %fd304, %fd302, %fd301, %fd303; + cvt.rn.f32.f64 %f2820, %fd304; + cvt.f64.f32 %fd305, %f2819; + add.f64 %fd306, %fd302, %fd305; + cvt.rn.f32.f64 %f2819, %fd306; + add.s32 %r782, %r782, -1; + add.s32 %r781, %r781, 1; + add.s64 %rd59, %rd59, 4; + add.s32 %r783, %r783, 1; + setp.lt.s32 %p98, %r783, %r102; + @%p98 bra $L__BB5_16; + + add.s32 %r780, %r780, 1; + setp.lt.s32 %p99, %r780, %r102; + @%p99 bra $L__BB5_15; + + div.rn.f32 %f512, %f2820, %f2819; + max.f32 %f2816, %f2816, %f512; + min.f32 %f2887, %f2887, %f512; + add.s32 %r779, %r779, 1; + setp.lt.s32 %p100, %r779, %r102; + @%p100 bra $L__BB5_14; + + add.s32 %r778, %r778, 1; + setp.lt.s32 %p101, %r778, %r102; + @%p101 bra $L__BB5_13; + +$L__BB5_51: + ld.param.u32 %r771, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_6]; + mov.f32 %f2794, 0f00000000; + sub.f32 %f514, %f2816, %f2887; + add.f32 %f515, %f514, %f514; + fma.rn.f32 %f516, %f514, 0f40000000, %f515; + mul.f32 %f517, %f516, 0f40490FD8; + mul.f32 %f518, %f517, %f2886; + mul.f32 %f519, %f518, %f2886; + max.f32 %f2888, %f2794, %f519; + setp.lt.s32 %p102, %r771, 1; + @%p102 bra $L__BB5_337; + + ld.param.u32 %r772, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_5]; + cvt.rn.f32.s32 %f522, %r102; + mul.f32 %f51, %f522, 0f3F000000; + cvt.rn.f32.s32 %f52, %r772; + mov.u32 %r784, 0; + cvta.to.global.u64 %rd27, %rd8; + mov.f64 %fd309, 0d4008000000000000; + cvta.to.global.u64 %rd33, %rd9; + +$L__BB5_53: + mov.f32 %f2839, 0f00000000; + mov.f32 %f2840, %f2839; + mov.f32 %f2841, %f2839; + mov.f32 %f2842, %f2839; + mov.f32 %f2843, %f2839; + mov.f32 %f2844, %f2839; + mov.f32 %f2845, %f2839; + mov.f32 %f2846, %f2839; + mov.f32 %f2847, %f2839; + mov.f32 %f2848, %f2839; + @%p40 bra $L__BB5_336; + + mov.f32 %f2839, 0f00000000; + mov.f32 %f543, 0f3F000000; + div.rn.f32 %f544, %f543, %f2886; + div.rn.f32 %f59, %f544, %f2886; + div.rn.f32 %f545, %f2888, 0fC0206C98; + div.rn.f32 %f60, %f545, %f2886; + cvt.f64.f32 %fd34, %f545; + cvt.f64.f32 %fd307, %f2886; + add.f64 %fd35, %fd307, 0d4008000000000000; + div.rn.f32 %f61, %f60, %f2886; + mov.f32 %f546, 0fC0000000; + div.rn.f32 %f62, %f546, %f2886; + div.rn.f32 %f547, %f2888, 0f40206C98; + cvt.f64.f32 %fd36, %f547; + shl.b32 %r194, %r1, 1; + mul.wide.s32 %rd28, %r194, 4; + add.s64 %rd29, %rd27, %rd28; + ld.global.f32 %f63, [%rd29+4]; + ld.global.f32 %f64, [%rd29]; + mov.u32 %r785, 0; + +$L__BB5_55: + cvt.f64.f32 %fd477, %f2886; + mov.u32 %r786, 0; + mov.f32 %f2667, 0f00000000; + cvt.rn.f32.s32 %f75, %r785; + sub.f32 %f76, %f75, %f2890; + add.f32 %f77, %f76, 0f3F000000; + sqrt.rn.f32 %f78, %f59; + mul.f32 %f548, %f77, %f78; + abs.f32 %f79, %f548; + setp.ge.f32 %p104, %f79, 0f3F8060FE; + mul.f32 %f549, %f548, %f548; + selp.f32 %f550, %f79, %f549, %p104; + selp.f32 %f551, 0f3789CA3C, 0f38B1E96A, %p104; + selp.f32 %f552, 0fB9F560B9, 0fBA574D20, %p104; + fma.rn.f32 %f553, %f551, %f550, %f552; + selp.f32 %f554, 0f3BAC840B, 0f3BAAD5EA, %p104; + fma.rn.f32 %f555, %f553, %f550, %f554; + selp.f32 %f556, 0fBD0C8162, 0fBCDC1BE7, %p104; + fma.rn.f32 %f557, %f555, %f550, %f556; + selp.f32 %f558, 0f3E1CF906, 0f3DE718AF, %p104; + fma.rn.f32 %f559, %f557, %f550, %f558; + selp.f32 %f560, 0f3F6A937E, 0fBEC093AC, %p104; + fma.rn.f32 %f561, %f559, %f550, %f560; + selp.f32 %f562, 0f3F20D842, 0f3E0375D3, %p104; + fma.rn.f32 %f563, %f561, %f550, %f562; + neg.f32 %f564, %f79; + selp.f32 %f565, %f564, %f548, %p104; + fma.rn.f32 %f80, %f563, %f565, %f565; + mov.b32 %r196, %f548; + and.b32 %r51, %r196, -2147483648; + add.f32 %f81, %f76, 0fBF000000; + mul.f32 %f566, %f81, %f78; + abs.f32 %f82, %f566; + setp.ge.f32 %p105, %f82, 0f3F8060FE; + mul.f32 %f567, %f566, %f566; + selp.f32 %f568, %f82, %f567, %p105; + selp.f32 %f569, 0f3789CA3C, 0f38B1E96A, %p105; + selp.f32 %f570, 0fB9F560B9, 0fBA574D20, %p105; + fma.rn.f32 %f571, %f569, %f568, %f570; + selp.f32 %f572, 0f3BAC840B, 0f3BAAD5EA, %p105; + fma.rn.f32 %f573, %f571, %f568, %f572; + selp.f32 %f574, 0fBD0C8162, 0fBCDC1BE7, %p105; + fma.rn.f32 %f575, %f573, %f568, %f574; + selp.f32 %f576, 0f3E1CF906, 0f3DE718AF, %p105; + fma.rn.f32 %f577, %f575, %f568, %f576; + selp.f32 %f578, 0f3F6A937E, 0fBEC093AC, %p105; + fma.rn.f32 %f579, %f577, %f568, %f578; + selp.f32 %f580, 0f3F20D842, 0f3E0375D3, %p105; + fma.rn.f32 %f581, %f579, %f568, %f580; + neg.f32 %f582, %f82; + selp.f32 %f583, %f582, %f566, %p105; + fma.rn.f32 %f83, %f581, %f583, %f583; + mov.b32 %r197, %f566; + and.b32 %r52, %r197, -2147483648; + add.f32 %f584, %f75, 0f3F000000; + sub.f32 %f84, %f584, %f2890; + div.rn.f32 %f85, %f84, %f2886; + mov.f32 %f585, 0f3F800000; + cvt.rzi.f32.f32 %f586, %f585; + add.f32 %f587, %f586, %f586; + mov.f32 %f588, 0f40000000; + sub.f32 %f589, %f588, %f587; + abs.f32 %f86, %f589; + setp.eq.f32 %p106, %f86, 0f3F800000; + abs.f32 %f87, %f85; + setp.lt.f32 %p107, %f87, 0f00800000; + mul.f32 %f590, %f87, 0f4B800000; + selp.f32 %f591, %f590, %f87, %p107; + selp.f32 %f592, 0fC3170000, 0fC2FE0000, %p107; + mov.b32 %r198, %f591; + and.b32 %r199, %r198, 8388607; + or.b32 %r200, %r199, 1065353216; + mov.b32 %f593, %r200; + shr.u32 %r201, %r198, 23; + cvt.rn.f32.u32 %f594, %r201; + add.f32 %f595, %f592, %f594; + setp.gt.f32 %p108, %f593, 0f3FB504F3; + mul.f32 %f596, %f593, 0f3F000000; + add.f32 %f597, %f595, 0f3F800000; + selp.f32 %f598, %f597, %f595, %p108; + selp.f32 %f599, %f596, %f593, %p108; + add.f32 %f600, %f599, 0fBF800000; + add.f32 %f601, %f599, 0f3F800000; + rcp.approx.ftz.f32 %f602, %f601; + add.f32 %f603, %f600, %f600; + mul.f32 %f604, %f603, %f602; + mul.f32 %f605, %f604, %f604; + mov.f32 %f606, 0f3C4CAF63; + mov.f32 %f607, 0f3B18F0FE; + fma.rn.f32 %f608, %f607, %f605, %f606; + mov.f32 %f609, 0f3DAAAABD; + fma.rn.f32 %f610, %f608, %f605, %f609; + mul.rn.f32 %f611, %f610, %f605; + mul.rn.f32 %f612, %f611, %f604; + sub.f32 %f613, %f600, %f604; + add.f32 %f614, %f613, %f613; + neg.f32 %f615, %f604; + fma.rn.f32 %f616, %f615, %f600, %f614; + mul.rn.f32 %f617, %f602, %f616; + add.f32 %f618, %f612, %f604; + sub.f32 %f619, %f604, %f618; + add.f32 %f620, %f612, %f619; + add.f32 %f621, %f617, %f620; + add.f32 %f622, %f618, %f621; + sub.f32 %f623, %f618, %f622; + add.f32 %f624, %f621, %f623; + mov.f32 %f625, 0f3F317200; + mul.rn.f32 %f626, %f598, %f625; + mov.f32 %f627, 0f35BFBE8E; + mul.rn.f32 %f628, %f598, %f627; + add.f32 %f629, %f626, %f622; + sub.f32 %f630, %f626, %f629; + add.f32 %f631, %f622, %f630; + add.f32 %f632, %f624, %f631; + add.f32 %f633, %f628, %f632; + add.f32 %f634, %f629, %f633; + sub.f32 %f635, %f629, %f634; + add.f32 %f636, %f633, %f635; + mul.rn.f32 %f637, %f588, %f634; + neg.f32 %f638, %f637; + fma.rn.f32 %f639, %f588, %f634, %f638; + fma.rn.f32 %f640, %f588, %f636, %f639; + fma.rn.f32 %f642, %f2667, %f634, %f640; + add.rn.f32 %f643, %f637, %f642; + neg.f32 %f644, %f643; + add.rn.f32 %f645, %f637, %f644; + add.rn.f32 %f646, %f645, %f642; + mov.b32 %r202, %f643; + setp.eq.s32 %p109, %r202, 1118925336; + add.s32 %r203, %r202, -1; + mov.b32 %f647, %r203; + add.f32 %f648, %f646, 0f37000000; + selp.f32 %f88, %f648, %f646, %p109; + selp.f32 %f649, %f647, %f643, %p109; + mov.f32 %f650, 0f3FB8AA3B; + mul.rn.f32 %f651, %f649, %f650; + cvt.rzi.f32.f32 %f652, %f651; + abs.f32 %f653, %f652; + setp.gt.f32 %p110, %f653, 0f42FC0000; + mov.b32 %r204, %f652; + and.b32 %r205, %r204, -2147483648; + or.b32 %r206, %r205, 1123811328; + mov.b32 %f654, %r206; + selp.f32 %f655, %f654, %f652, %p110; + mov.f32 %f656, 0fBF317218; + fma.rn.f32 %f657, %f655, %f656, %f649; + mov.f32 %f658, 0f3102E308; + fma.rn.f32 %f659, %f655, %f658, %f657; + mul.f32 %f660, %f659, 0f3FB8AA3B; + add.f32 %f661, %f655, 0f4B40007F; + mov.b32 %r207, %f661; + shl.b32 %r208, %r207, 23; + mov.b32 %f662, %r208; + ex2.approx.ftz.f32 %f663, %f660; + mul.f32 %f89, %f663, %f662; + setp.lt.f32 %p111, %f85, 0f00000000; + and.pred %p4, %p111, %p106; + div.rn.f32 %f90, %f81, %f2886; + abs.f32 %f91, %f90; + setp.lt.f32 %p112, %f91, 0f00800000; + mul.f32 %f664, %f91, 0f4B800000; + selp.f32 %f665, %f664, %f91, %p112; + selp.f32 %f666, 0fC3170000, 0fC2FE0000, %p112; + mov.b32 %r209, %f665; + and.b32 %r210, %r209, 8388607; + or.b32 %r211, %r210, 1065353216; + mov.b32 %f667, %r211; + shr.u32 %r212, %r209, 23; + cvt.rn.f32.u32 %f668, %r212; + add.f32 %f669, %f666, %f668; + setp.gt.f32 %p113, %f667, 0f3FB504F3; + mul.f32 %f670, %f667, 0f3F000000; + add.f32 %f671, %f669, 0f3F800000; + selp.f32 %f672, %f671, %f669, %p113; + selp.f32 %f673, %f670, %f667, %p113; + add.f32 %f674, %f673, 0fBF800000; + add.f32 %f675, %f673, 0f3F800000; + rcp.approx.ftz.f32 %f676, %f675; + add.f32 %f677, %f674, %f674; + mul.f32 %f678, %f677, %f676; + mul.f32 %f679, %f678, %f678; + fma.rn.f32 %f680, %f607, %f679, %f606; + fma.rn.f32 %f681, %f680, %f679, %f609; + mul.rn.f32 %f682, %f681, %f679; + mul.rn.f32 %f683, %f682, %f678; + sub.f32 %f684, %f674, %f678; + add.f32 %f685, %f684, %f684; + neg.f32 %f686, %f678; + fma.rn.f32 %f687, %f686, %f674, %f685; + mul.rn.f32 %f688, %f676, %f687; + add.f32 %f689, %f683, %f678; + sub.f32 %f690, %f678, %f689; + add.f32 %f691, %f683, %f690; + add.f32 %f692, %f688, %f691; + add.f32 %f693, %f689, %f692; + sub.f32 %f694, %f689, %f693; + add.f32 %f695, %f692, %f694; + mul.rn.f32 %f696, %f672, %f625; + mul.rn.f32 %f697, %f672, %f627; + add.f32 %f698, %f696, %f693; + sub.f32 %f699, %f696, %f698; + add.f32 %f700, %f693, %f699; + add.f32 %f701, %f695, %f700; + add.f32 %f702, %f697, %f701; + add.f32 %f703, %f698, %f702; + sub.f32 %f704, %f698, %f703; + add.f32 %f705, %f702, %f704; + mul.rn.f32 %f706, %f588, %f703; neg.f32 %f707, %f706; - mul.f32 %f708, %f706, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f709, %f708; - mov.f32 %f710, 0fBF317200; - fma.rn.f32 %f711, %f709, %f710, %f707; - mov.f32 %f712, 0fB5BFBE8E; - fma.rn.f32 %f713, %f709, %f712, %f711; - mul.f32 %f714, %f713, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f715, %f714; - add.f32 %f716, %f709, 0f00000000; - ex2.approx.f32 %f717, %f716; - mul.f32 %f718, %f715, %f717; - setp.gt.f32 %p39, %f706, 0f42D20000; - selp.f32 %f719, 0f00000000, %f718, %p39; - setp.lt.f32 %p40, %f706, 0fC2D20000; - selp.f32 %f75, 0f7F800000, %f719, %p40; - mov.f32 %f3031, %f3025; - mov.f32 %f3032, %f3026; - -BB5_32: - sub.s32 %r105, %r309, %r314; - cvt.rn.f32.s32 %f720, %r105; - mul.f32 %f721, %f720, %f720; - mul.f32 %f722, %f40, %f721; - neg.f32 %f723, %f722; - mul.f32 %f724, %f722, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f725, %f724; - fma.rn.f32 %f727, %f725, %f710, %f723; - fma.rn.f32 %f729, %f725, %f712, %f727; - mul.f32 %f730, %f729, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f731, %f730; - add.f32 %f732, %f725, 0f00000000; - ex2.approx.f32 %f733, %f732; - mul.f32 %f734, %f731, %f733; - setp.gt.f32 %p41, %f722, 0f42D20000; - selp.f32 %f735, 0f00000000, %f734, %p41; - setp.lt.f32 %p42, %f722, 0fC2D20000; - selp.f32 %f736, 0f7F800000, %f735, %p42; - mul.f32 %f737, %f75, %f736; - add.s32 %r106, %r314, %r20; - add.s32 %r107, %r106, %r4; - mul.wide.s32 %rd70, %r107, 4; - add.s64 %rd71, %rd1, %rd70; - ld.global.f32 %f738, [%rd71]; - fma.rn.f32 %f739, %f738, %f737, %f3032; - add.f32 %f740, %f3031, %f737; - add.s32 %r108, %r314, 1; - sub.s32 %r109, %r309, %r108; - cvt.rn.f32.s32 %f741, %r109; - mul.f32 %f742, %f741, %f741; - mul.f32 %f743, %f40, %f742; - neg.f32 %f744, %f743; - mul.f32 %f745, %f743, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f746, %f745; - fma.rn.f32 %f747, %f746, %f710, %f744; - fma.rn.f32 %f748, %f746, %f712, %f747; - mul.f32 %f749, %f748, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f750, %f749; - add.f32 %f751, %f746, 0f00000000; - ex2.approx.f32 %f752, %f751; - mul.f32 %f753, %f750, %f752; - setp.gt.f32 %p43, %f743, 0f42D20000; - selp.f32 %f754, 0f00000000, %f753, %p43; - setp.lt.f32 %p44, %f743, 0fC2D20000; - selp.f32 %f755, 0f7F800000, %f754, %p44; - mul.f32 %f756, %f75, %f755; - ld.global.f32 %f757, [%rd71+4]; - fma.rn.f32 %f758, %f757, %f756, %f739; - add.f32 %f759, %f740, %f756; - add.s32 %r110, %r314, 2; - sub.s32 %r111, %r309, %r110; - cvt.rn.f32.s32 %f760, %r111; - mul.f32 %f761, %f760, %f760; - mul.f32 %f762, %f40, %f761; - neg.f32 %f763, %f762; - mul.f32 %f764, %f762, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f765, %f764; - fma.rn.f32 %f766, %f765, %f710, %f763; - fma.rn.f32 %f767, %f765, %f712, %f766; - mul.f32 %f768, %f767, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f769, %f768; - add.f32 %f770, %f765, 0f00000000; - ex2.approx.f32 %f771, %f770; - mul.f32 %f772, %f769, %f771; - setp.gt.f32 %p45, %f762, 0f42D20000; - selp.f32 %f773, 0f00000000, %f772, %p45; - setp.lt.f32 %p46, %f762, 0fC2D20000; - selp.f32 %f774, 0f7F800000, %f773, %p46; - mul.f32 %f775, %f75, %f774; - ld.global.f32 %f776, [%rd71+8]; - fma.rn.f32 %f777, %f776, %f775, %f758; - add.f32 %f778, %f759, %f775; - add.s32 %r112, %r314, 3; - sub.s32 %r113, %r309, %r112; - cvt.rn.f32.s32 %f779, %r113; - mul.f32 %f780, %f779, %f779; - mul.f32 %f781, %f40, %f780; - neg.f32 %f782, %f781; - mul.f32 %f783, %f781, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f784, %f783; - fma.rn.f32 %f785, %f784, %f710, %f782; - fma.rn.f32 %f786, %f784, %f712, %f785; - mul.f32 %f787, %f786, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f788, %f787; - add.f32 %f789, %f784, 0f00000000; - ex2.approx.f32 %f790, %f789; - mul.f32 %f791, %f788, %f790; - setp.gt.f32 %p47, %f781, 0f42D20000; - selp.f32 %f792, 0f00000000, %f791, %p47; - setp.lt.f32 %p48, %f781, 0fC2D20000; - selp.f32 %f793, 0f7F800000, %f792, %p48; - mul.f32 %f794, %f75, %f793; - ld.global.f32 %f795, [%rd71+12]; - fma.rn.f32 %f3032, %f795, %f794, %f777; - add.f32 %f3031, %f778, %f794; - add.s32 %r314, %r314, 4; - setp.lt.s32 %p49, %r314, %r63; - @%p49 bra BB5_32; - -BB5_33: - add.s32 %r310, %r310, 1; - setp.lt.s32 %p50, %r310, %r63; - @%p50 bra BB5_21; - - div.rn.f32 %f796, %f3032, %f3031; - max.f32 %f3014, %f3014, %f796; - min.f32 %f3013, %f3013, %f796; - add.s32 %r309, %r309, 1; - setp.lt.s32 %p51, %r309, %r63; - @%p51 bra BB5_20; - - add.s32 %r308, %r308, 1; - setp.lt.s32 %p52, %r308, %r63; - @%p52 bra BB5_19; - -BB5_36: - sub.f32 %f798, %f3014, %f3013; - add.f32 %f799, %f798, %f798; - fma.rn.f32 %f800, %f798, 0f40000000, %f799; - mul.f32 %f801, %f800, 0f40490FD8; - mul.f32 %f802, %f801, %f3098; - mul.f32 %f803, %f802, %f3098; - max.f32 %f3100, %f572, %f803; - setp.lt.s32 %p53, %r65, 1; - @%p53 bra BB5_120; - - cvt.rn.f32.s32 %f806, %r63; - mul.f32 %f87, %f806, 0f3F000000; - cvt.rn.f32.s32 %f88, %r64; - mov.u32 %r315, 0; - cvta.to.global.u64 %rd72, %rd45; - cvta.to.global.u64 %rd78, %rd46; - -BB5_38: - mov.f32 %f3051, 0f00000000; - mov.f32 %f3052, %f3051; - mov.f32 %f3053, %f3051; - mov.f32 %f3054, %f3051; - mov.f32 %f3055, %f3051; - mov.f32 %f3056, %f3051; - mov.f32 %f3057, %f3051; - mov.f32 %f3058, %f3051; - mov.f32 %f3059, %f3051; - mov.f32 %f3060, %f3051; - @%p15 bra BB5_119; - - div.rn.f32 %f828, %f603, %f3098; - div.rn.f32 %f95, %f828, %f3098; - neg.f32 %f829, %f3100; - div.rn.f32 %f830, %f829, 0f40206C98; - div.rn.f32 %f96, %f830, %f3098; - div.rn.f32 %f97, %f96, %f3098; - mov.f32 %f831, 0fC0000000; - div.rn.f32 %f98, %f831, %f3098; - mul.f32 %f832, %f3098, %f3098; - mul.f32 %f833, %f3098, %f832; - div.rn.f32 %f99, %f830, %f833; - mul.f32 %f834, %f832, %f832; - mul.f32 %f835, %f3098, %f834; - div.rn.f32 %f836, %f3100, 0f40206C98; - div.rn.f32 %f100, %f836, %f835; - mov.u32 %r316, 0; - mov.f32 %f3051, 0f00000000; - mov.f32 %f3052, %f3051; - mov.f32 %f3053, %f3051; - mov.f32 %f3054, %f3051; - mov.f32 %f3055, %f3051; - mov.f32 %f3056, %f3051; - mov.f32 %f3057, %f3051; - mov.f32 %f3058, %f3051; - mov.f32 %f3059, %f3051; - mov.f32 %f3060, %f3051; - -BB5_40: - mov.u32 %r317, 0; - cvt.rn.f32.s32 %f111, %r316; - sub.f32 %f112, %f111, %f3102; - add.f32 %f113, %f112, 0f3F800000; - sqrt.rn.f32 %f114, %f95; - mul.f32 %f115, %f113, %f114; - abs.f32 %f116, %f115; - mul.f32 %f117, %f115, %f115; - mul.f32 %f118, %f112, %f114; - abs.f32 %f119, %f118; - shl.b32 %r121, %r1, 1; - mul.wide.s32 %rd73, %r121, 4; - add.s64 %rd74, %rd72, %rd73; - ld.global.f32 %f121, [%rd74+4]; - ld.global.f32 %f122, [%rd74]; - add.f32 %f837, %f111, 0f3F800000; - sub.f32 %f123, %f837, %f3102; - div.rn.f32 %f124, %f123, %f3098; - mov.f32 %f838, 0f3F800000; - cvt.rzi.f32.f32 %f839, %f838; - add.f32 %f840, %f839, %f839; - mov.f32 %f841, 0f40000000; - sub.f32 %f842, %f841, %f840; - abs.f32 %f125, %f842; - setp.eq.f32 %p55, %f125, 0f3F800000; - abs.f32 %f126, %f124; - setp.lt.f32 %p56, %f126, 0f00800000; - mul.f32 %f843, %f126, 0f4B800000; - selp.f32 %f844, 0fC3170000, 0fC2FE0000, %p56; - selp.f32 %f845, %f843, %f126, %p56; - mov.b32 %r122, %f845; - and.b32 %r123, %r122, 8388607; - or.b32 %r124, %r123, 1065353216; - mov.b32 %f846, %r124; - shr.u32 %r125, %r122, 23; - cvt.rn.f32.u32 %f847, %r125; - add.f32 %f848, %f844, %f847; - setp.gt.f32 %p57, %f846, 0f3FB504F3; - mul.f32 %f849, %f846, 0f3F000000; - add.f32 %f850, %f848, 0f3F800000; - selp.f32 %f851, %f849, %f846, %p57; - selp.f32 %f852, %f850, %f848, %p57; - add.f32 %f127, %f851, 0fBF800000; - add.f32 %f128, %f851, 0f3F800000; - add.f32 %f129, %f127, %f127; - mov.f32 %f853, 0f3F317200; - mul.rn.f32 %f130, %f852, %f853; - mov.f32 %f854, 0f35BFBE8E; - mul.rn.f32 %f131, %f852, %f854; - setp.lt.f32 %p58, %f124, 0f00000000; - and.pred %p1, %p58, %p55; - add.f32 %f855, %f124, %f124; - selp.f32 %f132, %f855, 0f00000000, %p55; - div.rn.f32 %f133, %f112, %f3098; - abs.f32 %f134, %f133; - setp.lt.f32 %p59, %f134, 0f00800000; - mul.f32 %f857, %f134, 0f4B800000; - selp.f32 %f858, 0fC3170000, 0fC2FE0000, %p59; - selp.f32 %f859, %f857, %f134, %p59; - mov.b32 %r126, %f859; - and.b32 %r127, %r126, 8388607; - or.b32 %r128, %r127, 1065353216; - mov.b32 %f860, %r128; - shr.u32 %r129, %r126, 23; - cvt.rn.f32.u32 %f861, %r129; - add.f32 %f862, %f858, %f861; - setp.gt.f32 %p60, %f860, 0f3FB504F3; - mul.f32 %f863, %f860, 0f3F000000; - add.f32 %f864, %f862, 0f3F800000; - selp.f32 %f865, %f863, %f860, %p60; - selp.f32 %f866, %f864, %f862, %p60; - add.f32 %f135, %f865, 0fBF800000; - add.f32 %f136, %f865, 0f3F800000; - add.f32 %f137, %f135, %f135; - mul.rn.f32 %f138, %f866, %f853; - mul.rn.f32 %f139, %f866, %f854; - setp.lt.f32 %p61, %f133, 0f00000000; - and.pred %p2, %p61, %p55; - add.f32 %f867, %f133, %f133; - selp.f32 %f140, %f867, 0f00000000, %p55; - mul.f32 %f869, %f113, %f113; - mul.f32 %f141, %f113, %f869; - -BB5_41: - setp.ltu.f32 %p62, %f116, 0f3F800000; - @%p62 bra BB5_43; - bra.uni BB5_42; - -BB5_43: - mov.f32 %f889, 0f3BA0C9F8; - mov.f32 %f890, 0fBA1268FB; - fma.rn.f32 %f891, %f890, %f117, %f889; - mov.f32 %f892, 0fBCDABFD4; - fma.rn.f32 %f893, %f891, %f117, %f892; - mov.f32 %f894, 0f3DE70331; - fma.rn.f32 %f895, %f893, %f117, %f894; - mov.f32 %f896, 0fBEC09330; - fma.rn.f32 %f897, %f895, %f117, %f896; - mov.f32 %f898, 0f3F906EBA; - fma.rn.f32 %f899, %f897, %f117, %f898; - mul.f32 %f3061, %f115, %f899; - bra.uni BB5_44; - -BB5_42: - mov.f32 %f2822, 0f3F800000; - setp.ltu.f32 %p63, %f116, 0f407AD445; - mov.f32 %f871, 0f3A03BB71; - mov.f32 %f872, 0fB7B730FB; - fma.rn.f32 %f873, %f872, %f116, %f871; - mov.f32 %f874, 0fBBACA3B3; - fma.rn.f32 %f875, %f873, %f116, %f874; - mov.f32 %f876, 0f3D0A7445; - fma.rn.f32 %f877, %f875, %f116, %f876; - mov.f32 %f878, 0fBE1B3B75; - fma.rn.f32 %f879, %f877, %f116, %f878; - mov.f32 %f880, 0fBF6B385A; - fma.rn.f32 %f881, %f879, %f116, %f880; - mov.f32 %f882, 0fBFD0316E; - fma.rn.f32 %f883, %f881, %f116, %f882; - mov.f32 %f884, 0fBA031CCE; - fma.rn.f32 %f885, %f883, %f116, %f884; - ex2.approx.ftz.f32 %f886, %f885; - sub.f32 %f888, %f2822, %f886; - mov.b32 %r130, %f888; - selp.b32 %r131, %r130, 1065353216, %p63; - mov.b32 %r132, %f115; - and.b32 %r133, %r132, -2147483648; - or.b32 %r134, %r131, %r133; - mov.b32 %f3061, %r134; - -BB5_44: - setp.ltu.f32 %p64, %f119, 0f3F800000; - @%p64 bra BB5_46; - bra.uni BB5_45; - -BB5_46: - cvt.rn.f32.s32 %f2864, %r316; - sub.f32 %f2863, %f2864, %f3102; - mul.f32 %f2862, %f2863, %f114; - mul.f32 %f2861, %f2862, %f2862; - mov.f32 %f918, 0f3BA0C9F8; - mov.f32 %f919, 0fBA1268FB; - fma.rn.f32 %f920, %f919, %f2861, %f918; - mov.f32 %f921, 0fBCDABFD4; - fma.rn.f32 %f922, %f920, %f2861, %f921; - mov.f32 %f923, 0f3DE70331; - fma.rn.f32 %f924, %f922, %f2861, %f923; - mov.f32 %f925, 0fBEC09330; - fma.rn.f32 %f926, %f924, %f2861, %f925; - mov.f32 %f927, 0f3F906EBA; - fma.rn.f32 %f928, %f926, %f2861, %f927; - mul.f32 %f3062, %f2862, %f928; - bra.uni BB5_47; - -BB5_45: - cvt.rn.f32.s32 %f2877, %r316; - sub.f32 %f2876, %f2877, %f3102; - mul.f32 %f2875, %f2876, %f114; - mov.f32 %f2823, 0f3F800000; - setp.ltu.f32 %p65, %f119, 0f407AD445; - mov.f32 %f900, 0f3A03BB71; - mov.f32 %f901, 0fB7B730FB; - fma.rn.f32 %f902, %f901, %f119, %f900; - mov.f32 %f903, 0fBBACA3B3; - fma.rn.f32 %f904, %f902, %f119, %f903; - mov.f32 %f905, 0f3D0A7445; - fma.rn.f32 %f906, %f904, %f119, %f905; - mov.f32 %f907, 0fBE1B3B75; - fma.rn.f32 %f908, %f906, %f119, %f907; - mov.f32 %f909, 0fBF6B385A; - fma.rn.f32 %f910, %f908, %f119, %f909; - mov.f32 %f911, 0fBFD0316E; - fma.rn.f32 %f912, %f910, %f119, %f911; - mov.f32 %f913, 0fBA031CCE; - fma.rn.f32 %f914, %f912, %f119, %f913; - ex2.approx.ftz.f32 %f915, %f914; - sub.f32 %f917, %f2823, %f915; - mov.b32 %r135, %f917; - selp.b32 %r136, %r135, 1065353216, %p65; - mov.b32 %r137, %f2875; - and.b32 %r138, %r137, -2147483648; - or.b32 %r139, %r136, %r138; - mov.b32 %f3062, %r139; - -BB5_47: - sub.f32 %f929, %f3061, %f3062; - mul.f32 %f159, %f929, 0f3F000000; - cvt.rn.f32.s32 %f160, %r317; - sub.f32 %f161, %f160, %f3101; - add.f32 %f162, %f161, 0f3F800000; - mul.f32 %f163, %f162, %f114; - abs.f32 %f164, %f163; - setp.ltu.f32 %p66, %f164, 0f3F800000; - @%p66 bra BB5_49; - bra.uni BB5_48; - -BB5_49: - mul.f32 %f948, %f163, %f163; - mov.f32 %f949, 0f3BA0C9F8; - mov.f32 %f950, 0fBA1268FB; - fma.rn.f32 %f951, %f950, %f948, %f949; - mov.f32 %f952, 0fBCDABFD4; - fma.rn.f32 %f953, %f951, %f948, %f952; - mov.f32 %f954, 0f3DE70331; - fma.rn.f32 %f955, %f953, %f948, %f954; - mov.f32 %f956, 0fBEC09330; - fma.rn.f32 %f957, %f955, %f948, %f956; - mov.f32 %f958, 0f3F906EBA; - fma.rn.f32 %f959, %f957, %f948, %f958; - mul.f32 %f3063, %f163, %f959; - bra.uni BB5_50; - -BB5_48: - mov.f32 %f2824, 0f3F800000; - mov.f32 %f930, 0f3A03BB71; - mov.f32 %f931, 0fB7B730FB; - fma.rn.f32 %f932, %f931, %f164, %f930; - mov.f32 %f933, 0fBBACA3B3; - fma.rn.f32 %f934, %f932, %f164, %f933; - mov.f32 %f935, 0f3D0A7445; - fma.rn.f32 %f936, %f934, %f164, %f935; - mov.f32 %f937, 0fBE1B3B75; - fma.rn.f32 %f938, %f936, %f164, %f937; - mov.f32 %f939, 0fBF6B385A; - fma.rn.f32 %f940, %f938, %f164, %f939; - mov.f32 %f941, 0fBFD0316E; - fma.rn.f32 %f942, %f940, %f164, %f941; - mov.f32 %f943, 0fBA031CCE; - fma.rn.f32 %f944, %f942, %f164, %f943; - ex2.approx.ftz.f32 %f945, %f944; - sub.f32 %f947, %f2824, %f945; - mov.b32 %r140, %f947; - setp.ltu.f32 %p67, %f164, 0f407AD445; - selp.b32 %r141, %r140, 1065353216, %p67; - mov.b32 %r142, %f163; - and.b32 %r143, %r142, -2147483648; - or.b32 %r144, %r141, %r143; - mov.b32 %f3063, %r144; - -BB5_50: - cvt.rn.f32.s32 %f2879, %r317; - sub.f32 %f2878, %f2879, %f3101; - mul.f32 %f168, %f2878, %f114; - abs.f32 %f169, %f168; - setp.ltu.f32 %p68, %f169, 0f3F800000; - @%p68 bra BB5_52; - bra.uni BB5_51; - -BB5_52: - mul.f32 %f978, %f168, %f168; - mov.f32 %f979, 0f3BA0C9F8; - mov.f32 %f980, 0fBA1268FB; - fma.rn.f32 %f981, %f980, %f978, %f979; - mov.f32 %f982, 0fBCDABFD4; - fma.rn.f32 %f983, %f981, %f978, %f982; - mov.f32 %f984, 0f3DE70331; - fma.rn.f32 %f985, %f983, %f978, %f984; - mov.f32 %f986, 0fBEC09330; - fma.rn.f32 %f987, %f985, %f978, %f986; - mov.f32 %f988, 0f3F906EBA; - fma.rn.f32 %f989, %f987, %f978, %f988; - mul.f32 %f3064, %f168, %f989; - bra.uni BB5_53; - -BB5_51: - mov.f32 %f2825, 0f3F800000; - mov.f32 %f960, 0f3A03BB71; - mov.f32 %f961, 0fB7B730FB; - fma.rn.f32 %f962, %f961, %f169, %f960; - mov.f32 %f963, 0fBBACA3B3; - fma.rn.f32 %f964, %f962, %f169, %f963; - mov.f32 %f965, 0f3D0A7445; - fma.rn.f32 %f966, %f964, %f169, %f965; - mov.f32 %f967, 0fBE1B3B75; - fma.rn.f32 %f968, %f966, %f169, %f967; - mov.f32 %f969, 0fBF6B385A; - fma.rn.f32 %f970, %f968, %f169, %f969; - mov.f32 %f971, 0fBFD0316E; - fma.rn.f32 %f972, %f970, %f169, %f971; - mov.f32 %f973, 0fBA031CCE; - fma.rn.f32 %f974, %f972, %f169, %f973; - ex2.approx.ftz.f32 %f975, %f974; - sub.f32 %f977, %f2825, %f975; - mov.b32 %r145, %f977; - setp.ltu.f32 %p69, %f169, 0f407AD445; - selp.b32 %r146, %r145, 1065353216, %p69; - mov.b32 %r147, %f168; - and.b32 %r148, %r147, -2147483648; - or.b32 %r149, %r146, %r148; - mov.b32 %f3064, %r149; - -BB5_53: - cvt.rn.f32.s32 %f2826, %r316; - sub.f32 %f992, %f3063, %f3064; - mul.f32 %f173, %f992, 0f3F000000; - mul.f32 %f993, %f159, %f3100; - fma.rn.f32 %f174, %f173, %f993, %f3013; - mad.lo.s32 %r150, %r317, %r63, %r316; - add.s32 %r151, %r150, %r4; - mul.wide.s32 %rd76, %r151, 4; - add.s64 %rd77, %rd1, %rd76; - ld.global.f32 %f175, [%rd77]; - add.f32 %f994, %f160, %f121; - fma.rn.f32 %f995, %f88, %f994, %f122; - add.f32 %f996, %f2826, %f995; - cvt.rzi.s32.f32 %r152, %f996; - mul.wide.s32 %rd79, %r152, 4; - add.s64 %rd80, %rd78, %rd79; - ld.global.f32 %f3097, [%rd80]; - // inline asm - rcp.approx.ftz.f32 %f990,%f128; - // inline asm - mul.f32 %f997, %f990, %f129; + fma.rn.f32 %f708, %f588, %f703, %f707; + fma.rn.f32 %f709, %f588, %f705, %f708; + fma.rn.f32 %f710, %f2667, %f703, %f709; + add.rn.f32 %f711, %f706, %f710; + neg.f32 %f712, %f711; + add.rn.f32 %f713, %f706, %f712; + add.rn.f32 %f714, %f713, %f710; + mov.b32 %r213, %f711; + setp.eq.s32 %p114, %r213, 1118925336; + add.s32 %r214, %r213, -1; + mov.b32 %f715, %r214; + add.f32 %f716, %f714, 0f37000000; + selp.f32 %f92, %f716, %f714, %p114; + selp.f32 %f717, %f715, %f711, %p114; + mul.rn.f32 %f718, %f717, %f650; + cvt.rzi.f32.f32 %f719, %f718; + abs.f32 %f720, %f719; + setp.gt.f32 %p115, %f720, 0f42FC0000; + mov.b32 %r215, %f719; + and.b32 %r216, %r215, -2147483648; + or.b32 %r217, %r216, 1123811328; + mov.b32 %f721, %r217; + selp.f32 %f722, %f721, %f719, %p115; + fma.rn.f32 %f723, %f722, %f656, %f717; + fma.rn.f32 %f724, %f722, %f658, %f723; + mul.f32 %f725, %f724, 0f3FB8AA3B; + add.f32 %f726, %f722, 0f4B40007F; + mov.b32 %r218, %f726; + shl.b32 %r219, %r218, 23; + mov.b32 %f727, %r219; + ex2.approx.ftz.f32 %f728, %f725; + mul.f32 %f93, %f728, %f727; + { + .reg .b32 %temp; + mov.b64 {%temp, %r53}, %fd477; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r54}, %fd309; + } + and.b32 %r55, %r54, 2146435072; + setp.eq.s32 %p117, %r55, 1073741824; + abs.f64 %fd310, %fd477; + { // callseq 98, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd310; + .param .b64 param1; + st.param.f64 [param1+0], %fd309; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd37, [retval0+0]; + } // callseq 98 + setp.lt.s32 %p118, %r53, 0; + and.pred %p6, %p118, %p117; + selp.b32 %r220, %r53, 0, %p117; + setp.lt.s32 %p119, %r54, 0; + or.b32 %r221, %r220, 2146435072; + selp.b32 %r56, %r221, %r220, %p119; + { + .reg .b32 %temp; + mov.b64 {%temp, %r222}, %fd35; + } + and.b32 %r57, %r222, 2146435072; + setp.ne.s32 %p120, %r57, 2146435072; + setp.gtu.f64 %p121, %fd310, 0d7FF0000000000000; + and.b32 %r58, %r54, 2147483647; + setp.gt.f64 %p122, %fd310, 0d3FF0000000000000; + selp.b32 %r223, 2146435072, 0, %p122; + xor.b32 %r224, %r223, 2146435072; + selp.b32 %r225, %r224, %r223, %p119; + setp.eq.f32 %p123, %f2886, 0fBF800000; + selp.b32 %r59, 1072693248, %r225, %p123; + setp.gt.s32 %p124, %r54, -1; + selp.b32 %r60, 2146435072, 0, %p124; + setp.ne.s32 %p125, %r58, 1071644672; + and.pred %p126, %p125, %p6; + or.b32 %r61, %r60, -2147483648; + selp.b32 %r62, %r61, %r60, %p126; + add.f32 %f729, %f75, 0f3F800000; + sub.f32 %f730, %f729, %f2890; + div.rn.f32 %f94, %f730, %f2886; + abs.f32 %f95, %f94; + setp.lt.f32 %p127, %f95, 0f00800000; + mul.f32 %f731, %f95, 0f4B800000; + selp.f32 %f732, %f731, %f95, %p127; + selp.f32 %f733, 0fC3170000, 0fC2FE0000, %p127; + mov.b32 %r226, %f732; + and.b32 %r227, %r226, 8388607; + or.b32 %r228, %r227, 1065353216; + mov.b32 %f734, %r228; + shr.u32 %r229, %r226, 23; + cvt.rn.f32.u32 %f735, %r229; + add.f32 %f736, %f733, %f735; + setp.gt.f32 %p128, %f734, 0f3FB504F3; + mul.f32 %f737, %f734, 0f3F000000; + add.f32 %f738, %f736, 0f3F800000; + selp.f32 %f739, %f738, %f736, %p128; + selp.f32 %f740, %f737, %f734, %p128; + add.f32 %f741, %f740, 0fBF800000; + add.f32 %f742, %f740, 0f3F800000; + rcp.approx.ftz.f32 %f743, %f742; + add.f32 %f744, %f741, %f741; + mul.f32 %f745, %f744, %f743; + mul.f32 %f746, %f745, %f745; + fma.rn.f32 %f747, %f607, %f746, %f606; + fma.rn.f32 %f748, %f747, %f746, %f609; + mul.rn.f32 %f749, %f748, %f746; + mul.rn.f32 %f750, %f749, %f745; + sub.f32 %f751, %f741, %f745; + add.f32 %f752, %f751, %f751; + neg.f32 %f753, %f745; + fma.rn.f32 %f754, %f753, %f741, %f752; + mul.rn.f32 %f755, %f743, %f754; + add.f32 %f756, %f750, %f745; + sub.f32 %f757, %f745, %f756; + add.f32 %f758, %f750, %f757; + add.f32 %f759, %f755, %f758; + add.f32 %f760, %f756, %f759; + sub.f32 %f761, %f756, %f760; + add.f32 %f762, %f759, %f761; + mul.rn.f32 %f763, %f739, %f625; + mul.rn.f32 %f764, %f739, %f627; + add.f32 %f765, %f763, %f760; + sub.f32 %f766, %f763, %f765; + add.f32 %f767, %f760, %f766; + add.f32 %f768, %f762, %f767; + add.f32 %f769, %f764, %f768; + add.f32 %f770, %f765, %f769; + sub.f32 %f771, %f765, %f770; + add.f32 %f772, %f769, %f771; + mul.rn.f32 %f773, %f588, %f770; + neg.f32 %f774, %f773; + fma.rn.f32 %f775, %f588, %f770, %f774; + fma.rn.f32 %f776, %f588, %f772, %f775; + fma.rn.f32 %f777, %f2667, %f770, %f776; + add.rn.f32 %f778, %f773, %f777; + neg.f32 %f779, %f778; + add.rn.f32 %f780, %f773, %f779; + add.rn.f32 %f781, %f780, %f777; + mov.b32 %r230, %f778; + setp.eq.s32 %p129, %r230, 1118925336; + add.s32 %r231, %r230, -1; + mov.b32 %f782, %r231; + add.f32 %f783, %f781, 0f37000000; + selp.f32 %f96, %f783, %f781, %p129; + selp.f32 %f784, %f782, %f778, %p129; + mul.rn.f32 %f785, %f784, %f650; + cvt.rzi.f32.f32 %f786, %f785; + abs.f32 %f787, %f786; + setp.gt.f32 %p130, %f787, 0f42FC0000; + mov.b32 %r232, %f786; + and.b32 %r233, %r232, -2147483648; + or.b32 %r234, %r233, 1123811328; + mov.b32 %f788, %r234; + selp.f32 %f789, %f788, %f786, %p130; + fma.rn.f32 %f790, %f789, %f656, %f784; + fma.rn.f32 %f791, %f789, %f658, %f790; + mul.f32 %f792, %f791, 0f3FB8AA3B; + add.f32 %f793, %f789, 0f4B40007F; + mov.b32 %r235, %f793; + shl.b32 %r236, %r235, 23; + mov.b32 %f794, %r236; + ex2.approx.ftz.f32 %f795, %f792; + mul.f32 %f97, %f795, %f794; + div.rn.f32 %f98, %f76, %f2886; + abs.f32 %f99, %f98; + setp.lt.f32 %p132, %f99, 0f00800000; + mul.f32 %f796, %f99, 0f4B800000; + selp.f32 %f797, %f796, %f99, %p132; + selp.f32 %f798, 0fC3170000, 0fC2FE0000, %p132; + mov.b32 %r237, %f797; + and.b32 %r238, %r237, 8388607; + or.b32 %r239, %r238, 1065353216; + mov.b32 %f799, %r239; + shr.u32 %r240, %r237, 23; + cvt.rn.f32.u32 %f800, %r240; + add.f32 %f801, %f798, %f800; + setp.gt.f32 %p133, %f799, 0f3FB504F3; + mul.f32 %f802, %f799, 0f3F000000; + add.f32 %f803, %f801, 0f3F800000; + selp.f32 %f804, %f803, %f801, %p133; + selp.f32 %f805, %f802, %f799, %p133; + add.f32 %f806, %f805, 0fBF800000; + add.f32 %f807, %f805, 0f3F800000; + rcp.approx.ftz.f32 %f808, %f807; + add.f32 %f809, %f806, %f806; + mul.f32 %f810, %f809, %f808; + mul.f32 %f811, %f810, %f810; + fma.rn.f32 %f812, %f607, %f811, %f606; + fma.rn.f32 %f813, %f812, %f811, %f609; + mul.rn.f32 %f814, %f813, %f811; + mul.rn.f32 %f815, %f814, %f810; + sub.f32 %f816, %f806, %f810; + add.f32 %f817, %f816, %f816; + neg.f32 %f818, %f810; + fma.rn.f32 %f819, %f818, %f806, %f817; + mul.rn.f32 %f820, %f808, %f819; + add.f32 %f821, %f815, %f810; + sub.f32 %f822, %f810, %f821; + add.f32 %f823, %f815, %f822; + add.f32 %f824, %f820, %f823; + add.f32 %f825, %f821, %f824; + sub.f32 %f826, %f821, %f825; + add.f32 %f827, %f824, %f826; + mul.rn.f32 %f828, %f804, %f625; + mul.rn.f32 %f829, %f804, %f627; + add.f32 %f830, %f828, %f825; + sub.f32 %f831, %f828, %f830; + add.f32 %f832, %f825, %f831; + add.f32 %f833, %f827, %f832; + add.f32 %f834, %f829, %f833; + add.f32 %f835, %f830, %f834; + sub.f32 %f836, %f830, %f835; + add.f32 %f837, %f834, %f836; + mul.rn.f32 %f838, %f588, %f835; + neg.f32 %f839, %f838; + fma.rn.f32 %f840, %f588, %f835, %f839; + fma.rn.f32 %f841, %f588, %f837, %f840; + fma.rn.f32 %f842, %f2667, %f835, %f841; + add.rn.f32 %f843, %f838, %f842; + neg.f32 %f844, %f843; + add.rn.f32 %f845, %f838, %f844; + add.rn.f32 %f846, %f845, %f842; + mov.b32 %r241, %f843; + setp.eq.s32 %p134, %r241, 1118925336; + add.s32 %r242, %r241, -1; + mov.b32 %f847, %r242; + add.f32 %f848, %f846, 0f37000000; + selp.f32 %f100, %f848, %f846, %p134; + selp.f32 %f849, %f847, %f843, %p134; + mul.rn.f32 %f850, %f849, %f650; + cvt.rzi.f32.f32 %f851, %f850; + abs.f32 %f852, %f851; + setp.gt.f32 %p135, %f852, 0f42FC0000; + mov.b32 %r243, %f851; + and.b32 %r244, %r243, -2147483648; + or.b32 %r245, %r244, 1123811328; + mov.b32 %f853, %r245; + selp.f32 %f854, %f853, %f851, %p135; + fma.rn.f32 %f855, %f854, %f656, %f849; + fma.rn.f32 %f856, %f854, %f658, %f855; + mul.f32 %f857, %f856, 0f3FB8AA3B; + add.f32 %f858, %f854, 0f4B40007F; + mov.b32 %r246, %f858; + shl.b32 %r247, %r246, 23; + mov.b32 %f859, %r247; + ex2.approx.ftz.f32 %f860, %f857; + mul.f32 %f101, %f860, %f859; + mov.f64 %fd311, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r248}, %fd311; + } + and.b32 %r249, %r248, 2146435072; + setp.eq.s32 %p137, %r249, 1074790400; + { // callseq 99, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd310; + .param .b64 param1; + st.param.f64 [param1+0], %fd311; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd38, [retval0+0]; + } // callseq 99 + and.pred %p9, %p118, %p137; + selp.b32 %r250, %r53, 0, %p137; + setp.lt.s32 %p138, %r248, 0; + or.b32 %r251, %r250, 2146435072; + selp.b32 %r63, %r251, %r250, %p138; + add.f64 %fd312, %fd477, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r252}, %fd312; + } + and.b32 %r64, %r252, 2146435072; + setp.ne.s32 %p139, %r64, 2146435072; + cvt.f64.f32 %fd39, %f77; + { + .reg .b32 %temp; + mov.b64 {%temp, %r65}, %fd39; + } + abs.f64 %fd313, %fd39; + { // callseq 100, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd313; + .param .b64 param1; + st.param.f64 [param1+0], %fd309; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd40, [retval0+0]; + } // callseq 100 + setp.lt.s32 %p140, %r65, 0; + and.pred %p10, %p140, %p117; + and.b32 %r66, %r248, 2147483647; + selp.b32 %r253, %r224, %r223, %p138; + selp.b32 %r67, 1072693248, %r253, %p123; + add.f64 %fd41, %fd39, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r254}, %fd41; + } + and.b32 %r68, %r254, 2146435072; + setp.ne.s32 %p141, %r68, 2146435072; + setp.gt.s32 %p142, %r248, -1; + selp.b32 %r255, 2146435072, 0, %p142; + setp.ne.s32 %p143, %r66, 1071644672; + and.pred %p144, %p143, %p9; + or.b32 %r256, %r255, -2147483648; + selp.b32 %r69, %r256, %r255, %p144; + setp.gtu.f64 %p145, %fd313, 0d7FF0000000000000; + cvt.f64.f32 %fd42, %f81; + { + .reg .b32 %temp; + mov.b64 {%temp, %r70}, %fd42; + } + abs.f64 %fd314, %fd42; + { // callseq 101, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd314; + .param .b64 param1; + st.param.f64 [param1+0], %fd309; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd43, [retval0+0]; + } // callseq 101 + setp.lt.s32 %p146, %r70, 0; + and.pred %p11, %p146, %p117; + setp.gt.f64 %p147, %fd313, 0d3FF0000000000000; + selp.b32 %r257, 2146435072, 0, %p147; + xor.b32 %r258, %r257, 2146435072; + selp.b32 %r259, %r258, %r257, %p119; + setp.eq.f32 %p148, %f77, 0fBF800000; + selp.b32 %r71, 1072693248, %r259, %p148; + add.f64 %fd44, %fd42, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r260}, %fd44; + } + and.b32 %r72, %r260, 2146435072; + setp.ne.s32 %p149, %r72, 2146435072; + setp.gtu.f64 %p150, %fd314, 0d7FF0000000000000; + setp.gt.f64 %p151, %fd314, 0d3FF0000000000000; + selp.b32 %r261, 2146435072, 0, %p151; + xor.b32 %r262, %r261, 2146435072; + selp.b32 %r263, %r262, %r261, %p119; + setp.eq.f32 %p152, %f81, 0fBF800000; + selp.b32 %r73, 1072693248, %r263, %p152; + mov.f64 %fd315, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r74}, %fd315; + } + and.b32 %r75, %r74, 2147483647; + setp.gt.s32 %p153, %r74, -1; + selp.b32 %r76, 2146435072, 0, %p153; + or.pred %p14, %p120, %p121; + or.pred %p17, %p139, %p121; + or.pred %p18, %p141, %p145; + or.pred %p19, %p149, %p150; + shr.s32 %r264, %r74, 31; + and.b32 %r77, %r264, 2146435072; + +$L__BB5_56: + cvt.rn.f32.s32 %f2672, %r785; + sub.f32 %f2671, %f2672, %f2890; + add.f32 %f2670, %f2671, 0f3F000000; + mul.f32 %f2669, %f2670, %f78; + abs.f32 %f2668, %f2669; + setp.ltu.f32 %p154, %f2668, 0f3F8060FE; + mov.f32 %f2849, %f80; + @%p154 bra $L__BB5_58; + + mov.f32 %f2778, 0f3F800000; + ex2.approx.ftz.f32 %f861, %f80; + sub.f32 %f863, %f2778, %f861; + mov.b32 %r265, %f863; + or.b32 %r266, %r51, %r265; + mov.b32 %f2849, %r266; + +$L__BB5_58: + cvt.rn.f32.s32 %f2677, %r785; + sub.f32 %f2676, %f2677, %f2890; + add.f32 %f2675, %f2676, 0fBF000000; + mul.f32 %f2674, %f2675, %f78; + abs.f32 %f2673, %f2674; + setp.ltu.f32 %p155, %f2673, 0f3F8060FE; + mov.f32 %f2850, %f83; + @%p155 bra $L__BB5_60; + + mov.f32 %f2777, 0f3F800000; + ex2.approx.ftz.f32 %f864, %f83; + sub.f32 %f866, %f2777, %f864; + mov.b32 %r267, %f866; + or.b32 %r268, %r52, %r267; + mov.b32 %f2850, %r268; + +$L__BB5_60: + sub.f32 %f867, %f2849, %f2850; + mul.f32 %f116, %f867, 0f3F000000; + cvt.rn.f32.s32 %f117, %r786; + sub.f32 %f118, %f117, %f2889; + add.f32 %f119, %f118, 0f3F000000; + mul.f32 %f120, %f78, %f119; + abs.f32 %f868, %f120; + setp.ltu.f32 %p156, %f868, 0f3F8060FE; + setp.ge.f32 %p157, %f868, 0f3F8060FE; + mul.f32 %f869, %f120, %f120; + selp.f32 %f870, %f868, %f869, %p157; + selp.f32 %f871, 0f3789CA3C, 0f38B1E96A, %p157; + selp.f32 %f872, 0fB9F560B9, 0fBA574D20, %p157; + fma.rn.f32 %f873, %f871, %f870, %f872; + selp.f32 %f874, 0f3BAC840B, 0f3BAAD5EA, %p157; + fma.rn.f32 %f875, %f873, %f870, %f874; + selp.f32 %f876, 0fBD0C8162, 0fBCDC1BE7, %p157; + fma.rn.f32 %f877, %f875, %f870, %f876; + selp.f32 %f878, 0f3E1CF906, 0f3DE718AF, %p157; + fma.rn.f32 %f879, %f877, %f870, %f878; + selp.f32 %f880, 0f3F6A937E, 0fBEC093AC, %p157; + fma.rn.f32 %f881, %f879, %f870, %f880; + selp.f32 %f882, 0f3F20D842, 0f3E0375D3, %p157; + fma.rn.f32 %f883, %f881, %f870, %f882; + neg.f32 %f884, %f868; + selp.f32 %f885, %f884, %f120, %p157; + fma.rn.f32 %f2851, %f883, %f885, %f885; + @%p156 bra $L__BB5_62; + + mov.f32 %f2776, 0f3F800000; + ex2.approx.ftz.f32 %f886, %f2851; + sub.f32 %f888, %f2776, %f886; + mov.b32 %r269, %f888; + mov.b32 %r270, %f120; + and.b32 %r271, %r270, -2147483648; + or.b32 %r272, %r271, %r269; + mov.b32 %f2851, %r272; + +$L__BB5_62: + cvt.rn.f32.s32 %f2679, %r786; + sub.f32 %f2678, %f2679, %f2889; + add.f32 %f124, %f2678, 0fBF000000; + mul.f32 %f125, %f78, %f124; + abs.f32 %f889, %f125; + setp.ltu.f32 %p158, %f889, 0f3F8060FE; + setp.ge.f32 %p159, %f889, 0f3F8060FE; + mul.f32 %f890, %f125, %f125; + selp.f32 %f891, %f889, %f890, %p159; + selp.f32 %f892, 0f3789CA3C, 0f38B1E96A, %p159; + selp.f32 %f893, 0fB9F560B9, 0fBA574D20, %p159; + fma.rn.f32 %f894, %f892, %f891, %f893; + selp.f32 %f895, 0f3BAC840B, 0f3BAAD5EA, %p159; + fma.rn.f32 %f896, %f894, %f891, %f895; + selp.f32 %f897, 0fBD0C8162, 0fBCDC1BE7, %p159; + fma.rn.f32 %f898, %f896, %f891, %f897; + selp.f32 %f899, 0f3E1CF906, 0f3DE718AF, %p159; + fma.rn.f32 %f900, %f898, %f891, %f899; + selp.f32 %f901, 0f3F6A937E, 0fBEC093AC, %p159; + fma.rn.f32 %f902, %f900, %f891, %f901; + selp.f32 %f903, 0f3F20D842, 0f3E0375D3, %p159; + fma.rn.f32 %f904, %f902, %f891, %f903; + neg.f32 %f905, %f889; + selp.f32 %f906, %f905, %f125, %p159; + fma.rn.f32 %f2852, %f904, %f906, %f906; + @%p158 bra $L__BB5_64; + + mov.f32 %f2775, 0f3F800000; + ex2.approx.ftz.f32 %f907, %f2852; + sub.f32 %f909, %f2775, %f907; + mov.b32 %r273, %f909; + mov.b32 %r274, %f125; + and.b32 %r275, %r274, -2147483648; + or.b32 %r276, %r275, %r273; + mov.b32 %f2852, %r276; + +$L__BB5_64: + cvt.rn.f32.s32 %f2680, %r785; + sub.f32 %f911, %f2851, %f2852; + mul.f32 %f129, %f911, 0f3F000000; + mul.f32 %f912, %f116, %f2888; + fma.rn.f32 %f130, %f129, %f912, %f2887; + mad.lo.s32 %r277, %r786, %r102, %r785; + add.s32 %r278, %r277, %r2; + mul.wide.s32 %rd31, %r278, 4; + add.s64 %rd32, %rd1, %rd31; + ld.global.f32 %f131, [%rd32]; + add.f32 %f913, %f63, %f117; + fma.rn.f32 %f914, %f913, %f52, %f64; + add.f32 %f915, %f914, %f2680; + cvt.rzi.s32.f32 %r279, %f915; + mul.wide.s32 %rd34, %r279, 4; + add.s64 %rd35, %rd33, %rd34; + ld.global.f32 %f2885, [%rd35]; + setp.eq.f32 %p160, %f89, 0f7F800000; + mov.f32 %f2853, 0f7F800000; + @%p160 bra $L__BB5_66; + + fma.rn.f32 %f2853, %f89, %f88, %f89; + +$L__BB5_66: + setp.geu.f32 %p660, %f85, 0f00000000; + mov.b32 %r280, %f2853; + xor.b32 %r281, %r280, -2147483648; + mov.b32 %f916, %r281; + selp.f32 %f135, %f916, %f2853, %p4; + add.f32 %f917, %f85, %f85; + selp.f32 %f918, %f917, 0f00000000, %p106; + setp.eq.f32 %p162, %f85, 0f00000000; + selp.f32 %f2854, %f918, %f135, %p162; + @%p660 bra $L__BB5_69; + + cvt.rzi.f32.f32 %f920, %f588; + setp.eq.f32 %p163, %f920, 0f40000000; + mov.f32 %f2854, %f135; + @%p163 bra $L__BB5_69; + + mov.f32 %f2854, 0f7FFFFFFF; + +$L__BB5_69: + mov.f32 %f2683, 0f3FB8AA3B; + mov.f32 %f2682, 0f3F000000; + abs.f32 %f2681, %f85; + add.f32 %f923, %f2681, 0f40000000; + mov.b32 %r282, %f923; + setp.gt.s32 %p164, %r282, 2139095039; + add.f32 %f924, %f85, 0f40000000; + setp.gtu.f32 %p165, %f2681, 0f7F800000; + mov.f32 %f2855, 0f7F800000; + selp.f32 %f925, %f924, %f2854, %p165; + selp.f32 %f926, 0fFF800000, 0f7F800000, %p4; + setp.neu.f32 %p166, %f2681, 0f7F800000; + selp.f32 %f927, %f925, %f926, %p166; + selp.f32 %f928, %f927, %f2854, %p164; + mul.f32 %f929, %f928, 0fBF000000; + setp.eq.f32 %p167, %f85, 0f3F800000; + selp.f32 %f930, 0fBF000000, %f929, %p167; + mov.f32 %f932, 0f3BBB989D; + fma.rn.f32 %f933, %f930, %f932, %f2682; + mov.f32 %f935, 0f437C0000; + cvt.sat.f32.f32 %f936, %f933; + mov.f32 %f937, 0f4B400001; + fma.rm.f32 %f938, %f936, %f935, %f937; + add.f32 %f939, %f938, 0fCB40007F; + neg.f32 %f940, %f939; + fma.rn.f32 %f941, %f930, %f2683, %f940; + mov.f32 %f942, 0f32A57060; + fma.rn.f32 %f943, %f930, %f942, %f941; + mov.b32 %r283, %f938; + shl.b32 %r284, %r283, 23; + mov.b32 %f944, %r284; + ex2.approx.ftz.f32 %f945, %f943; + mul.f32 %f138, %f945, %f944; + setp.eq.f32 %p168, %f93, 0f7F800000; + @%p168 bra $L__BB5_71; + + fma.rn.f32 %f2855, %f93, %f92, %f93; + +$L__BB5_71: + setp.geu.f32 %p663, %f90, 0f00000000; + setp.lt.f32 %p662, %f90, 0f00000000; + and.pred %p661, %p662, %p106; + mov.b32 %r285, %f2855; + xor.b32 %r286, %r285, -2147483648; + mov.b32 %f946, %r286; + selp.f32 %f141, %f946, %f2855, %p661; + add.f32 %f947, %f90, %f90; + selp.f32 %f948, %f947, 0f00000000, %p106; + setp.eq.f32 %p170, %f90, 0f00000000; + selp.f32 %f2856, %f948, %f141, %p170; + @%p663 bra $L__BB5_74; + + cvt.rzi.f32.f32 %f950, %f588; + setp.eq.f32 %p171, %f950, 0f40000000; + mov.f32 %f2856, %f141; + @%p171 bra $L__BB5_74; + + mov.f32 %f2856, 0f7FFFFFFF; + +$L__BB5_74: + mov.f32 %f2690, 0f32A57060; + mov.f32 %f2689, 0f4B400001; + mov.f32 %f2688, 0f437C0000; + mov.f32 %f2687, 0f3BBB989D; + abs.f32 %f2686, %f90; + setp.lt.f32 %p665, %f90, 0f00000000; + and.pred %p664, %p665, %p106; + mov.f32 %f2685, 0f3FB8AA3B; + mov.f32 %f2684, 0f3F000000; + add.f32 %f952, %f2686, 0f40000000; + mov.b32 %r287, %f952; + setp.gt.s32 %p172, %r287, 2139095039; + add.f32 %f953, %f90, 0f40000000; + setp.gtu.f32 %p173, %f2686, 0f7F800000; + selp.f32 %f954, %f953, %f2856, %p173; + selp.f32 %f955, 0fFF800000, 0f7F800000, %p664; + setp.neu.f32 %p174, %f2686, 0f7F800000; + selp.f32 %f956, %f954, %f955, %p174; + selp.f32 %f957, %f956, %f2856, %p172; + mul.f32 %f958, %f957, 0fBF000000; + setp.eq.f32 %p175, %f90, 0f3F800000; + selp.f32 %f959, 0fBF000000, %f958, %p175; + fma.rn.f32 %f962, %f959, %f2687, %f2684; + cvt.sat.f32.f32 %f965, %f962; + fma.rm.f32 %f967, %f965, %f2688, %f2689; + add.f32 %f968, %f967, 0fCB40007F; + neg.f32 %f969, %f968; + fma.rn.f32 %f970, %f959, %f2685, %f969; + fma.rn.f32 %f972, %f959, %f2690, %f970; + mov.b32 %r288, %f967; + shl.b32 %r289, %r288, 23; + mov.b32 %f973, %r289; + ex2.approx.ftz.f32 %f974, %f972; + mul.f32 %f144, %f974, %f973; + sub.f32 %f975, %f138, %f144; + mul.f32 %f976, %f60, %f975; + mul.f32 %f145, %f129, %f976; + not.pred %p176, %p6; + mov.f64 %fd510, %fd37; + @%p176 bra $L__BB5_76; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r290}, %fd37; + } + xor.b32 %r291, %r290, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r292, %temp}, %fd37; + } + mov.b64 %fd510, {%r292, %r291}; + +$L__BB5_76: + setp.eq.f32 %p177, %f2886, 0f00000000; + @%p177 bra $L__BB5_80; + bra.uni $L__BB5_77; + +$L__BB5_80: + mov.u32 %r293, 0; + mov.b64 %fd510, {%r293, %r56}; + bra.uni $L__BB5_81; + +$L__BB5_77: + setp.gt.s32 %p178, %r53, -1; + @%p178 bra $L__BB5_81; + + cvt.rzi.f64.f64 %fd317, %fd309; + setp.eq.f64 %p179, %fd317, 0d4008000000000000; + @%p179 bra $L__BB5_81; + + mov.f64 %fd510, 0dFFF8000000000000; + +$L__BB5_81: + selp.f64 %fd511, %fd510, %fd35, %p120; + @%p14 bra $L__BB5_86; + + setp.eq.s32 %p181, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r294, %temp}, %fd309; + } + setp.eq.s32 %p182, %r294, 0; + and.pred %p183, %p181, %p182; + @%p183 bra $L__BB5_85; + bra.uni $L__BB5_83; + +$L__BB5_85: + mov.u32 %r298, 0; + mov.b64 %fd511, {%r298, %r59}; + bra.uni $L__BB5_86; + +$L__BB5_83: + and.b32 %r295, %r53, 2147483647; + setp.ne.s32 %p184, %r295, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r296, %temp}, %fd307; + } + setp.ne.s32 %p185, %r296, 0; + or.pred %p186, %p184, %p185; + mov.f64 %fd511, %fd510; + @%p186 bra $L__BB5_86; + + mov.u32 %r297, 0; + mov.b64 %fd511, {%r297, %r62}; + +$L__BB5_86: + mov.f32 %f2701, 0f3102E308; + mov.f32 %f2700, 0fBF317218; + mov.f32 %f2699, 0f35BFBE8E; + mov.f32 %f2698, 0f3F317200; + mov.f32 %f2697, 0f3DAAAABD; + mov.f32 %f2696, 0f3C4CAF63; + mov.f32 %f2695, 0f3B18F0FE; + cvt.rn.f32.s32 %f2694, %r785; + add.f32 %f2693, %f2694, 0f3F000000; + sub.f32 %f2692, %f2693, %f2890; + mov.f32 %f2691, 0f3FB8AA3B; + setp.eq.f32 %p187, %f2886, 0f3F800000; + selp.f64 %fd321, 0d3FF0000000000000, %fd511, %p187; + div.rn.f64 %fd322, %fd34, %fd321; + mul.f32 %f978, %f81, %f144; + mul.f32 %f979, %f2692, %f138; + sub.f32 %f980, %f979, %f978; + cvt.f64.f32 %fd323, %f980; + mul.f64 %fd324, %fd322, %fd323; + cvt.f64.f32 %fd53, %f129; + mul.f64 %fd325, %fd324, %fd53; + cvt.rn.f32.f64 %f146, %fd325; + add.f32 %f981, %f117, 0f3F000000; + sub.f32 %f147, %f981, %f2889; + div.rn.f32 %f148, %f147, %f2886; + abs.f32 %f149, %f148; + setp.lt.f32 %p188, %f149, 0f00800000; + mul.f32 %f982, %f149, 0f4B800000; + selp.f32 %f983, %f982, %f149, %p188; + selp.f32 %f984, 0fC3170000, 0fC2FE0000, %p188; + mov.b32 %r299, %f983; + and.b32 %r300, %r299, 8388607; + or.b32 %r301, %r300, 1065353216; + mov.b32 %f985, %r301; + shr.u32 %r302, %r299, 23; + cvt.rn.f32.u32 %f986, %r302; + add.f32 %f987, %f984, %f986; + setp.gt.f32 %p189, %f985, 0f3FB504F3; + mul.f32 %f988, %f985, 0f3F000000; + add.f32 %f989, %f987, 0f3F800000; + selp.f32 %f990, %f989, %f987, %p189; + selp.f32 %f991, %f988, %f985, %p189; + add.f32 %f992, %f991, 0fBF800000; + add.f32 %f993, %f991, 0f3F800000; + rcp.approx.ftz.f32 %f994, %f993; + add.f32 %f995, %f992, %f992; + mul.f32 %f997, %f995, %f994; mul.f32 %f998, %f997, %f997; - mov.f32 %f999, 0f3C4CAF63; - mov.f32 %f1000, 0f3B18F0FE; - fma.rn.f32 %f1001, %f1000, %f998, %f999; - mov.f32 %f1002, 0f3DAAAABD; - fma.rn.f32 %f1003, %f1001, %f998, %f1002; + fma.rn.f32 %f1001, %f2695, %f998, %f2696; + fma.rn.f32 %f1003, %f1001, %f998, %f2697; mul.rn.f32 %f1004, %f1003, %f998; mul.rn.f32 %f1005, %f1004, %f997; - sub.f32 %f1006, %f127, %f997; - neg.f32 %f1007, %f997; - add.f32 %f1008, %f1006, %f1006; - fma.rn.f32 %f1009, %f1007, %f127, %f1008; - mul.rn.f32 %f1010, %f990, %f1009; + sub.f32 %f1006, %f992, %f997; + add.f32 %f1007, %f1006, %f1006; + neg.f32 %f1008, %f997; + fma.rn.f32 %f1009, %f1008, %f992, %f1007; + mul.rn.f32 %f1010, %f994, %f1009; add.f32 %f1011, %f1005, %f997; sub.f32 %f1012, %f997, %f1011; add.f32 %f1013, %f1005, %f1012; @@ -18605,3040 +30725,4199 @@ BB5_53: add.f32 %f1015, %f1011, %f1014; sub.f32 %f1016, %f1011, %f1015; add.f32 %f1017, %f1014, %f1016; - add.f32 %f1018, %f130, %f1015; - sub.f32 %f1019, %f130, %f1018; - add.f32 %f1020, %f1015, %f1019; - add.f32 %f1021, %f1017, %f1020; - add.f32 %f1022, %f131, %f1021; - add.f32 %f1023, %f1018, %f1022; - sub.f32 %f1024, %f1018, %f1023; - add.f32 %f1025, %f1022, %f1024; - mul.rn.f32 %f1027, %f841, %f1023; - neg.f32 %f1028, %f1027; - fma.rn.f32 %f1029, %f841, %f1023, %f1028; - fma.rn.f32 %f1030, %f841, %f1025, %f1029; - mov.f32 %f1031, 0f00000000; - fma.rn.f32 %f1032, %f1031, %f1023, %f1030; - add.rn.f32 %f1033, %f1027, %f1032; - neg.f32 %f1034, %f1033; - add.rn.f32 %f1035, %f1027, %f1034; - add.rn.f32 %f1036, %f1035, %f1032; - mov.b32 %r153, %f1033; - setp.eq.s32 %p70, %r153, 1118925336; - add.s32 %r154, %r153, -1; - mov.b32 %f1037, %r154; - add.f32 %f1038, %f1036, 0f37000000; - selp.f32 %f1039, %f1037, %f1033, %p70; - selp.f32 %f177, %f1038, %f1036, %p70; - mul.f32 %f1040, %f1039, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1041, %f1040; - mov.f32 %f1042, 0fBF317200; - fma.rn.f32 %f1043, %f1041, %f1042, %f1039; - mov.f32 %f1044, 0fB5BFBE8E; - fma.rn.f32 %f1045, %f1041, %f1044, %f1043; - mul.f32 %f1046, %f1045, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1047, %f1046; - add.f32 %f1048, %f1041, 0f00000000; - ex2.approx.f32 %f1049, %f1048; - mul.f32 %f1050, %f1047, %f1049; - setp.lt.f32 %p71, %f1039, 0fC2D20000; - selp.f32 %f1051, 0f00000000, %f1050, %p71; - setp.gt.f32 %p72, %f1039, 0f42D20000; - selp.f32 %f3065, 0f7F800000, %f1051, %p72; - setp.eq.f32 %p73, %f3065, 0f7F800000; - @%p73 bra BB5_55; - - fma.rn.f32 %f3065, %f3065, %f177, %f3065; - -BB5_55: - setp.geu.f32 %p346, %f124, 0f00000000; - mov.b32 %r155, %f3065; - xor.b32 %r156, %r155, -2147483648; - mov.b32 %f1052, %r156; - selp.f32 %f181, %f1052, %f3065, %p1; - setp.eq.f32 %p74, %f124, 0f00000000; - selp.f32 %f3066, %f132, %f181, %p74; - @%p346 bra BB5_57; - - cvt.rzi.f32.f32 %f1054, %f841; - setp.neu.f32 %p75, %f1054, 0f40000000; - selp.f32 %f3066, 0f7FFFFFFF, %f181, %p75; - -BB5_57: - abs.f32 %f2834, %f124; - add.f32 %f2833, %f2834, 0f40000000; - mov.b32 %r287, %f2833; - mov.f32 %f2832, 0f00000000; - mov.f32 %f2831, 0f3DAAAABD; - mov.f32 %f2830, 0f3C4CAF63; - mov.f32 %f2829, 0f3B18F0FE; - mov.f32 %f2828, 0fB5BFBE8E; - mov.f32 %f2827, 0fBF317200; - add.f32 %f1057, %f124, 0f40000000; - setp.gtu.f32 %p76, %f2834, 0f7F800000; - selp.f32 %f1058, %f1057, %f3066, %p76; - selp.f32 %f1059, 0fFF800000, 0f7F800000, %p1; - setp.neu.f32 %p77, %f2834, 0f7F800000; - selp.f32 %f1060, %f1058, %f1059, %p77; - setp.gt.s32 %p78, %r287, 2139095039; - selp.f32 %f1061, %f1060, %f3066, %p78; - mul.f32 %f1062, %f1061, 0fBF000000; - setp.eq.f32 %p79, %f124, 0f3F800000; - selp.f32 %f1063, 0fBF000000, %f1062, %p79; - mul.f32 %f1064, %f1063, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1065, %f1064; - fma.rn.f32 %f1067, %f1065, %f2827, %f1063; - fma.rn.f32 %f1069, %f1065, %f2828, %f1067; - mul.f32 %f1070, %f1069, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1071, %f1070; - add.f32 %f1072, %f1065, 0f00000000; - ex2.approx.f32 %f1073, %f1072; - mul.f32 %f1074, %f1071, %f1073; - setp.lt.f32 %p80, %f1063, 0fC2D20000; - selp.f32 %f1075, 0f00000000, %f1074, %p80; - setp.gt.f32 %p81, %f1063, 0f42D20000; - selp.f32 %f185, 0f7F800000, %f1075, %p81; - // inline asm - rcp.approx.ftz.f32 %f1055,%f136; - // inline asm - mul.f32 %f1076, %f1055, %f137; - mul.f32 %f1077, %f1076, %f1076; - fma.rn.f32 %f1080, %f2829, %f1077, %f2830; - fma.rn.f32 %f1082, %f1080, %f1077, %f2831; - mul.rn.f32 %f1083, %f1082, %f1077; - mul.rn.f32 %f1084, %f1083, %f1076; - sub.f32 %f1085, %f135, %f1076; - neg.f32 %f1086, %f1076; - add.f32 %f1087, %f1085, %f1085; - fma.rn.f32 %f1088, %f1086, %f135, %f1087; - mul.rn.f32 %f1089, %f1055, %f1088; - add.f32 %f1090, %f1084, %f1076; - sub.f32 %f1091, %f1076, %f1090; - add.f32 %f1092, %f1084, %f1091; - add.f32 %f1093, %f1089, %f1092; - add.f32 %f1094, %f1090, %f1093; - sub.f32 %f1095, %f1090, %f1094; - add.f32 %f1096, %f1093, %f1095; - add.f32 %f1097, %f138, %f1094; - sub.f32 %f1098, %f138, %f1097; - add.f32 %f1099, %f1094, %f1098; - add.f32 %f1100, %f1096, %f1099; - add.f32 %f1101, %f139, %f1100; - add.f32 %f1102, %f1097, %f1101; - sub.f32 %f1103, %f1097, %f1102; - add.f32 %f1104, %f1101, %f1103; - mul.rn.f32 %f1106, %f841, %f1102; - neg.f32 %f1107, %f1106; - fma.rn.f32 %f1108, %f841, %f1102, %f1107; - fma.rn.f32 %f1109, %f841, %f1104, %f1108; - fma.rn.f32 %f1111, %f2832, %f1102, %f1109; - add.rn.f32 %f1112, %f1106, %f1111; - neg.f32 %f1113, %f1112; - add.rn.f32 %f1114, %f1106, %f1113; - add.rn.f32 %f1115, %f1114, %f1111; - mov.b32 %r157, %f1112; - setp.eq.s32 %p82, %r157, 1118925336; - add.s32 %r158, %r157, -1; - mov.b32 %f1116, %r158; - add.f32 %f1117, %f1115, 0f37000000; - selp.f32 %f1118, %f1116, %f1112, %p82; - selp.f32 %f186, %f1117, %f1115, %p82; - mul.f32 %f1119, %f1118, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1120, %f1119; - fma.rn.f32 %f1121, %f1120, %f2827, %f1118; - fma.rn.f32 %f1122, %f1120, %f2828, %f1121; - mul.f32 %f1123, %f1122, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1124, %f1123; - add.f32 %f1125, %f1120, 0f00000000; - ex2.approx.f32 %f1126, %f1125; - mul.f32 %f1127, %f1124, %f1126; - setp.lt.f32 %p83, %f1118, 0fC2D20000; - selp.f32 %f1128, 0f00000000, %f1127, %p83; - setp.gt.f32 %p84, %f1118, 0f42D20000; - selp.f32 %f3067, 0f7F800000, %f1128, %p84; - setp.eq.f32 %p85, %f3067, 0f7F800000; - @%p85 bra BB5_59; - - fma.rn.f32 %f3067, %f3067, %f186, %f3067; - -BB5_59: - setp.geu.f32 %p347, %f133, 0f00000000; - mov.b32 %r159, %f3067; - xor.b32 %r160, %r159, -2147483648; - mov.b32 %f1129, %r160; - selp.f32 %f190, %f1129, %f3067, %p2; - setp.eq.f32 %p86, %f133, 0f00000000; - selp.f32 %f3068, %f140, %f190, %p86; - @%p347 bra BB5_61; - - cvt.rzi.f32.f32 %f1131, %f841; - setp.neu.f32 %p87, %f1131, 0f40000000; - selp.f32 %f3068, 0f7FFFFFFF, %f190, %p87; - -BB5_61: - abs.f32 %f2849, %f133; - add.f32 %f2848, %f2849, 0f40000000; - mov.b32 %r288, %f2848; - cvt.rn.f32.s32 %f2847, %r316; - cvt.rn.f32.s32 %f2846, %r317; - mov.f32 %f2845, 0f35BFBE8E; - mov.f32 %f2844, 0f3F317200; - add.f32 %f2843, %f2847, 0f3F800000; - sub.f32 %f2842, %f2843, %f3102; - sub.f32 %f2841, %f2847, %f3102; - mov.f32 %f2840, 0f00000000; - mov.f32 %f2839, 0f3DAAAABD; - mov.f32 %f2838, 0f3C4CAF63; - mov.f32 %f2837, 0f3B18F0FE; - mov.f32 %f2836, 0fB5BFBE8E; - mov.f32 %f2835, 0fBF317200; - add.f32 %f1134, %f133, 0f40000000; - setp.gtu.f32 %p88, %f2849, 0f7F800000; - selp.f32 %f1135, %f1134, %f3068, %p88; - selp.f32 %f1136, 0fFF800000, 0f7F800000, %p2; - setp.neu.f32 %p89, %f2849, 0f7F800000; - selp.f32 %f1137, %f1135, %f1136, %p89; - setp.gt.s32 %p90, %r288, 2139095039; - selp.f32 %f1138, %f1137, %f3068, %p90; - mul.f32 %f1139, %f1138, 0fBF000000; - setp.eq.f32 %p91, %f133, 0f3F800000; - selp.f32 %f1140, 0fBF000000, %f1139, %p91; - mul.f32 %f1141, %f1140, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1142, %f1141; - fma.rn.f32 %f1144, %f1142, %f2835, %f1140; - fma.rn.f32 %f1146, %f1142, %f2836, %f1144; - mul.f32 %f1147, %f1146, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1148, %f1147; - add.f32 %f1149, %f1142, 0f00000000; - ex2.approx.f32 %f1150, %f1149; - mul.f32 %f1151, %f1148, %f1150; - setp.lt.f32 %p92, %f1140, 0fC2D20000; - selp.f32 %f1152, 0f00000000, %f1151, %p92; - setp.gt.f32 %p93, %f1140, 0f42D20000; - selp.f32 %f1153, 0f7F800000, %f1152, %p93; - sub.f32 %f1154, %f185, %f1153; - mul.f32 %f1155, %f96, %f1154; - mul.f32 %f194, %f173, %f1155; - mul.f32 %f1156, %f2841, %f1153; - mul.f32 %f1157, %f2842, %f185; - sub.f32 %f1158, %f1157, %f1156; - mul.f32 %f1159, %f1158, %f99; - mul.f32 %f195, %f173, %f1159; - add.f32 %f1160, %f2846, 0f3F800000; - sub.f32 %f196, %f1160, %f3101; - div.rn.f32 %f197, %f196, %f3098; - abs.f32 %f198, %f197; - setp.lt.f32 %p94, %f198, 0f00800000; - mul.f32 %f1161, %f198, 0f4B800000; - selp.f32 %f1162, 0fC3170000, 0fC2FE0000, %p94; - selp.f32 %f1163, %f1161, %f198, %p94; - mov.b32 %r161, %f1163; - and.b32 %r162, %r161, 8388607; - or.b32 %r163, %r162, 1065353216; - mov.b32 %f1164, %r163; - shr.u32 %r164, %r161, 23; - cvt.rn.f32.u32 %f1165, %r164; - add.f32 %f1166, %f1162, %f1165; - setp.gt.f32 %p95, %f1164, 0f3FB504F3; - mul.f32 %f1167, %f1164, 0f3F000000; - add.f32 %f1168, %f1166, 0f3F800000; - selp.f32 %f1169, %f1167, %f1164, %p95; - selp.f32 %f1170, %f1168, %f1166, %p95; - add.f32 %f199, %f1169, 0fBF800000; - add.f32 %f1133, %f1169, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1132,%f1133; - // inline asm - add.f32 %f201, %f199, %f199; - mul.f32 %f1171, %f1132, %f201; - mul.f32 %f1172, %f1171, %f1171; - fma.rn.f32 %f1175, %f2837, %f1172, %f2838; - fma.rn.f32 %f1177, %f1175, %f1172, %f2839; - mul.rn.f32 %f1178, %f1177, %f1172; - mul.rn.f32 %f1179, %f1178, %f1171; - sub.f32 %f1180, %f199, %f1171; - neg.f32 %f1181, %f1171; - add.f32 %f1182, %f1180, %f1180; - fma.rn.f32 %f1183, %f1181, %f199, %f1182; - mul.rn.f32 %f1184, %f1132, %f1183; - add.f32 %f1185, %f1179, %f1171; - sub.f32 %f1186, %f1171, %f1185; - add.f32 %f1187, %f1179, %f1186; - add.f32 %f1188, %f1184, %f1187; - add.f32 %f1189, %f1185, %f1188; - sub.f32 %f1190, %f1185, %f1189; - add.f32 %f1191, %f1188, %f1190; - mul.rn.f32 %f202, %f1170, %f2844; - mul.rn.f32 %f203, %f1170, %f2845; - add.f32 %f1194, %f202, %f1189; - sub.f32 %f1195, %f202, %f1194; - add.f32 %f1196, %f1189, %f1195; - add.f32 %f1197, %f1191, %f1196; - add.f32 %f1198, %f203, %f1197; - add.f32 %f1199, %f1194, %f1198; - sub.f32 %f1200, %f1194, %f1199; - add.f32 %f1201, %f1198, %f1200; - mul.rn.f32 %f1203, %f841, %f1199; - neg.f32 %f1204, %f1203; - fma.rn.f32 %f1205, %f841, %f1199, %f1204; - fma.rn.f32 %f1206, %f841, %f1201, %f1205; - fma.rn.f32 %f1208, %f2840, %f1199, %f1206; - add.rn.f32 %f1209, %f1203, %f1208; - neg.f32 %f1210, %f1209; - add.rn.f32 %f1211, %f1203, %f1210; - add.rn.f32 %f1212, %f1211, %f1208; - mov.b32 %r165, %f1209; - setp.eq.s32 %p96, %r165, 1118925336; - add.s32 %r166, %r165, -1; - mov.b32 %f1213, %r166; - add.f32 %f1214, %f1212, 0f37000000; - selp.f32 %f1215, %f1213, %f1209, %p96; - selp.f32 %f204, %f1214, %f1212, %p96; - mul.f32 %f1216, %f1215, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1217, %f1216; - fma.rn.f32 %f1218, %f1217, %f2835, %f1215; - fma.rn.f32 %f1219, %f1217, %f2836, %f1218; - mul.f32 %f1220, %f1219, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1221, %f1220; - add.f32 %f1222, %f1217, 0f00000000; - ex2.approx.f32 %f1223, %f1222; - mul.f32 %f1224, %f1221, %f1223; - setp.lt.f32 %p97, %f1215, 0fC2D20000; - selp.f32 %f1225, 0f00000000, %f1224, %p97; - setp.gt.f32 %p98, %f1215, 0f42D20000; - selp.f32 %f3069, 0f7F800000, %f1225, %p98; - setp.eq.f32 %p99, %f3069, 0f7F800000; - @%p99 bra BB5_63; - - fma.rn.f32 %f3069, %f3069, %f204, %f3069; - -BB5_63: - setp.lt.f32 %p100, %f197, 0f00000000; - and.pred %p5, %p100, %p55; - mov.b32 %r167, %f3069; - xor.b32 %r168, %r167, -2147483648; - mov.b32 %f1226, %r168; - selp.f32 %f3071, %f1226, %f3069, %p5; - setp.eq.f32 %p102, %f197, 0f00000000; - @%p102 bra BB5_66; - bra.uni BB5_64; - -BB5_66: - add.f32 %f1229, %f197, %f197; - selp.f32 %f3071, %f1229, 0f00000000, %p55; - bra.uni BB5_67; - -BB5_64: - setp.geu.f32 %p103, %f197, 0f00000000; - @%p103 bra BB5_67; - - cvt.rzi.f32.f32 %f1228, %f841; - setp.neu.f32 %p104, %f1228, 0f40000000; - selp.f32 %f3071, 0f7FFFFFFF, %f3071, %p104; - -BB5_67: - abs.f32 %f2850, %f197; - add.f32 %f1230, %f2850, 0f40000000; - mov.b32 %r36, %f1230; - setp.lt.s32 %p106, %r36, 2139095040; - @%p106 bra BB5_72; - - abs.f32 %f2859, %f197; - setp.gtu.f32 %p107, %f2859, 0f7F800000; - @%p107 bra BB5_71; - bra.uni BB5_69; - -BB5_71: - add.f32 %f3071, %f197, 0f40000000; - bra.uni BB5_72; - -BB5_69: - abs.f32 %f2860, %f197; - setp.neu.f32 %p108, %f2860, 0f7F800000; - @%p108 bra BB5_72; - - selp.f32 %f3071, 0fFF800000, 0f7F800000, %p5; - -BB5_72: - cvt.rn.f32.s32 %f2872, %r317; - sub.f32 %f2871, %f2872, %f3101; - mov.f32 %f2858, 0f35BFBE8E; - mov.f32 %f2857, 0f3F317200; - mov.f32 %f2856, 0f00000000; - mov.f32 %f2855, 0f3DAAAABD; - mov.f32 %f2854, 0f3C4CAF63; - mov.f32 %f2853, 0f3B18F0FE; - mov.f32 %f2852, 0fB5BFBE8E; - mov.f32 %f2851, 0fBF317200; - mul.f32 %f1233, %f3071, 0fBF000000; - setp.eq.f32 %p109, %f197, 0f3F800000; - selp.f32 %f1234, 0fBF000000, %f1233, %p109; - mul.f32 %f1235, %f1234, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1236, %f1235; - fma.rn.f32 %f1238, %f1236, %f2851, %f1234; - fma.rn.f32 %f1240, %f1236, %f2852, %f1238; - mul.f32 %f1241, %f1240, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1242, %f1241; - add.f32 %f1243, %f1236, 0f00000000; - ex2.approx.f32 %f1244, %f1243; - mul.f32 %f1245, %f1242, %f1244; - setp.lt.f32 %p110, %f1234, 0fC2D20000; - selp.f32 %f1246, 0f00000000, %f1245, %p110; - setp.gt.f32 %p111, %f1234, 0f42D20000; - selp.f32 %f215, 0f7F800000, %f1246, %p111; - div.rn.f32 %f216, %f2871, %f3098; - abs.f32 %f217, %f216; - setp.lt.f32 %p112, %f217, 0f00800000; - mul.f32 %f1247, %f217, 0f4B800000; - selp.f32 %f1248, 0fC3170000, 0fC2FE0000, %p112; - selp.f32 %f1249, %f1247, %f217, %p112; - mov.b32 %r169, %f1249; - and.b32 %r170, %r169, 8388607; - or.b32 %r171, %r170, 1065353216; - mov.b32 %f1250, %r171; - shr.u32 %r172, %r169, 23; - cvt.rn.f32.u32 %f1251, %r172; - add.f32 %f1252, %f1248, %f1251; - setp.gt.f32 %p113, %f1250, 0f3FB504F3; - mul.f32 %f1253, %f1250, 0f3F000000; - add.f32 %f1254, %f1252, 0f3F800000; - selp.f32 %f1255, %f1253, %f1250, %p113; - selp.f32 %f1256, %f1254, %f1252, %p113; - add.f32 %f218, %f1255, 0fBF800000; - add.f32 %f1232, %f1255, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1231,%f1232; - // inline asm - add.f32 %f220, %f218, %f218; - mul.f32 %f1257, %f1231, %f220; - mul.f32 %f1258, %f1257, %f1257; - fma.rn.f32 %f1261, %f2853, %f1258, %f2854; - fma.rn.f32 %f1263, %f1261, %f1258, %f2855; - mul.rn.f32 %f1264, %f1263, %f1258; - mul.rn.f32 %f1265, %f1264, %f1257; - sub.f32 %f1266, %f218, %f1257; - neg.f32 %f1267, %f1257; - add.f32 %f1268, %f1266, %f1266; - fma.rn.f32 %f1269, %f1267, %f218, %f1268; - mul.rn.f32 %f1270, %f1231, %f1269; - add.f32 %f1271, %f1265, %f1257; - sub.f32 %f1272, %f1257, %f1271; - add.f32 %f1273, %f1265, %f1272; - add.f32 %f1274, %f1270, %f1273; - add.f32 %f1275, %f1271, %f1274; - sub.f32 %f1276, %f1271, %f1275; - add.f32 %f1277, %f1274, %f1276; - mul.rn.f32 %f221, %f1256, %f2857; - mul.rn.f32 %f222, %f1256, %f2858; - add.f32 %f1280, %f221, %f1275; - sub.f32 %f1281, %f221, %f1280; - add.f32 %f1282, %f1275, %f1281; - add.f32 %f1283, %f1277, %f1282; - add.f32 %f1284, %f222, %f1283; - add.f32 %f1285, %f1280, %f1284; - sub.f32 %f1286, %f1280, %f1285; + mul.rn.f32 %f1019, %f990, %f2698; + mul.rn.f32 %f1021, %f990, %f2699; + add.f32 %f1022, %f1019, %f1015; + sub.f32 %f1023, %f1019, %f1022; + add.f32 %f1024, %f1015, %f1023; + add.f32 %f1025, %f1017, %f1024; + add.f32 %f1026, %f1021, %f1025; + add.f32 %f1027, %f1022, %f1026; + sub.f32 %f1028, %f1022, %f1027; + add.f32 %f1029, %f1026, %f1028; + mul.rn.f32 %f1030, %f588, %f1027; + neg.f32 %f1031, %f1030; + fma.rn.f32 %f1032, %f588, %f1027, %f1031; + fma.rn.f32 %f1033, %f588, %f1029, %f1032; + mov.f32 %f1034, 0f00000000; + fma.rn.f32 %f1035, %f1034, %f1027, %f1033; + add.rn.f32 %f1036, %f1030, %f1035; + neg.f32 %f1037, %f1036; + add.rn.f32 %f1038, %f1030, %f1037; + add.rn.f32 %f1039, %f1038, %f1035; + mov.b32 %r303, %f1036; + setp.eq.s32 %p190, %r303, 1118925336; + add.s32 %r304, %r303, -1; + mov.b32 %f1040, %r304; + add.f32 %f1041, %f1039, 0f37000000; + selp.f32 %f150, %f1041, %f1039, %p190; + selp.f32 %f1042, %f1040, %f1036, %p190; + mul.rn.f32 %f1044, %f1042, %f2691; + cvt.rzi.f32.f32 %f1045, %f1044; + abs.f32 %f1046, %f1045; + setp.gt.f32 %p191, %f1046, 0f42FC0000; + mov.b32 %r305, %f1045; + and.b32 %r306, %r305, -2147483648; + or.b32 %r307, %r306, 1123811328; + mov.b32 %f1047, %r307; + selp.f32 %f1048, %f1047, %f1045, %p191; + fma.rn.f32 %f1050, %f1048, %f2700, %f1042; + fma.rn.f32 %f1052, %f1048, %f2701, %f1050; + mul.f32 %f1053, %f1052, 0f3FB8AA3B; + add.f32 %f1054, %f1048, 0f4B40007F; + mov.b32 %r308, %f1054; + shl.b32 %r309, %r308, 23; + mov.b32 %f1055, %r309; + ex2.approx.ftz.f32 %f1056, %f1053; + mul.f32 %f151, %f1056, %f1055; + setp.eq.f32 %p192, %f151, 0f7F800000; + mov.f32 %f2857, 0f7F800000; + @%p192 bra $L__BB5_88; + + fma.rn.f32 %f2857, %f151, %f150, %f151; + +$L__BB5_88: + setp.lt.f32 %p193, %f148, 0f00000000; + and.pred %p20, %p193, %p106; + setp.eq.f32 %p195, %f148, 0f00000000; + @%p195 bra $L__BB5_92; + bra.uni $L__BB5_89; + +$L__BB5_92: + add.f32 %f1061, %f148, %f148; + selp.f32 %f2859, %f1061, 0f00000000, %p106; + bra.uni $L__BB5_93; + +$L__BB5_89: + mov.b32 %r310, %f2857; + xor.b32 %r311, %r310, -2147483648; + mov.b32 %f1057, %r311; + selp.f32 %f2859, %f1057, %f2857, %p20; + setp.geu.f32 %p196, %f148, 0f00000000; + @%p196 bra $L__BB5_93; + + cvt.rzi.f32.f32 %f1059, %f588; + setp.eq.f32 %p197, %f1059, 0f40000000; + @%p197 bra $L__BB5_93; + + mov.f32 %f2859, 0f7FFFFFFF; + +$L__BB5_93: + abs.f32 %f2781, %f148; + add.f32 %f1062, %f2781, 0f40000000; + mov.b32 %r312, %f1062; + setp.lt.s32 %p199, %r312, 2139095040; + @%p199 bra $L__BB5_98; + + abs.f32 %f2786, %f148; + setp.gtu.f32 %p200, %f2786, 0f7F800000; + @%p200 bra $L__BB5_97; + bra.uni $L__BB5_95; + +$L__BB5_97: + add.f32 %f2859, %f148, 0f40000000; + bra.uni $L__BB5_98; + +$L__BB5_95: + abs.f32 %f2787, %f148; + setp.neu.f32 %p201, %f2787, 0f7F800000; + @%p201 bra $L__BB5_98; + + selp.f32 %f2859, 0fFF800000, 0f7F800000, %p20; + +$L__BB5_98: + mov.f32 %f2715, 0f00000000; + mov.f32 %f2714, 0f3102E308; + mov.f32 %f2713, 0fBF317218; + mov.f32 %f2712, 0f35BFBE8E; + mov.f32 %f2711, 0f3F317200; + mov.f32 %f2710, 0f3DAAAABD; + mov.f32 %f2709, 0f3C4CAF63; + mov.f32 %f2708, 0f3B18F0FE; + mov.f32 %f2707, 0f32A57060; + mov.f32 %f2706, 0f4B400001; + mov.f32 %f2705, 0f437C0000; + mov.f32 %f2704, 0f3BBB989D; + mov.f32 %f2703, 0f3FB8AA3B; + mov.f32 %f2702, 0f3F000000; + mul.f32 %f1064, %f2859, 0fBF000000; + setp.eq.f32 %p202, %f148, 0f3F800000; + selp.f32 %f1065, 0fBF000000, %f1064, %p202; + fma.rn.f32 %f1068, %f1065, %f2704, %f2702; + cvt.sat.f32.f32 %f1071, %f1068; + fma.rm.f32 %f1073, %f1071, %f2705, %f2706; + add.f32 %f1074, %f1073, 0fCB40007F; + neg.f32 %f1075, %f1074; + fma.rn.f32 %f1076, %f1065, %f2703, %f1075; + fma.rn.f32 %f1078, %f1065, %f2707, %f1076; + mov.b32 %r313, %f1073; + shl.b32 %r314, %r313, 23; + mov.b32 %f1079, %r314; + ex2.approx.ftz.f32 %f1080, %f1078; + mul.f32 %f160, %f1080, %f1079; + div.rn.f32 %f161, %f124, %f2886; + abs.f32 %f162, %f161; + setp.lt.f32 %p203, %f162, 0f00800000; + mul.f32 %f1081, %f162, 0f4B800000; + selp.f32 %f1082, %f1081, %f162, %p203; + selp.f32 %f1083, 0fC3170000, 0fC2FE0000, %p203; + mov.b32 %r315, %f1082; + and.b32 %r316, %r315, 8388607; + or.b32 %r317, %r316, 1065353216; + mov.b32 %f1084, %r317; + shr.u32 %r318, %r315, 23; + cvt.rn.f32.u32 %f1085, %r318; + add.f32 %f1086, %f1083, %f1085; + setp.gt.f32 %p204, %f1084, 0f3FB504F3; + mul.f32 %f1087, %f1084, 0f3F000000; + add.f32 %f1088, %f1086, 0f3F800000; + selp.f32 %f1089, %f1088, %f1086, %p204; + selp.f32 %f1090, %f1087, %f1084, %p204; + add.f32 %f1091, %f1090, 0fBF800000; + add.f32 %f1092, %f1090, 0f3F800000; + rcp.approx.ftz.f32 %f1093, %f1092; + add.f32 %f1094, %f1091, %f1091; + mul.f32 %f1096, %f1094, %f1093; + mul.f32 %f1097, %f1096, %f1096; + fma.rn.f32 %f1100, %f2708, %f1097, %f2709; + fma.rn.f32 %f1102, %f1100, %f1097, %f2710; + mul.rn.f32 %f1103, %f1102, %f1097; + mul.rn.f32 %f1104, %f1103, %f1096; + sub.f32 %f1105, %f1091, %f1096; + add.f32 %f1106, %f1105, %f1105; + neg.f32 %f1107, %f1096; + fma.rn.f32 %f1108, %f1107, %f1091, %f1106; + mul.rn.f32 %f1109, %f1093, %f1108; + add.f32 %f1110, %f1104, %f1096; + sub.f32 %f1111, %f1096, %f1110; + add.f32 %f1112, %f1104, %f1111; + add.f32 %f1113, %f1109, %f1112; + add.f32 %f1114, %f1110, %f1113; + sub.f32 %f1115, %f1110, %f1114; + add.f32 %f1116, %f1113, %f1115; + mul.rn.f32 %f1118, %f1089, %f2711; + mul.rn.f32 %f1120, %f1089, %f2712; + add.f32 %f1121, %f1118, %f1114; + sub.f32 %f1122, %f1118, %f1121; + add.f32 %f1123, %f1114, %f1122; + add.f32 %f1124, %f1116, %f1123; + add.f32 %f1125, %f1120, %f1124; + add.f32 %f1126, %f1121, %f1125; + sub.f32 %f1127, %f1121, %f1126; + add.f32 %f1128, %f1125, %f1127; + mul.rn.f32 %f1129, %f588, %f1126; + neg.f32 %f1130, %f1129; + fma.rn.f32 %f1131, %f588, %f1126, %f1130; + fma.rn.f32 %f1132, %f588, %f1128, %f1131; + fma.rn.f32 %f1134, %f2715, %f1126, %f1132; + add.rn.f32 %f1135, %f1129, %f1134; + neg.f32 %f1136, %f1135; + add.rn.f32 %f1137, %f1129, %f1136; + add.rn.f32 %f1138, %f1137, %f1134; + mov.b32 %r319, %f1135; + setp.eq.s32 %p205, %r319, 1118925336; + add.s32 %r320, %r319, -1; + mov.b32 %f1139, %r320; + add.f32 %f1140, %f1138, 0f37000000; + selp.f32 %f163, %f1140, %f1138, %p205; + selp.f32 %f1141, %f1139, %f1135, %p205; + mul.rn.f32 %f1142, %f1141, %f2703; + cvt.rzi.f32.f32 %f1143, %f1142; + abs.f32 %f1144, %f1143; + setp.gt.f32 %p206, %f1144, 0f42FC0000; + mov.b32 %r321, %f1143; + and.b32 %r322, %r321, -2147483648; + or.b32 %r323, %r322, 1123811328; + mov.b32 %f1145, %r323; + selp.f32 %f1146, %f1145, %f1143, %p206; + fma.rn.f32 %f1148, %f1146, %f2713, %f1141; + fma.rn.f32 %f1150, %f1146, %f2714, %f1148; + mul.f32 %f1151, %f1150, 0f3FB8AA3B; + add.f32 %f1152, %f1146, 0f4B40007F; + mov.b32 %r324, %f1152; + shl.b32 %r325, %r324, 23; + mov.b32 %f1153, %r325; + ex2.approx.ftz.f32 %f1154, %f1151; + mul.f32 %f164, %f1154, %f1153; + setp.eq.f32 %p207, %f164, 0f7F800000; + mov.f32 %f2860, 0f7F800000; + @%p207 bra $L__BB5_100; + + fma.rn.f32 %f2860, %f164, %f163, %f164; + +$L__BB5_100: + setp.lt.f32 %p208, %f161, 0f00000000; + and.pred %p21, %p208, %p106; + setp.eq.f32 %p210, %f161, 0f00000000; + @%p210 bra $L__BB5_104; + bra.uni $L__BB5_101; + +$L__BB5_104: + add.f32 %f1159, %f161, %f161; + selp.f32 %f2862, %f1159, 0f00000000, %p106; + bra.uni $L__BB5_105; + +$L__BB5_101: + mov.b32 %r326, %f2860; + xor.b32 %r327, %r326, -2147483648; + mov.b32 %f1155, %r327; + selp.f32 %f2862, %f1155, %f2860, %p21; + setp.geu.f32 %p211, %f161, 0f00000000; + @%p211 bra $L__BB5_105; + + cvt.rzi.f32.f32 %f1157, %f588; + setp.eq.f32 %p212, %f1157, 0f40000000; + @%p212 bra $L__BB5_105; + + mov.f32 %f2862, 0f7FFFFFFF; + +$L__BB5_105: + abs.f32 %f2788, %f161; + add.f32 %f1160, %f2788, 0f40000000; + mov.b32 %r328, %f1160; + setp.lt.s32 %p214, %r328, 2139095040; + @%p214 bra $L__BB5_110; + + abs.f32 %f2789, %f161; + setp.gtu.f32 %p215, %f2789, 0f7F800000; + @%p215 bra $L__BB5_109; + bra.uni $L__BB5_107; + +$L__BB5_109: + add.f32 %f2862, %f161, 0f40000000; + bra.uni $L__BB5_110; + +$L__BB5_107: + abs.f32 %f2790, %f161; + setp.neu.f32 %p216, %f2790, 0f7F800000; + @%p216 bra $L__BB5_110; + + selp.f32 %f2862, 0fFF800000, 0f7F800000, %p21; + +$L__BB5_110: + mov.f32 %f2721, 0f32A57060; + mov.f32 %f2720, 0f4B400001; + mov.f32 %f2719, 0f437C0000; + mov.f32 %f2718, 0f3BBB989D; + mov.f32 %f2717, 0f3FB8AA3B; + mov.f32 %f2716, 0f3F000000; + mul.f32 %f1161, %f2862, 0fBF000000; + setp.eq.f32 %p217, %f161, 0f3F800000; + selp.f32 %f1162, 0fBF000000, %f1161, %p217; + fma.rn.f32 %f1165, %f1162, %f2718, %f2716; + cvt.sat.f32.f32 %f1168, %f1165; + fma.rm.f32 %f1170, %f1168, %f2719, %f2720; + add.f32 %f1171, %f1170, 0fCB40007F; + neg.f32 %f1172, %f1171; + fma.rn.f32 %f1173, %f1162, %f2717, %f1172; + fma.rn.f32 %f1175, %f1162, %f2721, %f1173; + mov.b32 %r329, %f1170; + shl.b32 %r330, %r329, 23; + mov.b32 %f1176, %r330; + ex2.approx.ftz.f32 %f1177, %f1175; + mul.f32 %f173, %f1177, %f1176; + sub.f32 %f1178, %f160, %f173; + mul.f32 %f1179, %f60, %f1178; + mul.f32 %f174, %f116, %f1179; + mov.f64 %fd513, %fd37; + @%p176 bra $L__BB5_112; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r331}, %fd37; + } + xor.b32 %r332, %r331, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r333, %temp}, %fd37; + } + mov.b64 %fd513, {%r333, %r332}; + +$L__BB5_112: + setp.eq.f32 %p676, %f2886, 0f00000000; + @%p676 bra $L__BB5_116; + bra.uni $L__BB5_113; + +$L__BB5_116: + mov.u32 %r334, 0; + mov.b64 %fd513, {%r334, %r56}; + bra.uni $L__BB5_117; + +$L__BB5_113: + setp.gt.s32 %p220, %r53, -1; + @%p220 bra $L__BB5_117; + + cvt.rzi.f64.f64 %fd327, %fd309; + setp.eq.f64 %p221, %fd327, 0d4008000000000000; + @%p221 bra $L__BB5_117; + + mov.f64 %fd513, 0dFFF8000000000000; + +$L__BB5_117: + selp.f64 %fd514, %fd513, %fd35, %p120; + @%p14 bra $L__BB5_122; + + setp.eq.s32 %p223, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r335, %temp}, %fd309; + } + setp.eq.s32 %p224, %r335, 0; + and.pred %p225, %p223, %p224; + @%p225 bra $L__BB5_121; + bra.uni $L__BB5_119; + +$L__BB5_121: + mov.u32 %r339, 0; + mov.b64 %fd514, {%r339, %r59}; + bra.uni $L__BB5_122; + +$L__BB5_119: + and.b32 %r336, %r53, 2147483647; + setp.ne.s32 %p226, %r336, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r337, %temp}, %fd307; + } + setp.ne.s32 %p227, %r337, 0; + or.pred %p228, %p226, %p227; + mov.f64 %fd514, %fd513; + @%p228 bra $L__BB5_122; + + mov.u32 %r338, 0; + mov.b64 %fd514, {%r338, %r62}; + +$L__BB5_122: + cvt.rn.f32.s32 %f2784, %r786; + add.f32 %f2783, %f2784, 0f3F000000; + sub.f32 %f2782, %f2783, %f2889; + setp.eq.f32 %p677, %f2886, 0f3F800000; + selp.f64 %fd331, 0d3FF0000000000000, %fd514, %p677; + div.rn.f64 %fd332, %fd34, %fd331; + mul.f32 %f1181, %f124, %f173; + mul.f32 %f1182, %f2782, %f160; + sub.f32 %f1183, %f1182, %f1181; + cvt.f64.f32 %fd333, %f1183; + mul.f64 %fd334, %fd332, %fd333; + cvt.f64.f32 %fd335, %f116; + mul.f64 %fd336, %fd334, %fd335; + cvt.rn.f32.f64 %f175, %fd336; + setp.eq.f32 %p230, %f97, 0f7F800000; + mov.f32 %f2863, 0f7F800000; + @%p230 bra $L__BB5_124; + + fma.rn.f32 %f2863, %f97, %f96, %f97; + +$L__BB5_124: + setp.geu.f32 %p668, %f94, 0f00000000; + setp.lt.f32 %p667, %f94, 0f00000000; + and.pred %p666, %p667, %p106; + mov.b32 %r340, %f2863; + xor.b32 %r341, %r340, -2147483648; + mov.b32 %f1184, %r341; + selp.f32 %f178, %f1184, %f2863, %p666; + add.f32 %f1185, %f94, %f94; + selp.f32 %f1186, %f1185, 0f00000000, %p106; + setp.eq.f32 %p232, %f94, 0f00000000; + selp.f32 %f2864, %f1186, %f178, %p232; + @%p668 bra $L__BB5_127; + + cvt.rzi.f32.f32 %f1188, %f588; + setp.eq.f32 %p233, %f1188, 0f40000000; + mov.f32 %f2864, %f178; + @%p233 bra $L__BB5_127; + + mov.f32 %f2864, 0f7FFFFFFF; + +$L__BB5_127: + abs.f32 %f2728, %f94; + setp.lt.f32 %p670, %f94, 0f00000000; + and.pred %p669, %p670, %p106; + mov.f32 %f2727, 0f32A57060; + mov.f32 %f2726, 0f4B400001; + mov.f32 %f2725, 0f437C0000; + mov.f32 %f2724, 0f3BBB989D; + mov.f32 %f2723, 0f3FB8AA3B; + mov.f32 %f2722, 0f3F000000; + add.f32 %f1191, %f2728, 0f40000000; + mov.b32 %r342, %f1191; + setp.gt.s32 %p234, %r342, 2139095039; + add.f32 %f1192, %f94, 0f40000000; + setp.gtu.f32 %p235, %f2728, 0f7F800000; + mov.f32 %f2865, 0f7F800000; + selp.f32 %f1193, %f1192, %f2864, %p235; + selp.f32 %f1194, 0fFF800000, 0f7F800000, %p669; + setp.neu.f32 %p236, %f2728, 0f7F800000; + selp.f32 %f1195, %f1193, %f1194, %p236; + selp.f32 %f1196, %f1195, %f2864, %p234; + mul.f32 %f1197, %f1196, 0fBF000000; + setp.eq.f32 %p237, %f94, 0f3F800000; + selp.f32 %f1198, 0fBF000000, %f1197, %p237; + fma.rn.f32 %f1201, %f1198, %f2724, %f2722; + cvt.sat.f32.f32 %f1204, %f1201; + fma.rm.f32 %f1206, %f1204, %f2725, %f2726; + add.f32 %f1207, %f1206, 0fCB40007F; + neg.f32 %f1208, %f1207; + fma.rn.f32 %f1209, %f1198, %f2723, %f1208; + fma.rn.f32 %f1211, %f1198, %f2727, %f1209; + mov.b32 %r343, %f1206; + shl.b32 %r344, %r343, 23; + mov.b32 %f1212, %r344; + ex2.approx.ftz.f32 %f1213, %f1211; + mul.f32 %f181, %f1213, %f1212; + setp.eq.f32 %p238, %f101, 0f7F800000; + @%p238 bra $L__BB5_129; + + fma.rn.f32 %f2865, %f101, %f100, %f101; + +$L__BB5_129: + setp.geu.f32 %p673, %f98, 0f00000000; + setp.lt.f32 %p672, %f98, 0f00000000; + and.pred %p671, %p672, %p106; + mov.b32 %r345, %f2865; + xor.b32 %r346, %r345, -2147483648; + mov.b32 %f1214, %r346; + selp.f32 %f184, %f1214, %f2865, %p671; + add.f32 %f1215, %f98, %f98; + selp.f32 %f1216, %f1215, 0f00000000, %p106; + setp.eq.f32 %p240, %f98, 0f00000000; + selp.f32 %f2866, %f1216, %f184, %p240; + @%p673 bra $L__BB5_132; + + cvt.rzi.f32.f32 %f1218, %f588; + setp.eq.f32 %p241, %f1218, 0f40000000; + mov.f32 %f2866, %f184; + @%p241 bra $L__BB5_132; + + mov.f32 %f2866, 0f7FFFFFFF; + +$L__BB5_132: + cvt.rn.f32.s32 %f2737, %r785; + sub.f32 %f2736, %f2737, %f2890; + abs.f32 %f2735, %f98; + setp.lt.f32 %p675, %f98, 0f00000000; + and.pred %p674, %p675, %p106; + mov.f32 %f2734, 0f32A57060; + mov.f32 %f2733, 0f4B400001; + mov.f32 %f2732, 0f437C0000; + mov.f32 %f2731, 0f3BBB989D; + mov.f32 %f2730, 0f3FB8AA3B; + mov.f32 %f2729, 0f3F000000; + add.f32 %f1220, %f2735, 0f40000000; + mov.b32 %r347, %f1220; + setp.gt.s32 %p242, %r347, 2139095039; + add.f32 %f1221, %f98, 0f40000000; + setp.gtu.f32 %p243, %f2735, 0f7F800000; + selp.f32 %f1222, %f1221, %f2866, %p243; + selp.f32 %f1223, 0fFF800000, 0f7F800000, %p674; + setp.neu.f32 %p244, %f2735, 0f7F800000; + selp.f32 %f1224, %f1222, %f1223, %p244; + selp.f32 %f1225, %f1224, %f2866, %p242; + mul.f32 %f1226, %f1225, 0fBF000000; + setp.eq.f32 %p245, %f98, 0f3F800000; + selp.f32 %f1227, 0fBF000000, %f1226, %p245; + fma.rn.f32 %f1230, %f1227, %f2731, %f2729; + cvt.sat.f32.f32 %f1233, %f1230; + fma.rm.f32 %f1235, %f1233, %f2732, %f2733; + add.f32 %f1236, %f1235, 0fCB40007F; + neg.f32 %f1237, %f1236; + fma.rn.f32 %f1238, %f1227, %f2730, %f1237; + fma.rn.f32 %f1240, %f1227, %f2734, %f1238; + mov.b32 %r348, %f1235; + shl.b32 %r349, %r348, 23; + mov.b32 %f1241, %r349; + ex2.approx.ftz.f32 %f1242, %f1240; + mul.f32 %f187, %f1242, %f1241; + add.f32 %f1243, %f2736, 0f3F800000; + mul.f32 %f1244, %f1243, %f181; + mul.f32 %f1245, %f2736, %f187; + sub.f32 %f1246, %f1244, %f1245; + mul.f32 %f1247, %f61, %f1246; + mul.f32 %f188, %f129, %f1247; + not.pred %p246, %p9; + mov.f64 %fd516, %fd38; + @%p246 bra $L__BB5_134; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r350}, %fd38; + } + xor.b32 %r351, %r350, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r352, %temp}, %fd38; + } + mov.b64 %fd516, {%r352, %r351}; + +$L__BB5_134: + setp.eq.f32 %p678, %f2886, 0f00000000; + @%p678 bra $L__BB5_138; + bra.uni $L__BB5_135; + +$L__BB5_138: + mov.u32 %r353, 0; + mov.b64 %fd516, {%r353, %r63}; + bra.uni $L__BB5_139; + +$L__BB5_135: + setp.gt.s32 %p248, %r53, -1; + @%p248 bra $L__BB5_139; + + mov.f64 %fd483, 0d4014000000000000; + cvt.rzi.f64.f64 %fd338, %fd483; + setp.eq.f64 %p249, %fd338, 0d4014000000000000; + @%p249 bra $L__BB5_139; + + mov.f64 %fd516, 0dFFF8000000000000; + +$L__BB5_139: + cvt.f64.f32 %fd479, %f2886; + add.f64 %fd478, %fd477, 0d4014000000000000; + selp.f64 %fd517, %fd516, %fd478, %p139; + @%p17 bra $L__BB5_144; + + mov.f64 %fd480, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r770}, %fd480; + } + and.b32 %r769, %r770, 2147483647; + setp.eq.s32 %p251, %r769, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r354, %temp}, %fd480; + } + setp.eq.s32 %p252, %r354, 0; + and.pred %p253, %p251, %p252; + @%p253 bra $L__BB5_143; + bra.uni $L__BB5_141; + +$L__BB5_143: + mov.u32 %r358, 0; + mov.b64 %fd517, {%r358, %r67}; + bra.uni $L__BB5_144; + +$L__BB5_141: + and.b32 %r355, %r53, 2147483647; + setp.ne.s32 %p254, %r355, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r356, %temp}, %fd307; + } + setp.ne.s32 %p255, %r356, 0; + or.pred %p256, %p254, %p255; + mov.f64 %fd517, %fd516; + @%p256 bra $L__BB5_144; + + mov.u32 %r357, 0; + mov.b64 %fd517, {%r357, %r69}; + +$L__BB5_144: + setp.eq.f32 %p679, %f2886, 0f3F800000; + selp.f64 %fd344, 0d3FF0000000000000, %fd517, %p679; + div.rn.f64 %fd70, %fd36, %fd344; + not.pred %p258, %p10; + mov.f64 %fd519, %fd40; + @%p258 bra $L__BB5_146; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r359}, %fd40; + } + xor.b32 %r360, %r359, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r361, %temp}, %fd40; + } + mov.b64 %fd519, {%r361, %r360}; + +$L__BB5_146: + cvt.rn.f32.s32 %f2740, %r785; + sub.f32 %f2739, %f2740, %f2890; + add.f32 %f2738, %f2739, 0f3F000000; + setp.eq.f32 %p259, %f2738, 0f00000000; + @%p259 bra $L__BB5_150; + bra.uni $L__BB5_147; + +$L__BB5_150: + mov.u32 %r362, 0; + selp.b32 %r364, %r65, 0, %p117; + or.b32 %r365, %r364, 2146435072; + selp.b32 %r366, %r365, %r364, %p119; + mov.b64 %fd519, {%r362, %r366}; + bra.uni $L__BB5_151; + +$L__BB5_147: + setp.gt.s32 %p260, %r65, -1; + @%p260 bra $L__BB5_151; + + cvt.rzi.f64.f64 %fd346, %fd309; + setp.eq.f64 %p261, %fd346, 0d4008000000000000; + @%p261 bra $L__BB5_151; + + mov.f64 %fd519, 0dFFF8000000000000; + +$L__BB5_151: + selp.f64 %fd520, %fd519, %fd41, %p141; + @%p18 bra $L__BB5_156; + + setp.eq.s32 %p265, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r367, %temp}, %fd309; + } + setp.eq.s32 %p266, %r367, 0; + and.pred %p267, %p265, %p266; + @%p267 bra $L__BB5_155; + bra.uni $L__BB5_153; + +$L__BB5_155: + mov.u32 %r374, 0; + mov.b64 %fd520, {%r374, %r71}; + bra.uni $L__BB5_156; + +$L__BB5_153: + cvt.rn.f32.s32 %f2743, %r785; + sub.f32 %f2742, %f2743, %f2890; + add.f32 %f2741, %f2742, 0f3F000000; + cvt.f64.f32 %fd481, %f2741; + and.b32 %r368, %r65, 2147483647; + setp.ne.s32 %p268, %r368, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r369, %temp}, %fd481; + } + setp.ne.s32 %p269, %r369, 0; + or.pred %p270, %p268, %p269; + mov.f64 %fd520, %fd519; + @%p270 bra $L__BB5_156; + + and.pred %p272, %p125, %p10; + selp.b32 %r372, %r61, %r60, %p272; + mov.u32 %r373, 0; + mov.b64 %fd520, {%r373, %r372}; + +$L__BB5_156: + cvt.rn.f32.s32 %f2746, %r785; + sub.f32 %f2745, %f2746, %f2890; + add.f32 %f2744, %f2745, 0f3F000000; + setp.eq.f32 %p273, %f2744, 0f3F800000; + selp.f64 %fd349, 0d3FF0000000000000, %fd520, %p273; + cvt.f64.f32 %fd350, %f181; + mul.f64 %fd79, %fd349, %fd350; + not.pred %p274, %p11; + mov.f64 %fd522, %fd43; + @%p274 bra $L__BB5_158; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r375}, %fd43; + } + xor.b32 %r376, %r375, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r377, %temp}, %fd43; + } + mov.b64 %fd522, {%r377, %r376}; + +$L__BB5_158: + setp.eq.f32 %p275, %f81, 0f00000000; + @%p275 bra $L__BB5_162; + bra.uni $L__BB5_159; + +$L__BB5_162: + mov.u32 %r378, 0; + selp.b32 %r380, %r70, 0, %p117; + or.b32 %r381, %r380, 2146435072; + selp.b32 %r382, %r381, %r380, %p119; + mov.b64 %fd522, {%r378, %r382}; + bra.uni $L__BB5_163; + +$L__BB5_159: + setp.gt.s32 %p276, %r70, -1; + @%p276 bra $L__BB5_163; + + cvt.rzi.f64.f64 %fd352, %fd309; + setp.eq.f64 %p277, %fd352, 0d4008000000000000; + @%p277 bra $L__BB5_163; + + mov.f64 %fd522, 0dFFF8000000000000; + +$L__BB5_163: + selp.f64 %fd523, %fd522, %fd44, %p149; + @%p19 bra $L__BB5_168; + + setp.eq.s32 %p281, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r383, %temp}, %fd309; + } + setp.eq.s32 %p282, %r383, 0; + and.pred %p283, %p281, %p282; + @%p283 bra $L__BB5_167; + bra.uni $L__BB5_165; + +$L__BB5_167: + mov.u32 %r390, 0; + mov.b64 %fd523, {%r390, %r73}; + bra.uni $L__BB5_168; + +$L__BB5_165: + cvt.rn.f32.s32 %f2749, %r785; + sub.f32 %f2748, %f2749, %f2890; + add.f32 %f2747, %f2748, 0fBF000000; + cvt.f64.f32 %fd482, %f2747; + and.b32 %r384, %r70, 2147483647; + setp.ne.s32 %p284, %r384, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r385, %temp}, %fd482; + } + setp.ne.s32 %p285, %r385, 0; + or.pred %p286, %p284, %p285; + mov.f64 %fd523, %fd522; + @%p286 bra $L__BB5_168; + + and.pred %p288, %p125, %p11; + selp.b32 %r388, %r61, %r60, %p288; + mov.u32 %r389, 0; + mov.b64 %fd523, {%r389, %r388}; + +$L__BB5_168: + cvt.f64.f32 %fd484, %f129; + cvt.rn.f32.s32 %f2785, %r786; + mov.f32 %f2758, 0f00000000; + mov.f32 %f2757, 0f3102E308; + mov.f32 %f2756, 0fBF317218; + mov.f32 %f2755, 0f35BFBE8E; + mov.f32 %f2754, 0f3F317200; + mov.f32 %f2753, 0f3DAAAABD; + mov.f32 %f2752, 0f3C4CAF63; + mov.f32 %f2751, 0f3B18F0FE; + mov.f32 %f2750, 0f3FB8AA3B; + setp.eq.f32 %p289, %f81, 0f3F800000; + selp.f64 %fd355, 0d3FF0000000000000, %fd523, %p289; + cvt.f64.f32 %fd356, %f187; + mul.f64 %fd357, %fd355, %fd356; + sub.f64 %fd358, %fd79, %fd357; + mul.f64 %fd359, %fd70, %fd358; + mul.f64 %fd360, %fd359, %fd484; + mul.f32 %f1249, %f62, %f188; + cvt.f64.f32 %fd361, %f1249; + sub.f64 %fd362, %fd361, %fd360; + cvt.rn.f32.f64 %f189, %fd362; + add.f32 %f1250, %f2785, 0f3F800000; + sub.f32 %f1251, %f1250, %f2889; + div.rn.f32 %f190, %f1251, %f2886; + abs.f32 %f191, %f190; + setp.lt.f32 %p290, %f191, 0f00800000; + mul.f32 %f1252, %f191, 0f4B800000; + selp.f32 %f1253, %f1252, %f191, %p290; + selp.f32 %f1254, 0fC3170000, 0fC2FE0000, %p290; + mov.b32 %r391, %f1253; + and.b32 %r392, %r391, 8388607; + or.b32 %r393, %r392, 1065353216; + mov.b32 %f1255, %r393; + shr.u32 %r394, %r391, 23; + cvt.rn.f32.u32 %f1256, %r394; + add.f32 %f1257, %f1254, %f1256; + setp.gt.f32 %p291, %f1255, 0f3FB504F3; + mul.f32 %f1258, %f1255, 0f3F000000; + add.f32 %f1259, %f1257, 0f3F800000; + selp.f32 %f1260, %f1259, %f1257, %p291; + selp.f32 %f1261, %f1258, %f1255, %p291; + add.f32 %f1262, %f1261, 0fBF800000; + add.f32 %f1263, %f1261, 0f3F800000; + rcp.approx.ftz.f32 %f1264, %f1263; + add.f32 %f1265, %f1262, %f1262; + mul.f32 %f1267, %f1265, %f1264; + mul.f32 %f1268, %f1267, %f1267; + fma.rn.f32 %f1271, %f2751, %f1268, %f2752; + fma.rn.f32 %f1273, %f1271, %f1268, %f2753; + mul.rn.f32 %f1274, %f1273, %f1268; + mul.rn.f32 %f1275, %f1274, %f1267; + sub.f32 %f1276, %f1262, %f1267; + add.f32 %f1277, %f1276, %f1276; + neg.f32 %f1278, %f1267; + fma.rn.f32 %f1279, %f1278, %f1262, %f1277; + mul.rn.f32 %f1280, %f1264, %f1279; + add.f32 %f1281, %f1275, %f1267; + sub.f32 %f1282, %f1267, %f1281; + add.f32 %f1283, %f1275, %f1282; + add.f32 %f1284, %f1280, %f1283; + add.f32 %f1285, %f1281, %f1284; + sub.f32 %f1286, %f1281, %f1285; add.f32 %f1287, %f1284, %f1286; - mul.rn.f32 %f1289, %f841, %f1285; - neg.f32 %f1290, %f1289; - fma.rn.f32 %f1291, %f841, %f1285, %f1290; - fma.rn.f32 %f1292, %f841, %f1287, %f1291; - fma.rn.f32 %f1294, %f2856, %f1285, %f1292; - add.rn.f32 %f1295, %f1289, %f1294; - neg.f32 %f1296, %f1295; - add.rn.f32 %f1297, %f1289, %f1296; - add.rn.f32 %f1298, %f1297, %f1294; - mov.b32 %r173, %f1295; - setp.eq.s32 %p114, %r173, 1118925336; - add.s32 %r174, %r173, -1; - mov.b32 %f1299, %r174; - add.f32 %f1300, %f1298, 0f37000000; - selp.f32 %f1301, %f1299, %f1295, %p114; - selp.f32 %f223, %f1300, %f1298, %p114; - mul.f32 %f1302, %f1301, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1303, %f1302; - fma.rn.f32 %f1304, %f1303, %f2851, %f1301; - fma.rn.f32 %f1305, %f1303, %f2852, %f1304; - mul.f32 %f1306, %f1305, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1307, %f1306; - add.f32 %f1308, %f1303, 0f00000000; - ex2.approx.f32 %f1309, %f1308; - mul.f32 %f1310, %f1307, %f1309; - setp.lt.f32 %p115, %f1301, 0fC2D20000; - selp.f32 %f1311, 0f00000000, %f1310, %p115; - setp.gt.f32 %p116, %f1301, 0f42D20000; - selp.f32 %f3072, 0f7F800000, %f1311, %p116; - setp.eq.f32 %p117, %f3072, 0f7F800000; - @%p117 bra BB5_74; - - fma.rn.f32 %f3072, %f3072, %f223, %f3072; - -BB5_74: - setp.lt.f32 %p118, %f216, 0f00000000; - and.pred %p6, %p118, %p55; - mov.b32 %r175, %f3072; - xor.b32 %r176, %r175, -2147483648; - mov.b32 %f1312, %r176; - selp.f32 %f3074, %f1312, %f3072, %p6; - setp.eq.f32 %p120, %f216, 0f00000000; - @%p120 bra BB5_77; - bra.uni BB5_75; - -BB5_77: - add.f32 %f1315, %f216, %f216; - selp.f32 %f3074, %f1315, 0f00000000, %p55; - bra.uni BB5_78; - -BB5_75: - setp.geu.f32 %p121, %f216, 0f00000000; - @%p121 bra BB5_78; - - cvt.rzi.f32.f32 %f1314, %f841; - setp.neu.f32 %p122, %f1314, 0f40000000; - selp.f32 %f3074, 0f7FFFFFFF, %f3074, %p122; - -BB5_78: - abs.f32 %f2772, %f216; - add.f32 %f1316, %f2772, 0f40000000; - mov.b32 %r37, %f1316; - setp.lt.s32 %p124, %r37, 2139095040; - @%p124 bra BB5_83; - - abs.f32 %f2869, %f216; - setp.gtu.f32 %p125, %f2869, 0f7F800000; - @%p125 bra BB5_82; - bra.uni BB5_80; - -BB5_82: - add.f32 %f3074, %f216, 0f40000000; - bra.uni BB5_83; - -BB5_80: - abs.f32 %f2870, %f216; - setp.neu.f32 %p126, %f2870, 0f7F800000; - @%p126 bra BB5_83; - - selp.f32 %f3074, 0fFF800000, 0f7F800000, %p6; - -BB5_83: - cvt.rn.f32.s32 %f2874, %r317; - sub.f32 %f2873, %f2874, %f3101; - cvt.rn.f32.s32 %f2781, %r317; - add.f32 %f2780, %f2781, 0f3F800000; - sub.f32 %f2779, %f2780, %f3101; - mov.f32 %f2778, 0f00000000; - mov.f32 %f2777, 0f3DAAAABD; - mov.f32 %f2776, 0f3C4CAF63; - mov.f32 %f2775, 0f3B18F0FE; - mov.f32 %f2774, 0fB5BFBE8E; - mov.f32 %f2773, 0fBF317200; - mul.f32 %f1319, %f3074, 0fBF000000; - setp.eq.f32 %p127, %f216, 0f3F800000; - selp.f32 %f1320, 0fBF000000, %f1319, %p127; - mul.f32 %f1321, %f1320, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1322, %f1321; - fma.rn.f32 %f1324, %f1322, %f2773, %f1320; - fma.rn.f32 %f1326, %f1322, %f2774, %f1324; - mul.f32 %f1327, %f1326, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1328, %f1327; - add.f32 %f1329, %f1322, 0f00000000; - ex2.approx.f32 %f1330, %f1329; - mul.f32 %f1331, %f1328, %f1330; - setp.lt.f32 %p128, %f1320, 0fC2D20000; - selp.f32 %f1332, 0f00000000, %f1331, %p128; - setp.gt.f32 %p129, %f1320, 0f42D20000; - selp.f32 %f1333, 0f7F800000, %f1332, %p129; - sub.f32 %f1334, %f215, %f1333; - mul.f32 %f1335, %f96, %f1334; - mul.f32 %f234, %f159, %f1335; - mul.f32 %f1336, %f2873, %f1333; - mul.f32 %f1337, %f2779, %f215; - sub.f32 %f1338, %f1337, %f1336; - mul.f32 %f1339, %f1338, %f99; - mul.f32 %f235, %f159, %f1339; - // inline asm - rcp.approx.ftz.f32 %f1317,%f128; - // inline asm - mul.f32 %f1340, %f1317, %f129; - mul.f32 %f1341, %f1340, %f1340; - fma.rn.f32 %f1344, %f2775, %f1341, %f2776; - fma.rn.f32 %f1346, %f1344, %f1341, %f2777; - mul.rn.f32 %f1347, %f1346, %f1341; - mul.rn.f32 %f1348, %f1347, %f1340; - sub.f32 %f1349, %f127, %f1340; - neg.f32 %f1350, %f1340; - add.f32 %f1351, %f1349, %f1349; - fma.rn.f32 %f1352, %f1350, %f127, %f1351; - mul.rn.f32 %f1353, %f1317, %f1352; - add.f32 %f1354, %f1348, %f1340; - sub.f32 %f1355, %f1340, %f1354; - add.f32 %f1356, %f1348, %f1355; - add.f32 %f1357, %f1353, %f1356; - add.f32 %f1358, %f1354, %f1357; - sub.f32 %f1359, %f1354, %f1358; - add.f32 %f1360, %f1357, %f1359; - add.f32 %f1361, %f130, %f1358; - sub.f32 %f1362, %f130, %f1361; - add.f32 %f1363, %f1358, %f1362; - add.f32 %f1364, %f1360, %f1363; - add.f32 %f1365, %f131, %f1364; - add.f32 %f1366, %f1361, %f1365; - sub.f32 %f1367, %f1361, %f1366; - add.f32 %f1368, %f1365, %f1367; - mul.rn.f32 %f1370, %f841, %f1366; - neg.f32 %f1371, %f1370; - fma.rn.f32 %f1372, %f841, %f1366, %f1371; - fma.rn.f32 %f1373, %f841, %f1368, %f1372; - fma.rn.f32 %f1375, %f2778, %f1366, %f1373; - add.rn.f32 %f1376, %f1370, %f1375; - neg.f32 %f1377, %f1376; - add.rn.f32 %f1378, %f1370, %f1377; - add.rn.f32 %f1379, %f1378, %f1375; - mov.b32 %r177, %f1376; - setp.eq.s32 %p130, %r177, 1118925336; - add.s32 %r178, %r177, -1; - mov.b32 %f1380, %r178; - add.f32 %f1381, %f1379, 0f37000000; - selp.f32 %f1382, %f1380, %f1376, %p130; - selp.f32 %f236, %f1381, %f1379, %p130; - mul.f32 %f1383, %f1382, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1384, %f1383; - fma.rn.f32 %f1385, %f1384, %f2773, %f1382; - fma.rn.f32 %f1386, %f1384, %f2774, %f1385; - mul.f32 %f1387, %f1386, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1388, %f1387; - add.f32 %f1389, %f1384, 0f00000000; - ex2.approx.f32 %f1390, %f1389; - mul.f32 %f1391, %f1388, %f1390; - setp.lt.f32 %p131, %f1382, 0fC2D20000; - selp.f32 %f1392, 0f00000000, %f1391, %p131; - setp.gt.f32 %p132, %f1382, 0f42D20000; - selp.f32 %f3075, 0f7F800000, %f1392, %p132; - setp.eq.f32 %p133, %f3075, 0f7F800000; - @%p133 bra BB5_85; - - fma.rn.f32 %f3075, %f3075, %f236, %f3075; - -BB5_85: - setp.eq.f32 %p332, %f124, 0f00000000; - setp.geu.f32 %p331, %f124, 0f00000000; - mov.b32 %r179, %f3075; - xor.b32 %r180, %r179, -2147483648; - mov.b32 %f1393, %r180; - selp.f32 %f240, %f1393, %f3075, %p1; - selp.f32 %f3076, %f132, %f240, %p332; - @%p331 bra BB5_87; - - cvt.rzi.f32.f32 %f1395, %f841; - setp.neu.f32 %p135, %f1395, 0f40000000; - selp.f32 %f3076, 0f7FFFFFFF, %f240, %p135; - -BB5_87: - abs.f32 %f2791, %f124; - setp.eq.f32 %p336, %f124, 0f3F800000; - add.f32 %f2790, %f2791, 0f40000000; - mov.b32 %r282, %f2790; - setp.gt.s32 %p335, %r282, 2139095039; - setp.neu.f32 %p334, %f2791, 0f7F800000; - selp.f32 %f2789, 0fFF800000, 0f7F800000, %p1; - setp.gtu.f32 %p333, %f2791, 0f7F800000; - add.f32 %f2788, %f124, 0f40000000; - mov.f32 %f2787, 0f00000000; - mov.f32 %f2786, 0f3DAAAABD; - mov.f32 %f2785, 0f3C4CAF63; - mov.f32 %f2784, 0f3B18F0FE; - mov.f32 %f2783, 0fB5BFBE8E; - mov.f32 %f2782, 0fBF317200; - selp.f32 %f1399, %f2788, %f3076, %p333; - selp.f32 %f1401, %f1399, %f2789, %p334; - selp.f32 %f1402, %f1401, %f3076, %p335; - mul.f32 %f1403, %f1402, 0fBF000000; - selp.f32 %f1404, 0fBF000000, %f1403, %p336; - mul.f32 %f1405, %f1404, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1406, %f1405; - fma.rn.f32 %f1408, %f1406, %f2782, %f1404; - fma.rn.f32 %f1410, %f1406, %f2783, %f1408; - mul.f32 %f1411, %f1410, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1412, %f1411; - add.f32 %f1413, %f1406, 0f00000000; - ex2.approx.f32 %f1414, %f1413; - mul.f32 %f1415, %f1412, %f1414; - setp.lt.f32 %p140, %f1404, 0fC2D20000; - selp.f32 %f1416, 0f00000000, %f1415, %p140; - setp.gt.f32 %p141, %f1404, 0f42D20000; - selp.f32 %f244, 0f7F800000, %f1416, %p141; - // inline asm - rcp.approx.ftz.f32 %f1396,%f136; - // inline asm - mul.f32 %f1417, %f1396, %f137; - mul.f32 %f1418, %f1417, %f1417; - fma.rn.f32 %f1421, %f2784, %f1418, %f2785; - fma.rn.f32 %f1423, %f1421, %f1418, %f2786; - mul.rn.f32 %f1424, %f1423, %f1418; - mul.rn.f32 %f1425, %f1424, %f1417; - sub.f32 %f1426, %f135, %f1417; - neg.f32 %f1427, %f1417; - add.f32 %f1428, %f1426, %f1426; - fma.rn.f32 %f1429, %f1427, %f135, %f1428; - mul.rn.f32 %f1430, %f1396, %f1429; - add.f32 %f1431, %f1425, %f1417; - sub.f32 %f1432, %f1417, %f1431; - add.f32 %f1433, %f1425, %f1432; - add.f32 %f1434, %f1430, %f1433; - add.f32 %f1435, %f1431, %f1434; - sub.f32 %f1436, %f1431, %f1435; - add.f32 %f1437, %f1434, %f1436; - add.f32 %f1438, %f138, %f1435; - sub.f32 %f1439, %f138, %f1438; - add.f32 %f1440, %f1435, %f1439; - add.f32 %f1441, %f1437, %f1440; - add.f32 %f1442, %f139, %f1441; - add.f32 %f1443, %f1438, %f1442; - sub.f32 %f1444, %f1438, %f1443; - add.f32 %f1445, %f1442, %f1444; - mul.rn.f32 %f1447, %f841, %f1443; - neg.f32 %f1448, %f1447; - fma.rn.f32 %f1449, %f841, %f1443, %f1448; - fma.rn.f32 %f1450, %f841, %f1445, %f1449; - fma.rn.f32 %f1452, %f2787, %f1443, %f1450; - add.rn.f32 %f1453, %f1447, %f1452; - neg.f32 %f1454, %f1453; - add.rn.f32 %f1455, %f1447, %f1454; - add.rn.f32 %f1456, %f1455, %f1452; - mov.b32 %r181, %f1453; - setp.eq.s32 %p142, %r181, 1118925336; - add.s32 %r182, %r181, -1; - mov.b32 %f1457, %r182; - add.f32 %f1458, %f1456, 0f37000000; - selp.f32 %f1459, %f1457, %f1453, %p142; - selp.f32 %f245, %f1458, %f1456, %p142; - mul.f32 %f1460, %f1459, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1461, %f1460; - fma.rn.f32 %f1462, %f1461, %f2782, %f1459; - fma.rn.f32 %f1463, %f1461, %f2783, %f1462; - mul.f32 %f1464, %f1463, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1465, %f1464; - add.f32 %f1466, %f1461, 0f00000000; - ex2.approx.f32 %f1467, %f1466; - mul.f32 %f1468, %f1465, %f1467; - setp.lt.f32 %p143, %f1459, 0fC2D20000; - selp.f32 %f1469, 0f00000000, %f1468, %p143; - setp.gt.f32 %p144, %f1459, 0f42D20000; - selp.f32 %f3077, 0f7F800000, %f1469, %p144; - setp.eq.f32 %p145, %f3077, 0f7F800000; - @%p145 bra BB5_89; - - fma.rn.f32 %f3077, %f3077, %f245, %f3077; - -BB5_89: - setp.eq.f32 %p338, %f133, 0f00000000; - setp.geu.f32 %p337, %f133, 0f00000000; - mov.b32 %r183, %f3077; - xor.b32 %r184, %r183, -2147483648; - mov.b32 %f1470, %r184; - selp.f32 %f249, %f1470, %f3077, %p2; - selp.f32 %f3078, %f140, %f249, %p338; - @%p337 bra BB5_91; - - cvt.rzi.f32.f32 %f1472, %f841; - setp.neu.f32 %p147, %f1472, 0f40000000; - selp.f32 %f3078, 0f7FFFFFFF, %f249, %p147; - -BB5_91: - abs.f32 %f2806, %f133; - cvt.rn.f32.s32 %f2805, %r316; - sub.f32 %f2804, %f2805, %f3102; - mul.f32 %f2803, %f2804, %f2804; - mul.f32 %f2802, %f2804, %f2803; - add.f32 %f2801, %f2804, 0f3F800000; - setp.eq.f32 %p342, %f133, 0f3F800000; - add.f32 %f2800, %f2806, 0f40000000; - mov.b32 %r283, %f2800; - setp.gt.s32 %p341, %r283, 2139095039; - setp.neu.f32 %p340, %f2806, 0f7F800000; - selp.f32 %f2799, 0fFF800000, 0f7F800000, %p2; - setp.gtu.f32 %p339, %f2806, 0f7F800000; - add.f32 %f2798, %f133, 0f40000000; - mov.f32 %f2797, 0f00000000; - mov.f32 %f2796, 0f3DAAAABD; - mov.f32 %f2795, 0f3C4CAF63; - mov.f32 %f2794, 0f3B18F0FE; - mov.f32 %f2793, 0fB5BFBE8E; - mov.f32 %f2792, 0fBF317200; - selp.f32 %f1476, %f2798, %f3078, %p339; - selp.f32 %f1478, %f1476, %f2799, %p340; - selp.f32 %f1479, %f1478, %f3078, %p341; - mul.f32 %f1480, %f1479, 0fBF000000; - selp.f32 %f1481, 0fBF000000, %f1480, %p342; - mul.f32 %f1482, %f1481, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1483, %f1482; - fma.rn.f32 %f1485, %f1483, %f2792, %f1481; - fma.rn.f32 %f1487, %f1483, %f2793, %f1485; - mul.f32 %f1488, %f1487, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1489, %f1488; - add.f32 %f1490, %f1483, 0f00000000; - ex2.approx.f32 %f1491, %f1490; - mul.f32 %f1492, %f1489, %f1491; - setp.lt.f32 %p152, %f1481, 0fC2D20000; - selp.f32 %f1493, 0f00000000, %f1492, %p152; - setp.gt.f32 %p153, %f1481, 0f42D20000; - selp.f32 %f1494, 0f7F800000, %f1493, %p153; - mul.f32 %f1495, %f2804, %f1494; - mul.f32 %f1496, %f2801, %f244; - sub.f32 %f1497, %f1496, %f1495; - mul.f32 %f1498, %f97, %f1497; - mul.f32 %f253, %f173, %f1498; - mul.f32 %f1499, %f98, %f253; - mul.f32 %f1500, %f1494, %f2802; - mul.f32 %f1501, %f244, %f141; - sub.f32 %f1502, %f1501, %f1500; - mul.f32 %f1503, %f100, %f1502; - mul.f32 %f1504, %f173, %f1503; - sub.f32 %f254, %f1499, %f1504; - // inline asm - rcp.approx.ftz.f32 %f1473,%f1133; - // inline asm - mul.f32 %f1505, %f1473, %f201; - mul.f32 %f1506, %f1505, %f1505; - fma.rn.f32 %f1509, %f2794, %f1506, %f2795; - fma.rn.f32 %f1511, %f1509, %f1506, %f2796; - mul.rn.f32 %f1512, %f1511, %f1506; - mul.rn.f32 %f1513, %f1512, %f1505; - sub.f32 %f1514, %f199, %f1505; - neg.f32 %f1515, %f1505; - add.f32 %f1516, %f1514, %f1514; - fma.rn.f32 %f1517, %f1515, %f199, %f1516; - mul.rn.f32 %f1518, %f1473, %f1517; - add.f32 %f1519, %f1513, %f1505; - sub.f32 %f1520, %f1505, %f1519; - add.f32 %f1521, %f1513, %f1520; - add.f32 %f1522, %f1518, %f1521; - add.f32 %f1523, %f1519, %f1522; - sub.f32 %f1524, %f1519, %f1523; - add.f32 %f1525, %f1522, %f1524; - add.f32 %f1526, %f202, %f1523; - sub.f32 %f1527, %f202, %f1526; - add.f32 %f1528, %f1523, %f1527; - add.f32 %f1529, %f1525, %f1528; - add.f32 %f1530, %f203, %f1529; - add.f32 %f1531, %f1526, %f1530; - sub.f32 %f1532, %f1526, %f1531; - add.f32 %f1533, %f1530, %f1532; - mul.rn.f32 %f1535, %f841, %f1531; - neg.f32 %f1536, %f1535; - fma.rn.f32 %f1537, %f841, %f1531, %f1536; - fma.rn.f32 %f1538, %f841, %f1533, %f1537; - fma.rn.f32 %f1540, %f2797, %f1531, %f1538; - add.rn.f32 %f1541, %f1535, %f1540; - neg.f32 %f1542, %f1541; - add.rn.f32 %f1543, %f1535, %f1542; - add.rn.f32 %f1544, %f1543, %f1540; - mov.b32 %r185, %f1541; - setp.eq.s32 %p154, %r185, 1118925336; - add.s32 %r186, %r185, -1; - mov.b32 %f1545, %r186; - add.f32 %f1546, %f1544, 0f37000000; - selp.f32 %f1547, %f1545, %f1541, %p154; - selp.f32 %f255, %f1546, %f1544, %p154; - mul.f32 %f1548, %f1547, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1549, %f1548; - fma.rn.f32 %f1550, %f1549, %f2792, %f1547; - fma.rn.f32 %f1551, %f1549, %f2793, %f1550; - mul.f32 %f1552, %f1551, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1553, %f1552; - add.f32 %f1554, %f1549, 0f00000000; - ex2.approx.f32 %f1555, %f1554; - mul.f32 %f1556, %f1553, %f1555; - setp.lt.f32 %p155, %f1547, 0fC2D20000; - selp.f32 %f1557, 0f00000000, %f1556, %p155; - setp.gt.f32 %p156, %f1547, 0f42D20000; - selp.f32 %f3079, 0f7F800000, %f1557, %p156; - setp.eq.f32 %p157, %f3079, 0f7F800000; - @%p157 bra BB5_93; - - fma.rn.f32 %f3079, %f3079, %f255, %f3079; - -BB5_93: - setp.eq.f32 %p343, %f197, 0f00000000; - mov.b32 %r187, %f3079; - xor.b32 %r188, %r187, -2147483648; - mov.b32 %f1558, %r188; - selp.f32 %f3081, %f1558, %f3079, %p5; - @%p343 bra BB5_96; - bra.uni BB5_94; - -BB5_96: - add.f32 %f1561, %f197, %f197; - selp.f32 %f3081, %f1561, 0f00000000, %p55; - bra.uni BB5_97; - -BB5_94: - setp.geu.f32 %p159, %f197, 0f00000000; - @%p159 bra BB5_97; - - cvt.rzi.f32.f32 %f1560, %f841; - setp.neu.f32 %p160, %f1560, 0f40000000; - selp.f32 %f3081, 0f7FFFFFFF, %f3081, %p160; - -BB5_97: - abs.f32 %f2808, %f197; - add.f32 %f2807, %f2808, 0f40000000; - mov.b32 %r284, %f2807; - setp.lt.s32 %p344, %r284, 2139095040; - @%p344 bra BB5_102; - - abs.f32 %f2867, %f197; - setp.gtu.f32 %p163, %f2867, 0f7F800000; - @%p163 bra BB5_101; - bra.uni BB5_99; - -BB5_101: - add.f32 %f3081, %f197, 0f40000000; - bra.uni BB5_102; - -BB5_99: - abs.f32 %f2868, %f197; - setp.neu.f32 %p164, %f2868, 0f7F800000; - @%p164 bra BB5_102; - - selp.f32 %f3081, 0fFF800000, 0f7F800000, %p5; - -BB5_102: - setp.eq.f32 %p345, %f197, 0f3F800000; - mov.f32 %f2814, 0f00000000; - mov.f32 %f2813, 0f3DAAAABD; - mov.f32 %f2812, 0f3C4CAF63; - mov.f32 %f2811, 0f3B18F0FE; - mov.f32 %f2810, 0fB5BFBE8E; - mov.f32 %f2809, 0fBF317200; - mul.f32 %f1564, %f3081, 0fBF000000; - selp.f32 %f1565, 0fBF000000, %f1564, %p345; - mul.f32 %f1566, %f1565, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1567, %f1566; - fma.rn.f32 %f1569, %f1567, %f2809, %f1565; - fma.rn.f32 %f1571, %f1567, %f2810, %f1569; - mul.f32 %f1572, %f1571, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1573, %f1572; - add.f32 %f1574, %f1567, 0f00000000; - ex2.approx.f32 %f1575, %f1574; - mul.f32 %f1576, %f1573, %f1575; - setp.lt.f32 %p166, %f1565, 0fC2D20000; - selp.f32 %f1577, 0f00000000, %f1576, %p166; - setp.gt.f32 %p167, %f1565, 0f42D20000; - selp.f32 %f266, 0f7F800000, %f1577, %p167; - // inline asm - rcp.approx.ftz.f32 %f1562,%f1232; - // inline asm - mul.f32 %f1578, %f1562, %f220; - mul.f32 %f1579, %f1578, %f1578; - fma.rn.f32 %f1582, %f2811, %f1579, %f2812; - fma.rn.f32 %f1584, %f1582, %f1579, %f2813; - mul.rn.f32 %f1585, %f1584, %f1579; - mul.rn.f32 %f1586, %f1585, %f1578; - sub.f32 %f1587, %f218, %f1578; - neg.f32 %f1588, %f1578; - add.f32 %f1589, %f1587, %f1587; - fma.rn.f32 %f1590, %f1588, %f218, %f1589; - mul.rn.f32 %f1591, %f1562, %f1590; - add.f32 %f1592, %f1586, %f1578; - sub.f32 %f1593, %f1578, %f1592; - add.f32 %f1594, %f1586, %f1593; - add.f32 %f1595, %f1591, %f1594; - add.f32 %f1596, %f1592, %f1595; - sub.f32 %f1597, %f1592, %f1596; - add.f32 %f1598, %f1595, %f1597; - add.f32 %f1599, %f221, %f1596; - sub.f32 %f1600, %f221, %f1599; - add.f32 %f1601, %f1596, %f1600; - add.f32 %f1602, %f1598, %f1601; - add.f32 %f1603, %f222, %f1602; - add.f32 %f1604, %f1599, %f1603; - sub.f32 %f1605, %f1599, %f1604; - add.f32 %f1606, %f1603, %f1605; - mul.rn.f32 %f1608, %f841, %f1604; - neg.f32 %f1609, %f1608; - fma.rn.f32 %f1610, %f841, %f1604, %f1609; - fma.rn.f32 %f1611, %f841, %f1606, %f1610; - fma.rn.f32 %f1613, %f2814, %f1604, %f1611; - add.rn.f32 %f1614, %f1608, %f1613; - neg.f32 %f1615, %f1614; - add.rn.f32 %f1616, %f1608, %f1615; - add.rn.f32 %f1617, %f1616, %f1613; - mov.b32 %r189, %f1614; - setp.eq.s32 %p168, %r189, 1118925336; - add.s32 %r190, %r189, -1; - mov.b32 %f1618, %r190; - add.f32 %f1619, %f1617, 0f37000000; - selp.f32 %f1620, %f1618, %f1614, %p168; - selp.f32 %f267, %f1619, %f1617, %p168; - mul.f32 %f1621, %f1620, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1622, %f1621; - fma.rn.f32 %f1623, %f1622, %f2809, %f1620; - fma.rn.f32 %f1624, %f1622, %f2810, %f1623; - mul.f32 %f1625, %f1624, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1626, %f1625; - add.f32 %f1627, %f1622, 0f00000000; - ex2.approx.f32 %f1628, %f1627; - mul.f32 %f1629, %f1626, %f1628; - setp.lt.f32 %p169, %f1620, 0fC2D20000; - selp.f32 %f1630, 0f00000000, %f1629, %p169; - setp.gt.f32 %p170, %f1620, 0f42D20000; - selp.f32 %f3082, 0f7F800000, %f1630, %p170; - setp.eq.f32 %p171, %f3082, 0f7F800000; - @%p171 bra BB5_104; - - fma.rn.f32 %f3082, %f3082, %f267, %f3082; - -BB5_104: - setp.eq.f32 %p348, %f216, 0f00000000; - mov.b32 %r191, %f3082; - xor.b32 %r192, %r191, -2147483648; - mov.b32 %f1631, %r192; - selp.f32 %f3084, %f1631, %f3082, %p6; - @%p348 bra BB5_107; - bra.uni BB5_105; - -BB5_107: - add.f32 %f1634, %f216, %f216; - selp.f32 %f3084, %f1634, 0f00000000, %p55; - bra.uni BB5_108; - -BB5_105: - setp.geu.f32 %p173, %f216, 0f00000000; - @%p173 bra BB5_108; - - cvt.rzi.f32.f32 %f1633, %f841; - setp.neu.f32 %p174, %f1633, 0f40000000; - selp.f32 %f3084, 0f7FFFFFFF, %f3084, %p174; - -BB5_108: - abs.f32 %f2881, %f216; - add.f32 %f2880, %f2881, 0f40000000; - mov.b32 %r289, %f2880; - setp.lt.s32 %p349, %r289, 2139095040; - @%p349 bra BB5_113; - - abs.f32 %f2865, %f216; - setp.gtu.f32 %p177, %f2865, 0f7F800000; - @%p177 bra BB5_112; - bra.uni BB5_110; - -BB5_112: - add.f32 %f3084, %f216, 0f40000000; - bra.uni BB5_113; - -BB5_110: - abs.f32 %f2866, %f216; - setp.neu.f32 %p178, %f2866, 0f7F800000; - @%p178 bra BB5_113; - - selp.f32 %f3084, 0fFF800000, 0f7F800000, %p6; - -BB5_113: - setp.eq.f32 %p350, %f216, 0f3F800000; - cvt.rn.f32.s32 %f2820, %r317; - sub.f32 %f2819, %f2820, %f3101; - add.f32 %f2818, %f2819, 0f3F800000; - mov.f32 %f3085, 0f00000000; - mov.f32 %f2816, 0fB5BFBE8E; - mov.f32 %f2815, 0fBF317200; - mul.f32 %f1636, %f2818, %f2818; - mul.f32 %f1637, %f2818, %f1636; - mul.f32 %f1638, %f2819, %f2819; - mul.f32 %f1639, %f2819, %f1638; - mul.f32 %f1640, %f3084, 0fBF000000; - selp.f32 %f1641, 0fBF000000, %f1640, %p350; - mul.f32 %f1642, %f1641, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1643, %f1642; - fma.rn.f32 %f1645, %f1643, %f2815, %f1641; - fma.rn.f32 %f1647, %f1643, %f2816, %f1645; - mul.f32 %f1648, %f1647, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1649, %f1648; - add.f32 %f1650, %f1643, 0f00000000; - ex2.approx.f32 %f1651, %f1650; - mul.f32 %f1652, %f1649, %f1651; - setp.lt.f32 %p180, %f1641, 0fC2D20000; - selp.f32 %f1653, 0f00000000, %f1652, %p180; - setp.gt.f32 %p181, %f1641, 0f42D20000; - selp.f32 %f1654, 0f7F800000, %f1653, %p181; - mul.f32 %f1655, %f2819, %f1654; - mul.f32 %f1656, %f2818, %f266; - sub.f32 %f1657, %f1656, %f1655; - mul.f32 %f1658, %f97, %f1657; - mul.f32 %f1659, %f159, %f1658; - mul.f32 %f1660, %f98, %f1659; - mul.f32 %f1661, %f266, %f1637; - mul.f32 %f1662, %f1654, %f1639; - sub.f32 %f1663, %f1661, %f1662; - mul.f32 %f1664, %f100, %f1663; - mul.f32 %f1665, %f159, %f1664; - sub.f32 %f1666, %f1660, %f1665; - add.f32 %f278, %f253, %f1659; - add.f32 %f279, %f254, %f1666; - mul.f32 %f280, %f159, %f173; - setp.leu.f32 %p182, %f174, 0f3C23D70A; - @%p182 bra BB5_115; - - sub.f32 %f1667, %f175, %f174; - add.f32 %f1668, %f174, %f3097; - div.rn.f32 %f3085, %f1667, %f1668; - -BB5_115: - mov.f32 %f3086, 0f00000000; - @%p182 bra BB5_117; - - add.f32 %f1670, %f174, %f3097; - mul.f32 %f1671, %f1670, %f1670; - add.f32 %f1672, %f175, %f3097; - div.rn.f32 %f3086, %f1672, %f1671; - -BB5_117: - mov.f32 %f1673, 0f47C35000; - min.f32 %f1674, %f3085, %f1673; - fma.rn.f32 %f3055, %f1674, %f194, %f3055; - mul.f32 %f1675, %f1674, %f195; - mul.f32 %f1676, %f194, %f194; - min.f32 %f1677, %f3086, %f1673; - mul.f32 %f1678, %f1677, %f1676; - sub.f32 %f1679, %f1675, %f1678; - add.f32 %f3060, %f1679, %f3060; - fma.rn.f32 %f3054, %f1674, %f234, %f3054; - mul.f32 %f1680, %f1674, %f235; - mul.f32 %f1681, %f234, %f234; - mul.f32 %f1682, %f1677, %f1681; - sub.f32 %f1683, %f1680, %f1682; - add.f32 %f3059, %f1683, %f3059; - fma.rn.f32 %f3053, %f1674, %f280, %f3053; - mul.f32 %f1684, %f1674, 0f00000000; - mul.f32 %f1685, %f280, %f280; - mul.f32 %f1686, %f1677, %f1685; - sub.f32 %f1687, %f1684, %f1686; - add.f32 %f3058, %f1687, %f3058; - add.f32 %f3052, %f3052, %f1674; - sub.f32 %f1688, %f1684, %f1677; - add.f32 %f3057, %f1688, %f3057; - fma.rn.f32 %f3051, %f1674, %f278, %f3051; - mul.f32 %f1689, %f1674, %f279; - mul.f32 %f1690, %f278, %f278; - mul.f32 %f1691, %f1677, %f1690; - sub.f32 %f1692, %f1689, %f1691; - add.f32 %f3056, %f1692, %f3056; - add.s32 %r317, %r317, 1; - setp.lt.s32 %p184, %r317, %r63; - @%p184 bra BB5_41; - - add.s32 %r316, %r316, 1; - setp.lt.s32 %p185, %r316, %r63; - @%p185 bra BB5_40; - -BB5_119: - div.rn.f32 %f1693, %f3055, %f3060; - mov.f32 %f1694, 0fBF800000; - max.f32 %f1695, %f1693, %f1694; - mov.f32 %f1696, 0f3F800000; - min.f32 %f1697, %f1695, %f1696; - sub.f32 %f3102, %f3102, %f1697; - div.rn.f32 %f1698, %f3054, %f3059; - max.f32 %f1699, %f1698, %f1694; - min.f32 %f1700, %f1699, %f1696; - sub.f32 %f3101, %f3101, %f1700; - neg.f32 %f1701, %f3100; - div.rn.f32 %f1702, %f3053, %f3058; - max.f32 %f1703, %f1702, %f1701; - min.f32 %f1704, %f1703, %f3100; - sub.f32 %f1705, %f3100, %f1704; - neg.f32 %f1706, %f3013; - div.rn.f32 %f1707, %f3052, %f3057; - max.f32 %f1708, %f1707, %f1706; - min.f32 %f1709, %f1708, %f3013; - sub.f32 %f1710, %f3013, %f1709; - neg.f32 %f1711, %f3098; - div.rn.f32 %f1712, %f3051, %f3056; - max.f32 %f1713, %f1712, %f1711; - min.f32 %f1714, %f1713, %f3098; - sub.f32 %f1715, %f3098, %f1714; - max.f32 %f3100, %f1705, %f1696; - mov.f32 %f1716, 0f3C23D70A; - max.f32 %f3013, %f1710, %f1716; - max.f32 %f1718, %f1715, %f603; - min.f32 %f3098, %f1718, %f87; - add.s32 %r315, %r315, 1; - setp.lt.s32 %p186, %r315, %r65; - @%p186 bra BB5_38; - -BB5_120: - mov.f32 %f3120, 0f00000000; - @%p15 bra BB5_206; - - div.rn.f32 %f1722, %f603, %f3098; - div.rn.f32 %f317, %f1722, %f3098; - div.rn.f32 %f1723, %f3100, 0fC0206C98; - div.rn.f32 %f318, %f1723, %f3098; - div.rn.f32 %f319, %f318, %f3098; - mov.u32 %r193, 0; - mov.f32 %f3120, 0f00000000; - sqrt.rn.f32 %f323, %f317; - mov.u32 %r318, %r193; - -BB5_122: - cvt.rn.f32.s32 %f1724, %r318; - sub.f32 %f321, %f1724, %f3102; - add.f32 %f322, %f321, 0f3F800000; - mul.f32 %f324, %f322, %f323; - abs.f32 %f325, %f324; - mul.f32 %f326, %f324, %f324; - mul.f32 %f327, %f321, %f323; - abs.f32 %f328, %f327; - add.f32 %f1725, %f1724, 0f3F800000; - sub.f32 %f1726, %f1725, %f3102; - div.rn.f32 %f330, %f1726, %f3098; - mov.f32 %f1727, 0f3F800000; - cvt.rzi.f32.f32 %f1728, %f1727; - add.f32 %f1729, %f1728, %f1728; - mov.f32 %f1730, 0f40000000; - sub.f32 %f1731, %f1730, %f1729; - abs.f32 %f331, %f1731; - setp.eq.f32 %p188, %f331, 0f3F800000; - abs.f32 %f332, %f330; - setp.lt.f32 %p189, %f332, 0f00800000; - mul.f32 %f1732, %f332, 0f4B800000; - selp.f32 %f1733, 0fC3170000, 0fC2FE0000, %p189; - selp.f32 %f1734, %f1732, %f332, %p189; - mov.b32 %r195, %f1734; - and.b32 %r196, %r195, 8388607; - or.b32 %r197, %r196, 1065353216; - mov.b32 %f1735, %r197; - shr.u32 %r198, %r195, 23; - cvt.rn.f32.u32 %f1736, %r198; - add.f32 %f1737, %f1733, %f1736; - setp.gt.f32 %p190, %f1735, 0f3FB504F3; - mul.f32 %f1738, %f1735, 0f3F000000; - add.f32 %f1739, %f1737, 0f3F800000; - selp.f32 %f1740, %f1738, %f1735, %p190; - selp.f32 %f1741, %f1739, %f1737, %p190; - add.f32 %f333, %f1740, 0fBF800000; - add.f32 %f334, %f1740, 0f3F800000; - add.f32 %f335, %f333, %f333; - mov.f32 %f1742, 0f3F317200; - mul.rn.f32 %f336, %f1741, %f1742; - mov.f32 %f1743, 0f35BFBE8E; - mul.rn.f32 %f337, %f1741, %f1743; - setp.lt.f32 %p191, %f330, 0f00000000; - and.pred %p7, %p191, %p188; - add.f32 %f1744, %f330, %f330; - selp.f32 %f338, %f1744, 0f00000000, %p188; - div.rn.f32 %f341, %f321, %f3098; - abs.f32 %f342, %f341; - setp.lt.f32 %p192, %f342, 0f00800000; - mul.f32 %f1746, %f342, 0f4B800000; - selp.f32 %f1747, 0fC3170000, 0fC2FE0000, %p192; - selp.f32 %f1748, %f1746, %f342, %p192; - mov.b32 %r199, %f1748; - and.b32 %r200, %r199, 8388607; - or.b32 %r201, %r200, 1065353216; - mov.b32 %f1749, %r201; - shr.u32 %r202, %r199, 23; - cvt.rn.f32.u32 %f1750, %r202; + mul.rn.f32 %f1289, %f1260, %f2754; + mul.rn.f32 %f1291, %f1260, %f2755; + add.f32 %f1292, %f1289, %f1285; + sub.f32 %f1293, %f1289, %f1292; + add.f32 %f1294, %f1285, %f1293; + add.f32 %f1295, %f1287, %f1294; + add.f32 %f1296, %f1291, %f1295; + add.f32 %f1297, %f1292, %f1296; + sub.f32 %f1298, %f1292, %f1297; + add.f32 %f1299, %f1296, %f1298; + mul.rn.f32 %f1300, %f588, %f1297; + neg.f32 %f1301, %f1300; + fma.rn.f32 %f1302, %f588, %f1297, %f1301; + fma.rn.f32 %f1303, %f588, %f1299, %f1302; + fma.rn.f32 %f1305, %f2758, %f1297, %f1303; + add.rn.f32 %f1306, %f1300, %f1305; + neg.f32 %f1307, %f1306; + add.rn.f32 %f1308, %f1300, %f1307; + add.rn.f32 %f1309, %f1308, %f1305; + mov.b32 %r395, %f1306; + setp.eq.s32 %p292, %r395, 1118925336; + add.s32 %r396, %r395, -1; + mov.b32 %f1310, %r396; + add.f32 %f1311, %f1309, 0f37000000; + selp.f32 %f192, %f1311, %f1309, %p292; + selp.f32 %f1312, %f1310, %f1306, %p292; + mul.rn.f32 %f1314, %f1312, %f2750; + cvt.rzi.f32.f32 %f1315, %f1314; + abs.f32 %f1316, %f1315; + setp.gt.f32 %p293, %f1316, 0f42FC0000; + mov.b32 %r397, %f1315; + and.b32 %r398, %r397, -2147483648; + or.b32 %r399, %r398, 1123811328; + mov.b32 %f1317, %r399; + selp.f32 %f1318, %f1317, %f1315, %p293; + fma.rn.f32 %f1320, %f1318, %f2756, %f1312; + fma.rn.f32 %f1322, %f1318, %f2757, %f1320; + mul.f32 %f1323, %f1322, 0f3FB8AA3B; + add.f32 %f1324, %f1318, 0f4B40007F; + mov.b32 %r400, %f1324; + shl.b32 %r401, %r400, 23; + mov.b32 %f1325, %r401; + ex2.approx.ftz.f32 %f1326, %f1323; + mul.f32 %f193, %f1326, %f1325; + setp.eq.f32 %p294, %f193, 0f7F800000; + mov.f32 %f2867, 0f7F800000; + @%p294 bra $L__BB5_170; + + fma.rn.f32 %f2867, %f193, %f192, %f193; + +$L__BB5_170: + setp.lt.f32 %p295, %f190, 0f00000000; + and.pred %p22, %p295, %p106; + setp.eq.f32 %p297, %f190, 0f00000000; + @%p297 bra $L__BB5_174; + bra.uni $L__BB5_171; + +$L__BB5_174: + add.f32 %f1331, %f190, %f190; + selp.f32 %f2869, %f1331, 0f00000000, %p106; + bra.uni $L__BB5_175; + +$L__BB5_171: + mov.b32 %r402, %f2867; + xor.b32 %r403, %r402, -2147483648; + mov.b32 %f1327, %r403; + selp.f32 %f2869, %f1327, %f2867, %p22; + setp.geu.f32 %p298, %f190, 0f00000000; + @%p298 bra $L__BB5_175; + + cvt.rzi.f32.f32 %f1329, %f588; + setp.eq.f32 %p299, %f1329, 0f40000000; + @%p299 bra $L__BB5_175; + + mov.f32 %f2869, 0f7FFFFFFF; + +$L__BB5_175: + abs.f32 %f2791, %f190; + add.f32 %f1332, %f2791, 0f40000000; + mov.b32 %r404, %f1332; + setp.lt.s32 %p301, %r404, 2139095040; + @%p301 bra $L__BB5_180; + + abs.f32 %f2792, %f190; + setp.gtu.f32 %p302, %f2792, 0f7F800000; + @%p302 bra $L__BB5_179; + bra.uni $L__BB5_177; + +$L__BB5_179: + add.f32 %f2869, %f190, 0f40000000; + bra.uni $L__BB5_180; + +$L__BB5_177: + abs.f32 %f2793, %f190; + setp.neu.f32 %p303, %f2793, 0f7F800000; + @%p303 bra $L__BB5_180; + + selp.f32 %f2869, 0fFF800000, 0f7F800000, %p22; + +$L__BB5_180: + mov.f32 %f2774, 0f00000000; + mov.f32 %f2773, 0f3102E308; + mov.f32 %f2772, 0fBF317218; + mov.f32 %f2771, 0f35BFBE8E; + mov.f32 %f2770, 0f3F317200; + mov.f32 %f2769, 0f3DAAAABD; + mov.f32 %f2768, 0f3C4CAF63; + mov.f32 %f2767, 0f3B18F0FE; + mov.f32 %f2766, 0f32A57060; + mov.f32 %f2765, 0f4B400001; + mov.f32 %f2764, 0f437C0000; + mov.f32 %f2763, 0f3BBB989D; + mov.f32 %f2762, 0f3FB8AA3B; + mov.f32 %f2761, 0f3F000000; + cvt.rn.f32.s32 %f2760, %r786; + sub.f32 %f2759, %f2760, %f2889; + mul.f32 %f1334, %f2869, 0fBF000000; + setp.eq.f32 %p304, %f190, 0f3F800000; + selp.f32 %f1335, 0fBF000000, %f1334, %p304; + fma.rn.f32 %f1338, %f1335, %f2763, %f2761; + cvt.sat.f32.f32 %f1341, %f1338; + fma.rm.f32 %f1343, %f1341, %f2764, %f2765; + add.f32 %f1344, %f1343, 0fCB40007F; + neg.f32 %f1345, %f1344; + fma.rn.f32 %f1346, %f1335, %f2762, %f1345; + fma.rn.f32 %f1348, %f1335, %f2766, %f1346; + mov.b32 %r405, %f1343; + shl.b32 %r406, %r405, 23; + mov.b32 %f1349, %r406; + ex2.approx.ftz.f32 %f1350, %f1348; + mul.f32 %f202, %f1350, %f1349; + div.rn.f32 %f203, %f2759, %f2886; + abs.f32 %f204, %f203; + setp.lt.f32 %p305, %f204, 0f00800000; + mul.f32 %f1351, %f204, 0f4B800000; + selp.f32 %f1352, %f1351, %f204, %p305; + selp.f32 %f1353, 0fC3170000, 0fC2FE0000, %p305; + mov.b32 %r407, %f1352; + and.b32 %r408, %r407, 8388607; + or.b32 %r409, %r408, 1065353216; + mov.b32 %f1354, %r409; + shr.u32 %r410, %r407, 23; + cvt.rn.f32.u32 %f1355, %r410; + add.f32 %f1356, %f1353, %f1355; + setp.gt.f32 %p306, %f1354, 0f3FB504F3; + mul.f32 %f1357, %f1354, 0f3F000000; + add.f32 %f1358, %f1356, 0f3F800000; + selp.f32 %f1359, %f1358, %f1356, %p306; + selp.f32 %f1360, %f1357, %f1354, %p306; + add.f32 %f1361, %f1360, 0fBF800000; + add.f32 %f1362, %f1360, 0f3F800000; + rcp.approx.ftz.f32 %f1363, %f1362; + add.f32 %f1364, %f1361, %f1361; + mul.f32 %f1366, %f1364, %f1363; + mul.f32 %f1367, %f1366, %f1366; + fma.rn.f32 %f1370, %f2767, %f1367, %f2768; + fma.rn.f32 %f1372, %f1370, %f1367, %f2769; + mul.rn.f32 %f1373, %f1372, %f1367; + mul.rn.f32 %f1374, %f1373, %f1366; + sub.f32 %f1375, %f1361, %f1366; + add.f32 %f1376, %f1375, %f1375; + neg.f32 %f1377, %f1366; + fma.rn.f32 %f1378, %f1377, %f1361, %f1376; + mul.rn.f32 %f1379, %f1363, %f1378; + add.f32 %f1380, %f1374, %f1366; + sub.f32 %f1381, %f1366, %f1380; + add.f32 %f1382, %f1374, %f1381; + add.f32 %f1383, %f1379, %f1382; + add.f32 %f1384, %f1380, %f1383; + sub.f32 %f1385, %f1380, %f1384; + add.f32 %f1386, %f1383, %f1385; + mul.rn.f32 %f1388, %f1359, %f2770; + mul.rn.f32 %f1390, %f1359, %f2771; + add.f32 %f1391, %f1388, %f1384; + sub.f32 %f1392, %f1388, %f1391; + add.f32 %f1393, %f1384, %f1392; + add.f32 %f1394, %f1386, %f1393; + add.f32 %f1395, %f1390, %f1394; + add.f32 %f1396, %f1391, %f1395; + sub.f32 %f1397, %f1391, %f1396; + add.f32 %f1398, %f1395, %f1397; + mul.rn.f32 %f1399, %f588, %f1396; + neg.f32 %f1400, %f1399; + fma.rn.f32 %f1401, %f588, %f1396, %f1400; + fma.rn.f32 %f1402, %f588, %f1398, %f1401; + fma.rn.f32 %f1404, %f2774, %f1396, %f1402; + add.rn.f32 %f1405, %f1399, %f1404; + neg.f32 %f1406, %f1405; + add.rn.f32 %f1407, %f1399, %f1406; + add.rn.f32 %f1408, %f1407, %f1404; + mov.b32 %r411, %f1405; + setp.eq.s32 %p307, %r411, 1118925336; + add.s32 %r412, %r411, -1; + mov.b32 %f1409, %r412; + add.f32 %f1410, %f1408, 0f37000000; + selp.f32 %f205, %f1410, %f1408, %p307; + selp.f32 %f1411, %f1409, %f1405, %p307; + mul.rn.f32 %f1412, %f1411, %f2762; + cvt.rzi.f32.f32 %f1413, %f1412; + abs.f32 %f1414, %f1413; + setp.gt.f32 %p308, %f1414, 0f42FC0000; + mov.b32 %r413, %f1413; + and.b32 %r414, %r413, -2147483648; + or.b32 %r415, %r414, 1123811328; + mov.b32 %f1415, %r415; + selp.f32 %f1416, %f1415, %f1413, %p308; + fma.rn.f32 %f1418, %f1416, %f2772, %f1411; + fma.rn.f32 %f1420, %f1416, %f2773, %f1418; + mul.f32 %f1421, %f1420, 0f3FB8AA3B; + add.f32 %f1422, %f1416, 0f4B40007F; + mov.b32 %r416, %f1422; + shl.b32 %r417, %r416, 23; + mov.b32 %f1423, %r417; + ex2.approx.ftz.f32 %f1424, %f1421; + mul.f32 %f206, %f1424, %f1423; + setp.eq.f32 %p309, %f206, 0f7F800000; + mov.f32 %f2870, 0f7F800000; + @%p309 bra $L__BB5_182; + + fma.rn.f32 %f2870, %f206, %f205, %f206; + +$L__BB5_182: + setp.lt.f32 %p310, %f203, 0f00000000; + and.pred %p23, %p310, %p106; + setp.eq.f32 %p312, %f203, 0f00000000; + @%p312 bra $L__BB5_186; + bra.uni $L__BB5_183; + +$L__BB5_186: + add.f32 %f1429, %f203, %f203; + selp.f32 %f2872, %f1429, 0f00000000, %p106; + bra.uni $L__BB5_187; + +$L__BB5_183: + mov.b32 %r418, %f2870; + xor.b32 %r419, %r418, -2147483648; + mov.b32 %f1425, %r419; + selp.f32 %f2872, %f1425, %f2870, %p23; + setp.geu.f32 %p313, %f203, 0f00000000; + @%p313 bra $L__BB5_187; + + cvt.rzi.f32.f32 %f1427, %f588; + setp.eq.f32 %p314, %f1427, 0f40000000; + @%p314 bra $L__BB5_187; + + mov.f32 %f2872, 0f7FFFFFFF; + +$L__BB5_187: + abs.f32 %f2655, %f203; + add.f32 %f1430, %f2655, 0f40000000; + mov.b32 %r420, %f1430; + setp.lt.s32 %p316, %r420, 2139095040; + @%p316 bra $L__BB5_192; + + abs.f32 %f2779, %f203; + setp.gtu.f32 %p317, %f2779, 0f7F800000; + @%p317 bra $L__BB5_191; + bra.uni $L__BB5_189; + +$L__BB5_191: + add.f32 %f2872, %f203, 0f40000000; + bra.uni $L__BB5_192; + +$L__BB5_189: + abs.f32 %f2780, %f203; + setp.neu.f32 %p318, %f2780, 0f7F800000; + @%p318 bra $L__BB5_192; + + selp.f32 %f2872, 0fFF800000, 0f7F800000, %p23; + +$L__BB5_192: + mov.f32 %f2663, 0f32A57060; + mov.f32 %f2662, 0f4B400001; + mov.f32 %f2661, 0f437C0000; + mov.f32 %f2660, 0f3BBB989D; + mov.f32 %f2659, 0f3FB8AA3B; + mov.f32 %f2658, 0f3F000000; + cvt.rn.f32.s32 %f2657, %r786; + sub.f32 %f2656, %f2657, %f2889; + mul.f32 %f1431, %f2872, 0fBF000000; + setp.eq.f32 %p319, %f203, 0f3F800000; + selp.f32 %f1432, 0fBF000000, %f1431, %p319; + fma.rn.f32 %f1435, %f1432, %f2660, %f2658; + cvt.sat.f32.f32 %f1438, %f1435; + fma.rm.f32 %f1440, %f1438, %f2661, %f2662; + add.f32 %f1441, %f1440, 0fCB40007F; + neg.f32 %f1442, %f1441; + fma.rn.f32 %f1443, %f1432, %f2659, %f1442; + fma.rn.f32 %f1445, %f1432, %f2663, %f1443; + mov.b32 %r421, %f1440; + shl.b32 %r422, %r421, 23; + mov.b32 %f1446, %r422; + ex2.approx.ftz.f32 %f1447, %f1445; + mul.f32 %f215, %f1447, %f1446; + add.f32 %f1448, %f2656, 0f3F800000; + mul.f32 %f1449, %f1448, %f202; + mul.f32 %f1450, %f2656, %f215; + sub.f32 %f216, %f1449, %f1450; + cvt.f64.f32 %fd363, %f119; + { + .reg .b32 %temp; + mov.b64 {%temp, %r79}, %fd363; + } + abs.f64 %fd88, %fd363; + { // callseq 102, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd88; + .param .b64 param1; + st.param.f64 [param1+0], %fd309; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd525, [retval0+0]; + } // callseq 102 + setp.lt.s32 %p320, %r79, 0; + and.pred %p24, %p320, %p117; + not.pred %p322, %p24; + @%p322 bra $L__BB5_194; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r423}, %fd525; + } + xor.b32 %r424, %r423, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r425, %temp}, %fd525; + } + mov.b64 %fd525, {%r425, %r424}; + +$L__BB5_194: + setp.eq.f32 %p323, %f119, 0f00000000; + @%p323 bra $L__BB5_198; + bra.uni $L__BB5_195; + +$L__BB5_198: + mov.u32 %r426, 0; + selp.b32 %r427, %r79, 0, %p117; + or.b32 %r428, %r427, 2146435072; + selp.b32 %r429, %r428, %r427, %p119; + mov.b64 %fd525, {%r426, %r429}; + bra.uni $L__BB5_199; + +$L__BB5_195: + setp.gt.s32 %p324, %r79, -1; + @%p324 bra $L__BB5_199; + + cvt.rzi.f64.f64 %fd366, %fd309; + setp.eq.f64 %p325, %fd366, 0d4008000000000000; + @%p325 bra $L__BB5_199; + + mov.f64 %fd525, 0dFFF8000000000000; + +$L__BB5_199: + add.f64 %fd94, %fd363, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r430}, %fd94; + } + and.b32 %r431, %r430, 2146435072; + setp.ne.s32 %p328, %r431, 2146435072; + mov.f64 %fd526, %fd525; + @%p328 bra $L__BB5_205; + + setp.gtu.f64 %p329, %fd88, 0d7FF0000000000000; + mov.f64 %fd526, %fd94; + @%p329 bra $L__BB5_205; + + setp.eq.s32 %p330, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r432, %temp}, %fd309; + } + setp.eq.s32 %p331, %r432, 0; + and.pred %p332, %p330, %p331; + @%p332 bra $L__BB5_204; + bra.uni $L__BB5_202; + +$L__BB5_204: + mov.u32 %r437, 0; + setp.gt.f64 %p339, %fd88, 0d3FF0000000000000; + selp.b32 %r438, 2146435072, 0, %p339; + xor.b32 %r439, %r438, 2146435072; + selp.b32 %r440, %r439, %r438, %p119; + setp.eq.f32 %p340, %f119, 0fBF800000; + selp.b32 %r441, 1072693248, %r440, %p340; + mov.b64 %fd526, {%r437, %r441}; + bra.uni $L__BB5_205; + +$L__BB5_202: + { + .reg .b32 %temp; + mov.b64 {%r433, %temp}, %fd363; + } + and.b32 %r434, %r79, 2147483647; + setp.ne.s32 %p333, %r434, 2146435072; + setp.ne.s32 %p334, %r433, 0; + or.pred %p335, %p333, %p334; + mov.f64 %fd526, %fd525; + @%p335 bra $L__BB5_205; + + and.pred %p337, %p125, %p24; + selp.b32 %r435, %r61, %r60, %p337; + mov.u32 %r436, 0; + mov.b64 %fd526, {%r436, %r435}; + +$L__BB5_205: + mul.f32 %f1451, %f61, %f216; + mul.f32 %f217, %f116, %f1451; + setp.eq.f32 %p341, %f119, 0f3F800000; + selp.f64 %fd371, 0d3FF0000000000000, %fd526, %p341; + cvt.f64.f32 %fd372, %f202; + mul.f64 %fd98, %fd371, %fd372; + cvt.f64.f32 %fd99, %f124; + { + .reg .b32 %temp; + mov.b64 {%temp, %r80}, %fd99; + } + abs.f64 %fd100, %fd99; + { // callseq 103, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd100; + .param .b64 param1; + st.param.f64 [param1+0], %fd309; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd528, [retval0+0]; + } // callseq 103 + setp.lt.s32 %p342, %r80, 0; + and.pred %p25, %p342, %p117; + not.pred %p344, %p25; + @%p344 bra $L__BB5_207; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r442}, %fd528; + } + xor.b32 %r443, %r442, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r444, %temp}, %fd528; + } + mov.b64 %fd528, {%r444, %r443}; + +$L__BB5_207: + setp.eq.f32 %p345, %f124, 0f00000000; + @%p345 bra $L__BB5_211; + bra.uni $L__BB5_208; + +$L__BB5_211: + mov.u32 %r445, 0; + selp.b32 %r446, %r80, 0, %p117; + or.b32 %r447, %r446, 2146435072; + selp.b32 %r448, %r447, %r446, %p119; + mov.b64 %fd528, {%r445, %r448}; + bra.uni $L__BB5_212; + +$L__BB5_208: + setp.gt.s32 %p346, %r80, -1; + @%p346 bra $L__BB5_212; + + cvt.rzi.f64.f64 %fd375, %fd309; + setp.eq.f64 %p347, %fd375, 0d4008000000000000; + @%p347 bra $L__BB5_212; + + mov.f64 %fd528, 0dFFF8000000000000; + +$L__BB5_212: + add.f64 %fd106, %fd99, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r449}, %fd106; + } + and.b32 %r450, %r449, 2146435072; + setp.ne.s32 %p350, %r450, 2146435072; + mov.f64 %fd529, %fd528; + @%p350 bra $L__BB5_218; + + setp.gtu.f64 %p351, %fd100, 0d7FF0000000000000; + mov.f64 %fd529, %fd106; + @%p351 bra $L__BB5_218; + + setp.eq.s32 %p352, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r451, %temp}, %fd309; + } + setp.eq.s32 %p353, %r451, 0; + and.pred %p354, %p352, %p353; + @%p354 bra $L__BB5_217; + bra.uni $L__BB5_215; + +$L__BB5_217: + mov.u32 %r456, 0; + setp.gt.f64 %p361, %fd100, 0d3FF0000000000000; + selp.b32 %r457, 2146435072, 0, %p361; + xor.b32 %r458, %r457, 2146435072; + selp.b32 %r459, %r458, %r457, %p119; + setp.eq.f32 %p362, %f124, 0fBF800000; + selp.b32 %r460, 1072693248, %r459, %p362; + mov.b64 %fd529, {%r456, %r460}; + bra.uni $L__BB5_218; + +$L__BB5_215: + { + .reg .b32 %temp; + mov.b64 {%r452, %temp}, %fd99; + } + and.b32 %r453, %r80, 2147483647; + setp.ne.s32 %p355, %r453, 2146435072; + setp.ne.s32 %p356, %r452, 0; + or.pred %p357, %p355, %p356; + mov.f64 %fd529, %fd528; + @%p357 bra $L__BB5_218; + + and.pred %p359, %p125, %p25; + selp.b32 %r454, %r61, %r60, %p359; + mov.u32 %r455, 0; + mov.b64 %fd529, {%r455, %r454}; + +$L__BB5_218: + cvt.f64.f32 %fd476, %f116; + mov.f32 %f2873, 0f00000000; + setp.eq.f32 %p363, %f124, 0f3F800000; + selp.f64 %fd378, 0d3FF0000000000000, %fd529, %p363; + cvt.f64.f32 %fd379, %f215; + mul.f64 %fd380, %fd378, %fd379; + sub.f64 %fd381, %fd98, %fd380; + mul.f64 %fd382, %fd70, %fd381; + mul.f64 %fd384, %fd382, %fd476; + mul.f32 %f1453, %f62, %f217; + cvt.f64.f32 %fd385, %f1453; + sub.f64 %fd386, %fd385, %fd384; + cvt.rn.f32.f64 %f1454, %fd386; + add.f32 %f218, %f188, %f217; + add.f32 %f219, %f189, %f1454; + mul.f32 %f220, %f116, %f129; + setp.leu.f32 %p364, %f130, 0f3C23D70A; + @%p364 bra $L__BB5_220; + + sub.f32 %f1455, %f131, %f130; + add.f32 %f1456, %f130, %f2885; + div.rn.f32 %f2873, %f1455, %f1456; + +$L__BB5_220: + mov.f32 %f2874, 0f00000000; + @%p364 bra $L__BB5_235; + + and.b32 %r461, %r74, 2146435072; + setp.eq.s32 %p366, %r461, 1062207488; + add.f32 %f223, %f130, %f2885; + cvt.f64.f32 %fd110, %f223; + { + .reg .b32 %temp; + mov.b64 {%temp, %r81}, %fd110; + } + abs.f64 %fd111, %fd110; + { // callseq 104, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd111; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd531, [retval0+0]; + } // callseq 104 + setp.lt.s32 %p367, %r81, 0; + and.pred %p26, %p367, %p366; + not.pred %p368, %p26; + @%p368 bra $L__BB5_223; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r462}, %fd531; + } + xor.b32 %r463, %r462, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r464, %temp}, %fd531; + } + mov.b64 %fd531, {%r464, %r463}; + +$L__BB5_223: + setp.eq.f32 %p369, %f223, 0f00000000; + @%p369 bra $L__BB5_227; + bra.uni $L__BB5_224; + +$L__BB5_227: + setp.lt.s32 %p372, %r74, 0; + mov.u32 %r465, 0; + selp.b32 %r467, %r81, 0, %p366; + or.b32 %r468, %r467, 2146435072; + selp.b32 %r469, %r468, %r467, %p372; + mov.b64 %fd531, {%r465, %r469}; + bra.uni $L__BB5_228; + +$L__BB5_224: + setp.gt.s32 %p370, %r81, -1; + @%p370 bra $L__BB5_228; + + cvt.rzi.f64.f64 %fd389, %fd315; + setp.eq.f64 %p371, %fd389, 0d4000000000000000; + @%p371 bra $L__BB5_228; + + mov.f64 %fd531, 0dFFF8000000000000; + +$L__BB5_228: + add.f64 %fd117, %fd110, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r470}, %fd117; + } + and.b32 %r471, %r470, 2146435072; + setp.ne.s32 %p374, %r471, 2146435072; + mov.f64 %fd532, %fd531; + @%p374 bra $L__BB5_234; + + setp.gtu.f64 %p375, %fd111, 0d7FF0000000000000; + mov.f64 %fd532, %fd117; + @%p375 bra $L__BB5_234; + + setp.eq.s32 %p376, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r472, %temp}, %fd315; + } + setp.eq.s32 %p377, %r472, 0; + and.pred %p378, %p376, %p377; + @%p378 bra $L__BB5_233; + bra.uni $L__BB5_231; + +$L__BB5_233: + setp.lt.s32 %p384, %r74, 0; + mov.u32 %r478, 0; + setp.gt.f64 %p385, %fd111, 0d3FF0000000000000; + selp.b32 %r479, 2146435072, 0, %p385; + xor.b32 %r480, %r479, 2146435072; + selp.b32 %r481, %r480, %r479, %p384; + setp.eq.f32 %p386, %f223, 0fBF800000; + selp.b32 %r482, 1072693248, %r481, %p386; + mov.b64 %fd532, {%r478, %r482}; + bra.uni $L__BB5_234; + +$L__BB5_231: + { + .reg .b32 %temp; + mov.b64 {%r473, %temp}, %fd110; + } + and.b32 %r474, %r81, 2147483647; + setp.ne.s32 %p379, %r474, 2146435072; + setp.ne.s32 %p380, %r473, 0; + or.pred %p381, %p379, %p380; + mov.f64 %fd532, %fd531; + @%p381 bra $L__BB5_234; + + setp.ne.s32 %p382, %r75, 1071644672; + and.pred %p383, %p382, %p26; + or.b32 %r475, %r76, -2147483648; + selp.b32 %r476, %r475, %r76, %p383; + mov.u32 %r477, 0; + mov.b64 %fd532, {%r477, %r476}; + +$L__BB5_234: + setp.eq.f32 %p387, %f223, 0f3F800000; + selp.f64 %fd392, 0d3FF0000000000000, %fd532, %p387; + add.f32 %f1458, %f131, %f2885; + cvt.f64.f32 %fd393, %f1458; + div.rn.f64 %fd394, %fd393, %fd392; + cvt.rn.f32.f64 %f2874, %fd394; + +$L__BB5_235: + and.b32 %r483, %r74, 2146435072; + setp.eq.s32 %p388, %r483, 1062207488; + mov.f32 %f1459, 0f47C35000; + min.f32 %f1460, %f2874, %f1459; + cvt.f64.f32 %fd121, %f1460; + min.f32 %f226, %f2873, %f1459; + fma.rn.f32 %f2843, %f226, %f145, %f2843; + mul.f32 %f1461, %f226, %f146; + cvt.f64.f32 %fd122, %f1461; + cvt.f64.f32 %fd123, %f145; + { + .reg .b32 %temp; + mov.b64 {%temp, %r82}, %fd123; + } + abs.f64 %fd124, %fd123; + { // callseq 105, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd124; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd533, [retval0+0]; + } // callseq 105 + @%p388 bra $L__BB5_281; + bra.uni $L__BB5_236; + +$L__BB5_281: + setp.gt.s32 %p449, %r82, -1; + @%p449 bra $L__BB5_283; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r532}, %fd533; + } + xor.b32 %r533, %r532, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r534, %temp}, %fd533; + } + mov.b64 %fd533, {%r534, %r533}; + +$L__BB5_283: + setp.eq.f32 %p450, %f145, 0f00000000; + @%p450 bra $L__BB5_287; + bra.uni $L__BB5_284; + +$L__BB5_287: + setp.lt.s32 %p453, %r74, 0; + mov.u32 %r535, 0; + or.b32 %r536, %r82, 2146435072; + selp.b32 %r537, %r536, %r82, %p453; + mov.b64 %fd533, {%r535, %r537}; + bra.uni $L__BB5_288; + +$L__BB5_236: + setp.eq.f32 %p389, %f145, 0f00000000; + @%p389 bra $L__BB5_240; + bra.uni $L__BB5_237; + +$L__BB5_240: + mov.u32 %r484, 0; + mov.b64 %fd533, {%r484, %r77}; + bra.uni $L__BB5_241; + +$L__BB5_284: + @%p449 bra $L__BB5_288; + + cvt.rzi.f64.f64 %fd437, %fd315; + setp.eq.f64 %p452, %fd437, 0d4000000000000000; + @%p452 bra $L__BB5_288; + + mov.f64 %fd533, 0dFFF8000000000000; + +$L__BB5_288: + add.f64 %fd168, %fd123, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r538}, %fd168; + } + and.b32 %r539, %r538, 2146435072; + setp.ne.s32 %p454, %r539, 2146435072; + mov.f64 %fd543, %fd533; + @%p454 bra $L__BB5_294; + + setp.gtu.f64 %p455, %fd124, 0d7FF0000000000000; + mov.f64 %fd543, %fd168; + @%p455 bra $L__BB5_294; + + setp.eq.s32 %p456, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r540, %temp}, %fd315; + } + setp.eq.s32 %p457, %r540, 0; + and.pred %p458, %p456, %p457; + @%p458 bra $L__BB5_293; + bra.uni $L__BB5_291; + +$L__BB5_293: + setp.lt.s32 %p465, %r74, 0; + mov.u32 %r546, 0; + setp.gt.f64 %p466, %fd124, 0d3FF0000000000000; + selp.b32 %r547, 2146435072, 0, %p466; + xor.b32 %r548, %r547, 2146435072; + selp.b32 %r549, %r548, %r547, %p465; + setp.eq.f32 %p467, %f145, 0fBF800000; + selp.b32 %r550, 1072693248, %r549, %p467; + mov.b64 %fd543, {%r546, %r550}; + bra.uni $L__BB5_294; + +$L__BB5_237: + setp.gt.s32 %p390, %r82, -1; + @%p390 bra $L__BB5_241; + + cvt.rzi.f64.f64 %fd397, %fd315; + setp.eq.f64 %p391, %fd397, 0d4000000000000000; + @%p391 bra $L__BB5_241; + + mov.f64 %fd533, 0dFFF8000000000000; + +$L__BB5_241: + add.f64 %fd128, %fd123, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r485}, %fd128; + } + and.b32 %r486, %r485, 2146435072; + setp.ne.s32 %p392, %r486, 2146435072; + mov.f64 %fd534, %fd533; + @%p392 bra $L__BB5_247; + + setp.gtu.f64 %p393, %fd124, 0d7FF0000000000000; + mov.f64 %fd534, %fd128; + @%p393 bra $L__BB5_247; + + setp.eq.s32 %p394, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r487, %temp}, %fd315; + } + setp.eq.s32 %p395, %r487, 0; + and.pred %p396, %p394, %p395; + @%p396 bra $L__BB5_246; + bra.uni $L__BB5_244; + +$L__BB5_246: + setp.lt.s32 %p400, %r74, 0; + mov.u32 %r491, 0; + setp.gt.f64 %p401, %fd124, 0d3FF0000000000000; + selp.b32 %r492, 2146435072, 0, %p401; + xor.b32 %r493, %r492, 2146435072; + selp.b32 %r494, %r493, %r492, %p400; + setp.eq.f32 %p402, %f145, 0fBF800000; + selp.b32 %r495, 1072693248, %r494, %p402; + mov.b64 %fd534, {%r491, %r495}; + bra.uni $L__BB5_247; + +$L__BB5_291: + { + .reg .b32 %temp; + mov.b64 {%r541, %temp}, %fd123; + } + and.b32 %r542, %r82, 2147483647; + setp.ne.s32 %p459, %r542, 2146435072; + setp.ne.s32 %p460, %r541, 0; + or.pred %p461, %p459, %p460; + mov.f64 %fd543, %fd533; + @%p461 bra $L__BB5_294; + + setp.lt.s32 %p462, %r82, 0; + mov.u32 %r543, 0; + setp.ne.s32 %p463, %r75, 1071644672; + and.pred %p464, %p463, %p462; + or.b32 %r544, %r76, -2147483648; + selp.b32 %r545, %r544, %r76, %p464; + mov.b64 %fd543, {%r543, %r545}; + +$L__BB5_294: + setp.eq.f32 %p468, %f145, 0f3F800000; + selp.f64 %fd440, 0d3FF0000000000000, %fd543, %p468; + mul.f64 %fd441, %fd440, %fd121; + sub.f64 %fd442, %fd122, %fd441; + cvt.f64.f32 %fd443, %f2848; + add.f64 %fd557, %fd442, %fd443; + cvt.f64.f32 %fd173, %f174; + { + .reg .b32 %temp; + mov.b64 {%temp, %r86}, %fd173; + } + abs.f64 %fd174, %fd173; + { // callseq 109, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd174; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd545, [retval0+0]; + } // callseq 109 + setp.gt.s32 %p469, %r86, -1; + @%p469 bra $L__BB5_296; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r551}, %fd545; + } + xor.b32 %r552, %r551, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r553, %temp}, %fd545; + } + mov.b64 %fd545, {%r553, %r552}; + +$L__BB5_296: + setp.eq.f32 %p470, %f174, 0f00000000; + @%p470 bra $L__BB5_300; + bra.uni $L__BB5_297; + +$L__BB5_300: + setp.lt.s32 %p473, %r74, 0; + mov.u32 %r554, 0; + or.b32 %r555, %r86, 2146435072; + selp.b32 %r556, %r555, %r86, %p473; + mov.b64 %fd545, {%r554, %r556}; + bra.uni $L__BB5_301; + +$L__BB5_297: + @%p469 bra $L__BB5_301; + + cvt.rzi.f64.f64 %fd446, %fd315; + setp.eq.f64 %p472, %fd446, 0d4000000000000000; + @%p472 bra $L__BB5_301; + + mov.f64 %fd545, 0dFFF8000000000000; + +$L__BB5_301: + add.f64 %fd180, %fd173, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r557}, %fd180; + } + and.b32 %r558, %r557, 2146435072; + setp.ne.s32 %p474, %r558, 2146435072; + mov.f64 %fd546, %fd545; + @%p474 bra $L__BB5_307; + + setp.gtu.f64 %p475, %fd174, 0d7FF0000000000000; + mov.f64 %fd546, %fd180; + @%p475 bra $L__BB5_307; + + setp.eq.s32 %p476, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r559, %temp}, %fd315; + } + setp.eq.s32 %p477, %r559, 0; + and.pred %p478, %p476, %p477; + @%p478 bra $L__BB5_306; + bra.uni $L__BB5_304; + +$L__BB5_306: + setp.lt.s32 %p485, %r74, 0; + mov.u32 %r565, 0; + setp.gt.f64 %p486, %fd174, 0d3FF0000000000000; + selp.b32 %r566, 2146435072, 0, %p486; + xor.b32 %r567, %r566, 2146435072; + selp.b32 %r568, %r567, %r566, %p485; + setp.eq.f32 %p487, %f174, 0fBF800000; + selp.b32 %r569, 1072693248, %r568, %p487; + mov.b64 %fd546, {%r565, %r569}; + bra.uni $L__BB5_307; + +$L__BB5_244: + { + .reg .b32 %temp; + mov.b64 {%r488, %temp}, %fd123; + } + and.b32 %r489, %r82, 2147483647; + setp.ne.s32 %p397, %r489, 2146435072; + setp.ne.s32 %p398, %r488, 0; + or.pred %p399, %p397, %p398; + mov.f64 %fd534, %fd533; + @%p399 bra $L__BB5_247; + + mov.u32 %r490, 0; + mov.b64 %fd534, {%r490, %r76}; + +$L__BB5_247: + setp.eq.f32 %p403, %f145, 0f3F800000; + selp.f64 %fd400, 0d3FF0000000000000, %fd534, %p403; + mul.f64 %fd401, %fd400, %fd121; + sub.f64 %fd402, %fd122, %fd401; + cvt.f64.f32 %fd403, %f2848; + add.f64 %fd557, %fd402, %fd403; + cvt.f64.f32 %fd133, %f174; + { + .reg .b32 %temp; + mov.b64 {%temp, %r83}, %fd133; + } + abs.f64 %fd134, %fd133; + setp.eq.f32 %p404, %f174, 0f00000000; + @%p404 bra $L__BB5_251; + bra.uni $L__BB5_248; + +$L__BB5_251: + mov.u32 %r496, 0; + mov.b64 %fd535, {%r496, %r77}; + bra.uni $L__BB5_252; + +$L__BB5_248: + { // callseq 106, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd134; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd535, [retval0+0]; + } // callseq 106 + setp.gt.s32 %p405, %r83, -1; + @%p405 bra $L__BB5_252; + + cvt.rzi.f64.f64 %fd406, %fd315; + setp.eq.f64 %p406, %fd406, 0d4000000000000000; + @%p406 bra $L__BB5_252; + + mov.f64 %fd535, 0dFFF8000000000000; + +$L__BB5_252: + add.f64 %fd138, %fd133, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r497}, %fd138; + } + and.b32 %r498, %r497, 2146435072; + setp.ne.s32 %p407, %r498, 2146435072; + mov.f64 %fd536, %fd535; + @%p407 bra $L__BB5_258; + + setp.gtu.f64 %p408, %fd134, 0d7FF0000000000000; + mov.f64 %fd536, %fd138; + @%p408 bra $L__BB5_258; + + setp.eq.s32 %p409, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r499, %temp}, %fd315; + } + setp.eq.s32 %p410, %r499, 0; + and.pred %p411, %p409, %p410; + @%p411 bra $L__BB5_257; + bra.uni $L__BB5_255; + +$L__BB5_257: + setp.lt.s32 %p415, %r74, 0; + mov.u32 %r503, 0; + setp.gt.f64 %p416, %fd134, 0d3FF0000000000000; + selp.b32 %r504, 2146435072, 0, %p416; + xor.b32 %r505, %r504, 2146435072; + selp.b32 %r506, %r505, %r504, %p415; + setp.eq.f32 %p417, %f174, 0fBF800000; + selp.b32 %r507, 1072693248, %r506, %p417; + mov.b64 %fd536, {%r503, %r507}; + bra.uni $L__BB5_258; + +$L__BB5_304: + { + .reg .b32 %temp; + mov.b64 {%r560, %temp}, %fd173; + } + and.b32 %r561, %r86, 2147483647; + setp.ne.s32 %p479, %r561, 2146435072; + setp.ne.s32 %p480, %r560, 0; + or.pred %p481, %p479, %p480; + mov.f64 %fd546, %fd545; + @%p481 bra $L__BB5_307; + + setp.lt.s32 %p482, %r86, 0; + mov.u32 %r562, 0; + setp.ne.s32 %p483, %r75, 1071644672; + and.pred %p484, %p483, %p482; + or.b32 %r563, %r76, -2147483648; + selp.b32 %r564, %r563, %r76, %p484; + mov.b64 %fd546, {%r562, %r564}; + +$L__BB5_307: + setp.eq.f32 %p488, %f174, 0f3F800000; + selp.f64 %fd449, 0d3FF0000000000000, %fd546, %p488; + mul.f64 %fd450, %fd449, %fd121; + mul.f32 %f1465, %f226, %f175; + cvt.f64.f32 %fd451, %f1465; + sub.f64 %fd452, %fd451, %fd450; + cvt.f64.f32 %fd453, %f2847; + add.f64 %fd556, %fd452, %fd453; + cvt.f64.f32 %fd185, %f220; + { + .reg .b32 %temp; + mov.b64 {%temp, %r87}, %fd185; + } + abs.f64 %fd186, %fd185; + { // callseq 110, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd186; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd548, [retval0+0]; + } // callseq 110 + setp.gt.s32 %p489, %r87, -1; + @%p489 bra $L__BB5_309; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r570}, %fd548; + } + xor.b32 %r571, %r570, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r572, %temp}, %fd548; + } + mov.b64 %fd548, {%r572, %r571}; + +$L__BB5_309: + setp.eq.f32 %p490, %f220, 0f00000000; + @%p490 bra $L__BB5_313; + bra.uni $L__BB5_310; + +$L__BB5_313: + setp.lt.s32 %p493, %r74, 0; + mov.u32 %r573, 0; + or.b32 %r574, %r87, 2146435072; + selp.b32 %r575, %r574, %r87, %p493; + mov.b64 %fd548, {%r573, %r575}; + bra.uni $L__BB5_314; + +$L__BB5_310: + @%p489 bra $L__BB5_314; + + cvt.rzi.f64.f64 %fd456, %fd315; + setp.eq.f64 %p492, %fd456, 0d4000000000000000; + @%p492 bra $L__BB5_314; + + mov.f64 %fd548, 0dFFF8000000000000; + +$L__BB5_314: + add.f64 %fd192, %fd185, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r576}, %fd192; + } + and.b32 %r577, %r576, 2146435072; + setp.ne.s32 %p494, %r577, 2146435072; + mov.f64 %fd549, %fd548; + @%p494 bra $L__BB5_320; + + setp.gtu.f64 %p495, %fd186, 0d7FF0000000000000; + mov.f64 %fd549, %fd192; + @%p495 bra $L__BB5_320; + + setp.eq.s32 %p496, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r578, %temp}, %fd315; + } + setp.eq.s32 %p497, %r578, 0; + and.pred %p498, %p496, %p497; + @%p498 bra $L__BB5_319; + bra.uni $L__BB5_317; + +$L__BB5_319: + setp.lt.s32 %p505, %r74, 0; + mov.u32 %r584, 0; + setp.gt.f64 %p506, %fd186, 0d3FF0000000000000; + selp.b32 %r585, 2146435072, 0, %p506; + xor.b32 %r586, %r585, 2146435072; + selp.b32 %r587, %r586, %r585, %p505; + setp.eq.f32 %p507, %f220, 0fBF800000; + selp.b32 %r588, 1072693248, %r587, %p507; + mov.b64 %fd549, {%r584, %r588}; + bra.uni $L__BB5_320; + +$L__BB5_255: + { + .reg .b32 %temp; + mov.b64 {%r500, %temp}, %fd133; + } + and.b32 %r501, %r83, 2147483647; + setp.ne.s32 %p412, %r501, 2146435072; + setp.ne.s32 %p413, %r500, 0; + or.pred %p414, %p412, %p413; + mov.f64 %fd536, %fd535; + @%p414 bra $L__BB5_258; + + mov.u32 %r502, 0; + mov.b64 %fd536, {%r502, %r76}; + +$L__BB5_258: + setp.eq.f32 %p418, %f174, 0f3F800000; + selp.f64 %fd409, 0d3FF0000000000000, %fd536, %p418; + mul.f64 %fd410, %fd409, %fd121; + mul.f32 %f1462, %f226, %f175; + cvt.f64.f32 %fd411, %f1462; + sub.f64 %fd412, %fd411, %fd410; + cvt.f64.f32 %fd413, %f2847; + add.f64 %fd556, %fd412, %fd413; + cvt.f64.f32 %fd143, %f220; + { + .reg .b32 %temp; + mov.b64 {%temp, %r84}, %fd143; + } + abs.f64 %fd144, %fd143; + setp.eq.f32 %p419, %f220, 0f00000000; + @%p419 bra $L__BB5_262; + bra.uni $L__BB5_259; + +$L__BB5_262: + mov.u32 %r508, 0; + mov.b64 %fd537, {%r508, %r77}; + bra.uni $L__BB5_263; + +$L__BB5_259: + { // callseq 107, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd144; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd537, [retval0+0]; + } // callseq 107 + setp.gt.s32 %p420, %r84, -1; + @%p420 bra $L__BB5_263; + + cvt.rzi.f64.f64 %fd416, %fd315; + setp.eq.f64 %p421, %fd416, 0d4000000000000000; + @%p421 bra $L__BB5_263; + + mov.f64 %fd537, 0dFFF8000000000000; + +$L__BB5_263: + add.f64 %fd148, %fd143, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r509}, %fd148; + } + and.b32 %r510, %r509, 2146435072; + setp.ne.s32 %p422, %r510, 2146435072; + mov.f64 %fd538, %fd537; + @%p422 bra $L__BB5_269; + + setp.gtu.f64 %p423, %fd144, 0d7FF0000000000000; + mov.f64 %fd538, %fd148; + @%p423 bra $L__BB5_269; + + setp.eq.s32 %p424, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r511, %temp}, %fd315; + } + setp.eq.s32 %p425, %r511, 0; + and.pred %p426, %p424, %p425; + @%p426 bra $L__BB5_268; + bra.uni $L__BB5_266; + +$L__BB5_268: + setp.lt.s32 %p430, %r74, 0; + mov.u32 %r515, 0; + setp.gt.f64 %p431, %fd144, 0d3FF0000000000000; + selp.b32 %r516, 2146435072, 0, %p431; + xor.b32 %r517, %r516, 2146435072; + selp.b32 %r518, %r517, %r516, %p430; + setp.eq.f32 %p432, %f220, 0fBF800000; + selp.b32 %r519, 1072693248, %r518, %p432; + mov.b64 %fd538, {%r515, %r519}; + bra.uni $L__BB5_269; + +$L__BB5_317: + { + .reg .b32 %temp; + mov.b64 {%r579, %temp}, %fd185; + } + and.b32 %r580, %r87, 2147483647; + setp.ne.s32 %p499, %r580, 2146435072; + setp.ne.s32 %p500, %r579, 0; + or.pred %p501, %p499, %p500; + mov.f64 %fd549, %fd548; + @%p501 bra $L__BB5_320; + + setp.lt.s32 %p502, %r87, 0; + mov.u32 %r581, 0; + setp.ne.s32 %p503, %r75, 1071644672; + and.pred %p504, %p503, %p502; + or.b32 %r582, %r76, -2147483648; + selp.b32 %r583, %r582, %r76, %p504; + mov.b64 %fd549, {%r581, %r583}; + +$L__BB5_320: + mul.f32 %f1466, %f226, 0f00000000; + cvt.f64.f32 %fd459, %f1466; + setp.eq.f32 %p508, %f220, 0f3F800000; + selp.f64 %fd460, 0d3FF0000000000000, %fd549, %p508; + mul.f64 %fd461, %fd460, %fd121; + sub.f64 %fd462, %fd459, %fd461; + cvt.f64.f32 %fd463, %f2846; + add.f64 %fd555, %fd462, %fd463; + cvt.f64.f32 %fd464, %f2845; + sub.f64 %fd465, %fd459, %fd121; + add.f64 %fd554, %fd465, %fd464; + cvt.f64.f32 %fd198, %f218; + { + .reg .b32 %temp; + mov.b64 {%temp, %r88}, %fd198; + } + abs.f64 %fd199, %fd198; + { // callseq 111, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd199; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd551, [retval0+0]; + } // callseq 111 + setp.gt.s32 %p509, %r88, -1; + @%p509 bra $L__BB5_322; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r589}, %fd551; + } + xor.b32 %r590, %r589, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r591, %temp}, %fd551; + } + mov.b64 %fd551, {%r591, %r590}; + +$L__BB5_322: + setp.eq.f32 %p510, %f218, 0f00000000; + @%p510 bra $L__BB5_326; + bra.uni $L__BB5_323; + +$L__BB5_326: + setp.lt.s32 %p513, %r74, 0; + mov.u32 %r592, 0; + or.b32 %r593, %r88, 2146435072; + selp.b32 %r594, %r593, %r88, %p513; + mov.b64 %fd551, {%r592, %r594}; + bra.uni $L__BB5_327; + +$L__BB5_323: + @%p509 bra $L__BB5_327; + + cvt.rzi.f64.f64 %fd468, %fd315; + setp.eq.f64 %p512, %fd468, 0d4000000000000000; + @%p512 bra $L__BB5_327; + + mov.f64 %fd551, 0dFFF8000000000000; + +$L__BB5_327: + add.f64 %fd205, %fd198, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r595}, %fd205; + } + and.b32 %r596, %r595, 2146435072; + setp.ne.s32 %p514, %r596, 2146435072; + mov.f64 %fd552, %fd551; + @%p514 bra $L__BB5_333; + + setp.gtu.f64 %p515, %fd199, 0d7FF0000000000000; + mov.f64 %fd552, %fd205; + @%p515 bra $L__BB5_333; + + setp.eq.s32 %p516, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r597, %temp}, %fd315; + } + setp.eq.s32 %p517, %r597, 0; + and.pred %p518, %p516, %p517; + @%p518 bra $L__BB5_332; + bra.uni $L__BB5_330; + +$L__BB5_332: + setp.lt.s32 %p525, %r74, 0; + mov.u32 %r603, 0; + setp.gt.f64 %p526, %fd199, 0d3FF0000000000000; + selp.b32 %r604, 2146435072, 0, %p526; + xor.b32 %r605, %r604, 2146435072; + selp.b32 %r606, %r605, %r604, %p525; + setp.eq.f32 %p527, %f218, 0fBF800000; + selp.b32 %r607, 1072693248, %r606, %p527; + mov.b64 %fd552, {%r603, %r607}; + bra.uni $L__BB5_333; + +$L__BB5_266: + { + .reg .b32 %temp; + mov.b64 {%r512, %temp}, %fd143; + } + and.b32 %r513, %r84, 2147483647; + setp.ne.s32 %p427, %r513, 2146435072; + setp.ne.s32 %p428, %r512, 0; + or.pred %p429, %p427, %p428; + mov.f64 %fd538, %fd537; + @%p429 bra $L__BB5_269; + + mov.u32 %r514, 0; + mov.b64 %fd538, {%r514, %r76}; + +$L__BB5_269: + mul.f32 %f1463, %f226, 0f00000000; + cvt.f64.f32 %fd419, %f1463; + setp.eq.f32 %p433, %f220, 0f3F800000; + selp.f64 %fd420, 0d3FF0000000000000, %fd538, %p433; + mul.f64 %fd421, %fd420, %fd121; + sub.f64 %fd422, %fd419, %fd421; + cvt.f64.f32 %fd423, %f2846; + add.f64 %fd555, %fd422, %fd423; + cvt.f64.f32 %fd424, %f2845; + sub.f64 %fd425, %fd419, %fd121; + add.f64 %fd554, %fd425, %fd424; + cvt.f64.f32 %fd154, %f218; + { + .reg .b32 %temp; + mov.b64 {%temp, %r85}, %fd154; + } + abs.f64 %fd155, %fd154; + setp.eq.f32 %p434, %f218, 0f00000000; + @%p434 bra $L__BB5_273; + bra.uni $L__BB5_270; + +$L__BB5_273: + mov.u32 %r520, 0; + mov.b64 %fd539, {%r520, %r77}; + bra.uni $L__BB5_274; + +$L__BB5_270: + { // callseq 108, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd155; + .param .b64 param1; + st.param.f64 [param1+0], %fd315; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd539, [retval0+0]; + } // callseq 108 + setp.gt.s32 %p435, %r85, -1; + @%p435 bra $L__BB5_274; + + cvt.rzi.f64.f64 %fd428, %fd315; + setp.eq.f64 %p436, %fd428, 0d4000000000000000; + @%p436 bra $L__BB5_274; + + mov.f64 %fd539, 0dFFF8000000000000; + +$L__BB5_274: + add.f64 %fd159, %fd154, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r521}, %fd159; + } + and.b32 %r522, %r521, 2146435072; + setp.ne.s32 %p437, %r522, 2146435072; + mov.f64 %fd540, %fd539; + @%p437 bra $L__BB5_280; + + setp.gtu.f64 %p438, %fd155, 0d7FF0000000000000; + mov.f64 %fd540, %fd159; + @%p438 bra $L__BB5_280; + + setp.eq.s32 %p439, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r523, %temp}, %fd315; + } + setp.eq.s32 %p440, %r523, 0; + and.pred %p441, %p439, %p440; + @%p441 bra $L__BB5_279; + bra.uni $L__BB5_277; + +$L__BB5_279: + setp.lt.s32 %p445, %r74, 0; + mov.u32 %r527, 0; + setp.gt.f64 %p446, %fd155, 0d3FF0000000000000; + selp.b32 %r528, 2146435072, 0, %p446; + xor.b32 %r529, %r528, 2146435072; + selp.b32 %r530, %r529, %r528, %p445; + setp.eq.f32 %p447, %f218, 0fBF800000; + selp.b32 %r531, 1072693248, %r530, %p447; + mov.b64 %fd540, {%r527, %r531}; + bra.uni $L__BB5_280; + +$L__BB5_330: + { + .reg .b32 %temp; + mov.b64 {%r598, %temp}, %fd198; + } + and.b32 %r599, %r88, 2147483647; + setp.ne.s32 %p519, %r599, 2146435072; + setp.ne.s32 %p520, %r598, 0; + or.pred %p521, %p519, %p520; + mov.f64 %fd552, %fd551; + @%p521 bra $L__BB5_333; + + setp.lt.s32 %p522, %r88, 0; + mov.u32 %r600, 0; + setp.ne.s32 %p523, %r75, 1071644672; + and.pred %p524, %p523, %p522; + or.b32 %r601, %r76, -2147483648; + selp.b32 %r602, %r601, %r76, %p524; + mov.b64 %fd552, {%r600, %r602}; + +$L__BB5_333: + setp.eq.f32 %p528, %f218, 0f3F800000; + selp.f64 %fd471, 0d3FF0000000000000, %fd552, %p528; + mul.f64 %fd472, %fd471, %fd121; + mul.f32 %f1467, %f226, %f219; + cvt.f64.f32 %fd473, %f1467; + sub.f64 %fd474, %fd473, %fd472; + cvt.f64.f32 %fd475, %f2844; + add.f64 %fd553, %fd474, %fd475; + bra.uni $L__BB5_334; + +$L__BB5_277: + { + .reg .b32 %temp; + mov.b64 {%r524, %temp}, %fd154; + } + and.b32 %r525, %r85, 2147483647; + setp.ne.s32 %p442, %r525, 2146435072; + setp.ne.s32 %p443, %r524, 0; + or.pred %p444, %p442, %p443; + mov.f64 %fd540, %fd539; + @%p444 bra $L__BB5_280; + + mov.u32 %r526, 0; + mov.b64 %fd540, {%r526, %r76}; + +$L__BB5_280: + setp.eq.f32 %p448, %f218, 0f3F800000; + selp.f64 %fd431, 0d3FF0000000000000, %fd540, %p448; + mul.f64 %fd432, %fd431, %fd121; + mul.f32 %f1464, %f226, %f219; + cvt.f64.f32 %fd433, %f1464; + sub.f64 %fd434, %fd433, %fd432; + cvt.f64.f32 %fd435, %f2844; + add.f64 %fd553, %fd434, %fd435; + +$L__BB5_334: + cvt.rn.f32.f64 %f2848, %fd557; + cvt.rn.f32.f64 %f2847, %fd556; + cvt.rn.f32.f64 %f2846, %fd555; + cvt.rn.f32.f64 %f2845, %fd554; + cvt.rn.f32.f64 %f2844, %fd553; + fma.rn.f32 %f2842, %f226, %f174, %f2842; + fma.rn.f32 %f2841, %f226, %f220, %f2841; + add.f32 %f2840, %f2840, %f226; + fma.rn.f32 %f2839, %f226, %f218, %f2839; + add.s32 %r786, %r786, 1; + setp.lt.s32 %p529, %r786, %r102; + @%p529 bra $L__BB5_56; + + add.s32 %r785, %r785, 1; + setp.lt.s32 %p530, %r785, %r102; + @%p530 bra $L__BB5_55; + +$L__BB5_336: + ld.param.u32 %r766, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_6]; + div.rn.f32 %f1468, %f2843, %f2848; + mov.f32 %f1469, 0fBF800000; + max.f32 %f1470, %f1468, %f1469; + mov.f32 %f1471, 0f3F800000; + min.f32 %f1472, %f1470, %f1471; + sub.f32 %f2890, %f2890, %f1472; + div.rn.f32 %f1473, %f2842, %f2847; + max.f32 %f1474, %f1473, %f1469; + min.f32 %f1475, %f1474, %f1471; + sub.f32 %f2889, %f2889, %f1475; + neg.f32 %f1476, %f2888; + div.rn.f32 %f1477, %f2841, %f2846; + max.f32 %f1478, %f1477, %f1476; + min.f32 %f1479, %f1478, %f2888; + sub.f32 %f1480, %f2888, %f1479; + neg.f32 %f1481, %f2887; + div.rn.f32 %f1482, %f2840, %f2845; + max.f32 %f1483, %f1482, %f1481; + min.f32 %f1484, %f1483, %f2887; + sub.f32 %f1485, %f2887, %f1484; + neg.f32 %f1486, %f2886; + div.rn.f32 %f1487, %f2839, %f2844; + max.f32 %f1488, %f1487, %f1486; + min.f32 %f1489, %f1488, %f2886; + sub.f32 %f1490, %f2886, %f1489; + max.f32 %f2888, %f1480, %f1471; + mov.f32 %f1491, 0f3C23D70A; + max.f32 %f2887, %f1485, %f1491; + mov.f32 %f1492, 0f3F000000; + max.f32 %f1493, %f1490, %f1492; + min.f32 %f2886, %f1493, %f51; + add.s32 %r784, %r784, 1; + setp.lt.s32 %p531, %r784, %r766; + @%p531 bra $L__BB5_53; + +$L__BB5_337: + mov.f32 %f1509, 0f00000000; + mov.f32 %f2908, %f1509; + mov.f32 %f2909, %f1509; + mov.f32 %f2910, %f1509; + mov.f32 %f2913, %f1509; + mov.f32 %f2917, %f1509; + mov.f32 %f2911, %f1509; + mov.f32 %f2912, %f1509; + mov.f32 %f2914, %f1509; + mov.f32 %f2918, %f1509; + mov.f32 %f2915, %f1509; + mov.f32 %f2916, %f1509; + mov.f32 %f2919, %f1509; + mov.f32 %f2920, %f1509; + mov.f32 %f2921, %f1509; + mov.f32 %f2922, %f1509; + mov.f32 %f2950, %f1509; + @%p40 bra $L__BB5_426; + + mov.f32 %f1526, 0f3F000000; + div.rn.f32 %f1527, %f1526, %f2886; + div.rn.f32 %f1528, %f1527, %f2886; + div.rn.f32 %f1529, %f2888, 0fC0206C98; + div.rn.f32 %f259, %f1529, %f2886; + div.rn.f32 %f260, %f259, %f2886; + sqrt.rn.f32 %f261, %f1528; + mov.f32 %f1530, 0f3F800000; + cvt.rzi.f32.f32 %f1531, %f1530; + add.f32 %f1532, %f1531, %f1531; + mov.f32 %f1533, 0f40000000; + sub.f32 %f1534, %f1533, %f1532; + abs.f32 %f262, %f1534; + mov.u32 %r608, 0; + setp.eq.f32 %p540, %f262, 0f3F800000; + mov.u32 %r787, %r608; + +$L__BB5_339: + cvt.rn.f32.s32 %f1535, %r787; + sub.f32 %f279, %f1535, %f2890; + add.f32 %f1536, %f279, 0f3F000000; + mul.f32 %f1537, %f1536, %f261; + abs.f32 %f280, %f1537; + setp.ge.f32 %p533, %f280, 0f3F8060FE; + mul.f32 %f1538, %f1537, %f1537; + selp.f32 %f1539, %f280, %f1538, %p533; + selp.f32 %f1540, 0f3789CA3C, 0f38B1E96A, %p533; + selp.f32 %f1541, 0fB9F560B9, 0fBA574D20, %p533; + fma.rn.f32 %f1542, %f1540, %f1539, %f1541; + selp.f32 %f1543, 0f3BAC840B, 0f3BAAD5EA, %p533; + fma.rn.f32 %f1544, %f1542, %f1539, %f1543; + selp.f32 %f1545, 0fBD0C8162, 0fBCDC1BE7, %p533; + fma.rn.f32 %f1546, %f1544, %f1539, %f1545; + selp.f32 %f1547, 0f3E1CF906, 0f3DE718AF, %p533; + fma.rn.f32 %f1548, %f1546, %f1539, %f1547; + selp.f32 %f1549, 0f3F6A937E, 0fBEC093AC, %p533; + fma.rn.f32 %f1550, %f1548, %f1539, %f1549; + selp.f32 %f1551, 0f3F20D842, 0f3E0375D3, %p533; + fma.rn.f32 %f1552, %f1550, %f1539, %f1551; + neg.f32 %f1553, %f280; + selp.f32 %f1554, %f1553, %f1537, %p533; + fma.rn.f32 %f281, %f1552, %f1554, %f1554; + mov.b32 %r610, %f1537; + and.b32 %r93, %r610, -2147483648; + add.f32 %f1555, %f279, 0fBF000000; + mul.f32 %f1556, %f1555, %f261; + abs.f32 %f282, %f1556; + setp.ge.f32 %p534, %f282, 0f3F8060FE; + mul.f32 %f1557, %f1556, %f1556; + selp.f32 %f1558, %f282, %f1557, %p534; + selp.f32 %f1559, 0f3789CA3C, 0f38B1E96A, %p534; + selp.f32 %f1560, 0fB9F560B9, 0fBA574D20, %p534; + fma.rn.f32 %f1561, %f1559, %f1558, %f1560; + selp.f32 %f1562, 0f3BAC840B, 0f3BAAD5EA, %p534; + fma.rn.f32 %f1563, %f1561, %f1558, %f1562; + selp.f32 %f1564, 0fBD0C8162, 0fBCDC1BE7, %p534; + fma.rn.f32 %f1565, %f1563, %f1558, %f1564; + selp.f32 %f1566, 0f3E1CF906, 0f3DE718AF, %p534; + fma.rn.f32 %f1567, %f1565, %f1558, %f1566; + selp.f32 %f1568, 0f3F6A937E, 0fBEC093AC, %p534; + fma.rn.f32 %f1569, %f1567, %f1558, %f1568; + selp.f32 %f1570, 0f3F20D842, 0f3E0375D3, %p534; + fma.rn.f32 %f1571, %f1569, %f1558, %f1570; + neg.f32 %f1572, %f282; + selp.f32 %f1573, %f1572, %f1556, %p534; + fma.rn.f32 %f283, %f1571, %f1573, %f1573; + mov.b32 %r611, %f1556; + and.b32 %r94, %r611, -2147483648; + add.f32 %f1574, %f1535, 0f3F000000; + sub.f32 %f1575, %f1574, %f2890; + div.rn.f32 %f284, %f1575, %f2886; + abs.f32 %f285, %f284; + setp.lt.f32 %p535, %f285, 0f00800000; + mul.f32 %f1576, %f285, 0f4B800000; + selp.f32 %f1577, %f1576, %f285, %p535; + selp.f32 %f1578, 0fC3170000, 0fC2FE0000, %p535; + mov.b32 %r612, %f1577; + and.b32 %r613, %r612, 8388607; + or.b32 %r614, %r613, 1065353216; + mov.b32 %f1579, %r614; + shr.u32 %r615, %r612, 23; + cvt.rn.f32.u32 %f1580, %r615; + add.f32 %f1581, %f1578, %f1580; + setp.gt.f32 %p536, %f1579, 0f3FB504F3; + mul.f32 %f1582, %f1579, 0f3F000000; + add.f32 %f1583, %f1581, 0f3F800000; + selp.f32 %f1584, %f1583, %f1581, %p536; + selp.f32 %f1585, %f1582, %f1579, %p536; + add.f32 %f1586, %f1585, 0fBF800000; + add.f32 %f1587, %f1585, 0f3F800000; + rcp.approx.ftz.f32 %f1588, %f1587; + add.f32 %f1589, %f1586, %f1586; + mul.f32 %f1591, %f1589, %f1588; + mul.f32 %f1592, %f1591, %f1591; + mov.f32 %f1593, 0f3C4CAF63; + mov.f32 %f1594, 0f3B18F0FE; + fma.rn.f32 %f1595, %f1594, %f1592, %f1593; + mov.f32 %f1596, 0f3DAAAABD; + fma.rn.f32 %f1597, %f1595, %f1592, %f1596; + mul.rn.f32 %f1598, %f1597, %f1592; + mul.rn.f32 %f1599, %f1598, %f1591; + sub.f32 %f1600, %f1586, %f1591; + add.f32 %f1601, %f1600, %f1600; + neg.f32 %f1602, %f1591; + fma.rn.f32 %f1603, %f1602, %f1586, %f1601; + mul.rn.f32 %f1604, %f1588, %f1603; + add.f32 %f1605, %f1599, %f1591; + sub.f32 %f1606, %f1591, %f1605; + add.f32 %f1607, %f1599, %f1606; + add.f32 %f1608, %f1604, %f1607; + add.f32 %f1609, %f1605, %f1608; + sub.f32 %f1610, %f1605, %f1609; + add.f32 %f1611, %f1608, %f1610; + mov.f32 %f1612, 0f3F317200; + mul.rn.f32 %f1613, %f1584, %f1612; + mov.f32 %f1614, 0f35BFBE8E; + mul.rn.f32 %f1615, %f1584, %f1614; + add.f32 %f1616, %f1613, %f1609; + sub.f32 %f1617, %f1613, %f1616; + add.f32 %f1618, %f1609, %f1617; + add.f32 %f1619, %f1611, %f1618; + add.f32 %f1620, %f1615, %f1619; + add.f32 %f1621, %f1616, %f1620; + sub.f32 %f1622, %f1616, %f1621; + add.f32 %f1623, %f1620, %f1622; + mul.rn.f32 %f1624, %f1533, %f1621; + neg.f32 %f1625, %f1624; + fma.rn.f32 %f1626, %f1533, %f1621, %f1625; + fma.rn.f32 %f1627, %f1533, %f1623, %f1626; + fma.rn.f32 %f1629, %f1509, %f1621, %f1627; + add.rn.f32 %f1630, %f1624, %f1629; + neg.f32 %f1631, %f1630; + add.rn.f32 %f1632, %f1624, %f1631; + add.rn.f32 %f1633, %f1632, %f1629; + mov.b32 %r616, %f1630; + setp.eq.s32 %p537, %r616, 1118925336; + add.s32 %r617, %r616, -1; + mov.b32 %f1634, %r617; + add.f32 %f1635, %f1633, 0f37000000; + selp.f32 %f286, %f1635, %f1633, %p537; + selp.f32 %f1636, %f1634, %f1630, %p537; + mov.f32 %f1637, 0f3FB8AA3B; + mul.rn.f32 %f1638, %f1636, %f1637; + cvt.rzi.f32.f32 %f1639, %f1638; + abs.f32 %f1640, %f1639; + setp.gt.f32 %p538, %f1640, 0f42FC0000; + mov.b32 %r618, %f1639; + and.b32 %r619, %r618, -2147483648; + or.b32 %r620, %r619, 1123811328; + mov.b32 %f1641, %r620; + selp.f32 %f1642, %f1641, %f1639, %p538; + mov.f32 %f1643, 0fBF317218; + fma.rn.f32 %f1644, %f1642, %f1643, %f1636; + mov.f32 %f1645, 0f3102E308; + fma.rn.f32 %f1646, %f1642, %f1645, %f1644; + mul.f32 %f1647, %f1646, 0f3FB8AA3B; + add.f32 %f1648, %f1642, 0f4B40007F; + mov.b32 %r621, %f1648; + shl.b32 %r622, %r621, 23; + mov.b32 %f1649, %r622; + ex2.approx.ftz.f32 %f1650, %f1647; + mul.f32 %f287, %f1650, %f1649; + setp.lt.f32 %p539, %f284, 0f00000000; + and.pred %p27, %p539, %p540; + add.f32 %f1651, %f284, %f284; + selp.f32 %f288, %f1651, 0f00000000, %p540; + add.f32 %f1652, %f285, 0f40000000; + mov.b32 %r95, %f1652; + div.rn.f32 %f289, %f1555, %f2886; + abs.f32 %f290, %f289; + setp.lt.f32 %p541, %f290, 0f00800000; + mul.f32 %f1653, %f290, 0f4B800000; + selp.f32 %f1654, %f1653, %f290, %p541; + selp.f32 %f1655, 0fC3170000, 0fC2FE0000, %p541; + mov.b32 %r623, %f1654; + and.b32 %r624, %r623, 8388607; + or.b32 %r625, %r624, 1065353216; + mov.b32 %f1656, %r625; + shr.u32 %r626, %r623, 23; + cvt.rn.f32.u32 %f1657, %r626; + add.f32 %f1658, %f1655, %f1657; + setp.gt.f32 %p542, %f1656, 0f3FB504F3; + mul.f32 %f1659, %f1656, 0f3F000000; + add.f32 %f1660, %f1658, 0f3F800000; + selp.f32 %f1661, %f1660, %f1658, %p542; + selp.f32 %f1662, %f1659, %f1656, %p542; + add.f32 %f1663, %f1662, 0fBF800000; + add.f32 %f1664, %f1662, 0f3F800000; + rcp.approx.ftz.f32 %f1665, %f1664; + add.f32 %f1666, %f1663, %f1663; + mul.f32 %f1667, %f1666, %f1665; + mul.f32 %f1668, %f1667, %f1667; + fma.rn.f32 %f1669, %f1594, %f1668, %f1593; + fma.rn.f32 %f1670, %f1669, %f1668, %f1596; + mul.rn.f32 %f1671, %f1670, %f1668; + mul.rn.f32 %f1672, %f1671, %f1667; + sub.f32 %f1673, %f1663, %f1667; + add.f32 %f1674, %f1673, %f1673; + neg.f32 %f1675, %f1667; + fma.rn.f32 %f1676, %f1675, %f1663, %f1674; + mul.rn.f32 %f1677, %f1665, %f1676; + add.f32 %f1678, %f1672, %f1667; + sub.f32 %f1679, %f1667, %f1678; + add.f32 %f1680, %f1672, %f1679; + add.f32 %f1681, %f1677, %f1680; + add.f32 %f1682, %f1678, %f1681; + sub.f32 %f1683, %f1678, %f1682; + add.f32 %f1684, %f1681, %f1683; + mul.rn.f32 %f1685, %f1661, %f1612; + mul.rn.f32 %f1686, %f1661, %f1614; + add.f32 %f1687, %f1685, %f1682; + sub.f32 %f1688, %f1685, %f1687; + add.f32 %f1689, %f1682, %f1688; + add.f32 %f1690, %f1684, %f1689; + add.f32 %f1691, %f1686, %f1690; + add.f32 %f1692, %f1687, %f1691; + sub.f32 %f1693, %f1687, %f1692; + add.f32 %f1694, %f1691, %f1693; + mul.rn.f32 %f1695, %f1533, %f1692; + neg.f32 %f1696, %f1695; + fma.rn.f32 %f1697, %f1533, %f1692, %f1696; + fma.rn.f32 %f1698, %f1533, %f1694, %f1697; + fma.rn.f32 %f1699, %f1509, %f1692, %f1698; + add.rn.f32 %f1700, %f1695, %f1699; + neg.f32 %f1701, %f1700; + add.rn.f32 %f1702, %f1695, %f1701; + add.rn.f32 %f1703, %f1702, %f1699; + mov.b32 %r627, %f1700; + setp.eq.s32 %p543, %r627, 1118925336; + add.s32 %r628, %r627, -1; + mov.b32 %f1704, %r628; + add.f32 %f1705, %f1703, 0f37000000; + selp.f32 %f291, %f1705, %f1703, %p543; + selp.f32 %f1706, %f1704, %f1700, %p543; + mul.rn.f32 %f1707, %f1706, %f1637; + cvt.rzi.f32.f32 %f1708, %f1707; + abs.f32 %f1709, %f1708; + setp.gt.f32 %p544, %f1709, 0f42FC0000; + mov.b32 %r629, %f1708; + and.b32 %r630, %r629, -2147483648; + or.b32 %r631, %r630, 1123811328; + mov.b32 %f1710, %r631; + selp.f32 %f1711, %f1710, %f1708, %p544; + fma.rn.f32 %f1712, %f1711, %f1643, %f1706; + fma.rn.f32 %f1713, %f1711, %f1645, %f1712; + mul.f32 %f1714, %f1713, 0f3FB8AA3B; + add.f32 %f1715, %f1711, 0f4B40007F; + mov.b32 %r632, %f1715; + shl.b32 %r633, %r632, 23; + mov.b32 %f1716, %r633; + ex2.approx.ftz.f32 %f1717, %f1714; + mul.f32 %f292, %f1717, %f1716; + add.f32 %f293, %f284, 0f40000000; + setp.lt.f32 %p545, %f289, 0f00000000; + and.pred %p28, %p545, %p540; + selp.f32 %f294, 0fFF800000, 0f7F800000, %p27; + add.f32 %f1718, %f289, %f289; + selp.f32 %f295, %f1718, 0f00000000, %p540; + add.f32 %f1719, %f290, 0f40000000; + mov.b32 %r96, %f1719; + add.f32 %f296, %f289, 0f40000000; + selp.f32 %f297, 0fFF800000, 0f7F800000, %p28; + add.f32 %f1720, %f1535, 0f3F800000; + sub.f32 %f1721, %f1720, %f2890; + div.rn.f32 %f298, %f1721, %f2886; + abs.f32 %f299, %f298; + setp.lt.f32 %p546, %f299, 0f00800000; + mul.f32 %f1722, %f299, 0f4B800000; + selp.f32 %f1723, %f1722, %f299, %p546; + selp.f32 %f1724, 0fC3170000, 0fC2FE0000, %p546; + mov.b32 %r634, %f1723; + and.b32 %r635, %r634, 8388607; + or.b32 %r636, %r635, 1065353216; + mov.b32 %f1725, %r636; + shr.u32 %r637, %r634, 23; + cvt.rn.f32.u32 %f1726, %r637; + add.f32 %f1727, %f1724, %f1726; + setp.gt.f32 %p547, %f1725, 0f3FB504F3; + mul.f32 %f1728, %f1725, 0f3F000000; + add.f32 %f1729, %f1727, 0f3F800000; + selp.f32 %f1730, %f1729, %f1727, %p547; + selp.f32 %f1731, %f1728, %f1725, %p547; + add.f32 %f1732, %f1731, 0fBF800000; + add.f32 %f1733, %f1731, 0f3F800000; + rcp.approx.ftz.f32 %f1734, %f1733; + add.f32 %f1735, %f1732, %f1732; + mul.f32 %f1736, %f1735, %f1734; + mul.f32 %f1737, %f1736, %f1736; + fma.rn.f32 %f1738, %f1594, %f1737, %f1593; + fma.rn.f32 %f1739, %f1738, %f1737, %f1596; + mul.rn.f32 %f1740, %f1739, %f1737; + mul.rn.f32 %f1741, %f1740, %f1736; + sub.f32 %f1742, %f1732, %f1736; + add.f32 %f1743, %f1742, %f1742; + neg.f32 %f1744, %f1736; + fma.rn.f32 %f1745, %f1744, %f1732, %f1743; + mul.rn.f32 %f1746, %f1734, %f1745; + add.f32 %f1747, %f1741, %f1736; + sub.f32 %f1748, %f1736, %f1747; + add.f32 %f1749, %f1741, %f1748; + add.f32 %f1750, %f1746, %f1749; add.f32 %f1751, %f1747, %f1750; - setp.gt.f32 %p193, %f1749, 0f3FB504F3; - mul.f32 %f1752, %f1749, 0f3F000000; - add.f32 %f1753, %f1751, 0f3F800000; - selp.f32 %f1754, %f1752, %f1749, %p193; - selp.f32 %f1755, %f1753, %f1751, %p193; - add.f32 %f343, %f1754, 0fBF800000; - add.f32 %f344, %f1754, 0f3F800000; - add.f32 %f345, %f343, %f343; - mul.rn.f32 %f346, %f1755, %f1742; - mul.rn.f32 %f347, %f1755, %f1743; - setp.lt.f32 %p194, %f341, 0f00000000; - and.pred %p8, %p194, %p188; - add.f32 %f1756, %f341, %f341; - selp.f32 %f348, %f1756, 0f00000000, %p188; - mov.b32 %r204, %f324; - and.b32 %r45, %r204, -2147483648; - ld.local.f32 %f3119, [%rd2]; - ld.local.f32 %f3118, [%rd2+4]; - ld.local.f32 %f3117, [%rd2+8]; - ld.local.f32 %f3116, [%rd2+12]; - ld.local.f32 %f3115, [%rd2+16]; - ld.local.f32 %f3114, [%rd2+24]; - ld.local.f32 %f3113, [%rd2+28]; - ld.local.f32 %f3112, [%rd2+32]; - ld.local.f32 %f3111, [%rd2+36]; - ld.local.f32 %f3110, [%rd2+48]; - ld.local.f32 %f3109, [%rd2+52]; - ld.local.f32 %f3108, [%rd2+56]; - ld.local.f32 %f3107, [%rd2+72]; - ld.local.f32 %f3106, [%rd2+76]; - ld.local.f32 %f3105, [%rd2+96]; - mov.u32 %r319, %r193; - -BB5_123: - setp.ltu.f32 %p195, %f325, 0f3F800000; - @%p195 bra BB5_125; - bra.uni BB5_124; - -BB5_125: - cvt.rn.f32.s32 %f2978, %r318; - sub.f32 %f2977, %f2978, %f3102; - add.f32 %f2976, %f2977, 0f3F800000; - mul.f32 %f2975, %f2976, %f323; - mov.f32 %f1776, 0f3BA0C9F8; - mov.f32 %f1777, 0fBA1268FB; - fma.rn.f32 %f1778, %f1777, %f326, %f1776; - mov.f32 %f1779, 0fBCDABFD4; - fma.rn.f32 %f1780, %f1778, %f326, %f1779; - mov.f32 %f1781, 0f3DE70331; - fma.rn.f32 %f1782, %f1780, %f326, %f1781; - mov.f32 %f1783, 0fBEC09330; - fma.rn.f32 %f1784, %f1782, %f326, %f1783; - mov.f32 %f1785, 0f3F906EBA; - fma.rn.f32 %f1786, %f1784, %f326, %f1785; - mul.f32 %f3121, %f2975, %f1786; - bra.uni BB5_126; - -BB5_124: - mov.f32 %f2926, 0f3F800000; - setp.ltu.f32 %p196, %f325, 0f407AD445; - mov.f32 %f1758, 0f3A03BB71; - mov.f32 %f1759, 0fB7B730FB; - fma.rn.f32 %f1760, %f1759, %f325, %f1758; - mov.f32 %f1761, 0fBBACA3B3; - fma.rn.f32 %f1762, %f1760, %f325, %f1761; - mov.f32 %f1763, 0f3D0A7445; - fma.rn.f32 %f1764, %f1762, %f325, %f1763; - mov.f32 %f1765, 0fBE1B3B75; - fma.rn.f32 %f1766, %f1764, %f325, %f1765; - mov.f32 %f1767, 0fBF6B385A; - fma.rn.f32 %f1768, %f1766, %f325, %f1767; - mov.f32 %f1769, 0fBFD0316E; - fma.rn.f32 %f1770, %f1768, %f325, %f1769; - mov.f32 %f1771, 0fBA031CCE; - fma.rn.f32 %f1772, %f1770, %f325, %f1771; - ex2.approx.ftz.f32 %f1773, %f1772; - sub.f32 %f1775, %f2926, %f1773; - mov.b32 %r205, %f1775; - selp.b32 %r206, %r205, 1065353216, %p196; - or.b32 %r207, %r206, %r45; - mov.b32 %f3121, %r207; - -BB5_126: - setp.ltu.f32 %p197, %f328, 0f3F800000; - @%p197 bra BB5_128; - bra.uni BB5_127; - -BB5_128: - cvt.rn.f32.s32 %f2973, %r318; - sub.f32 %f2972, %f2973, %f3102; - mul.f32 %f2971, %f2972, %f323; - mul.f32 %f2970, %f2971, %f2971; - mov.f32 %f1805, 0f3BA0C9F8; - mov.f32 %f1806, 0fBA1268FB; - fma.rn.f32 %f1807, %f1806, %f2970, %f1805; - mov.f32 %f1808, 0fBCDABFD4; - fma.rn.f32 %f1809, %f1807, %f2970, %f1808; - mov.f32 %f1810, 0f3DE70331; - fma.rn.f32 %f1811, %f1809, %f2970, %f1810; - mov.f32 %f1812, 0fBEC09330; - fma.rn.f32 %f1813, %f1811, %f2970, %f1812; - mov.f32 %f1814, 0f3F906EBA; - fma.rn.f32 %f1815, %f1813, %f2970, %f1814; - mul.f32 %f3122, %f2971, %f1815; - bra.uni BB5_129; - -BB5_127: - cvt.rn.f32.s32 %f2930, %r318; - sub.f32 %f2929, %f2930, %f3102; - mul.f32 %f2928, %f2929, %f323; - mov.b32 %r298, %f2928; - and.b32 %r297, %r298, -2147483648; - mov.f32 %f2927, 0f3F800000; - setp.ltu.f32 %p198, %f328, 0f407AD445; - mov.f32 %f1787, 0f3A03BB71; - mov.f32 %f1788, 0fB7B730FB; - fma.rn.f32 %f1789, %f1788, %f328, %f1787; - mov.f32 %f1790, 0fBBACA3B3; - fma.rn.f32 %f1791, %f1789, %f328, %f1790; - mov.f32 %f1792, 0f3D0A7445; - fma.rn.f32 %f1793, %f1791, %f328, %f1792; - mov.f32 %f1794, 0fBE1B3B75; - fma.rn.f32 %f1795, %f1793, %f328, %f1794; - mov.f32 %f1796, 0fBF6B385A; - fma.rn.f32 %f1797, %f1795, %f328, %f1796; - mov.f32 %f1798, 0fBFD0316E; - fma.rn.f32 %f1799, %f1797, %f328, %f1798; - mov.f32 %f1800, 0fBA031CCE; - fma.rn.f32 %f1801, %f1799, %f328, %f1800; - ex2.approx.ftz.f32 %f1802, %f1801; - sub.f32 %f1804, %f2927, %f1802; - mov.b32 %r208, %f1804; - selp.b32 %r209, %r208, 1065353216, %p198; - or.b32 %r210, %r209, %r297; - mov.b32 %f3122, %r210; - -BB5_129: - sub.f32 %f1816, %f3121, %f3122; - mul.f32 %f388, %f1816, 0f3F000000; - cvt.rn.f32.s32 %f389, %r319; - sub.f32 %f390, %f389, %f3101; - add.f32 %f391, %f390, 0f3F800000; - mul.f32 %f392, %f391, %f323; - abs.f32 %f393, %f392; - setp.ltu.f32 %p199, %f393, 0f3F800000; - @%p199 bra BB5_131; - bra.uni BB5_130; - -BB5_131: - mul.f32 %f1835, %f392, %f392; - mov.f32 %f1836, 0f3BA0C9F8; - mov.f32 %f1837, 0fBA1268FB; - fma.rn.f32 %f1838, %f1837, %f1835, %f1836; - mov.f32 %f1839, 0fBCDABFD4; - fma.rn.f32 %f1840, %f1838, %f1835, %f1839; - mov.f32 %f1841, 0f3DE70331; - fma.rn.f32 %f1842, %f1840, %f1835, %f1841; - mov.f32 %f1843, 0fBEC09330; - fma.rn.f32 %f1844, %f1842, %f1835, %f1843; - mov.f32 %f1845, 0f3F906EBA; - fma.rn.f32 %f1846, %f1844, %f1835, %f1845; - mul.f32 %f3123, %f392, %f1846; - bra.uni BB5_132; - -BB5_130: - mov.f32 %f2931, 0f3F800000; - mov.f32 %f1817, 0f3A03BB71; - mov.f32 %f1818, 0fB7B730FB; - fma.rn.f32 %f1819, %f1818, %f393, %f1817; - mov.f32 %f1820, 0fBBACA3B3; - fma.rn.f32 %f1821, %f1819, %f393, %f1820; - mov.f32 %f1822, 0f3D0A7445; - fma.rn.f32 %f1823, %f1821, %f393, %f1822; - mov.f32 %f1824, 0fBE1B3B75; - fma.rn.f32 %f1825, %f1823, %f393, %f1824; - mov.f32 %f1826, 0fBF6B385A; - fma.rn.f32 %f1827, %f1825, %f393, %f1826; - mov.f32 %f1828, 0fBFD0316E; - fma.rn.f32 %f1829, %f1827, %f393, %f1828; - mov.f32 %f1830, 0fBA031CCE; - fma.rn.f32 %f1831, %f1829, %f393, %f1830; - ex2.approx.ftz.f32 %f1832, %f1831; - sub.f32 %f1834, %f2931, %f1832; - mov.b32 %r211, %f1834; - setp.ltu.f32 %p200, %f393, 0f407AD445; - selp.b32 %r212, %r211, 1065353216, %p200; - mov.b32 %r213, %f392; - and.b32 %r214, %r213, -2147483648; - or.b32 %r215, %r212, %r214; - mov.b32 %f3123, %r215; - -BB5_132: - cvt.rn.f32.s32 %f2933, %r319; - sub.f32 %f2932, %f2933, %f3101; - mul.f32 %f397, %f2932, %f323; - abs.f32 %f398, %f397; - setp.ltu.f32 %p201, %f398, 0f3F800000; - @%p201 bra BB5_134; - bra.uni BB5_133; - -BB5_134: - mul.f32 %f1865, %f397, %f397; - mov.f32 %f1866, 0f3BA0C9F8; - mov.f32 %f1867, 0fBA1268FB; - fma.rn.f32 %f1868, %f1867, %f1865, %f1866; - mov.f32 %f1869, 0fBCDABFD4; - fma.rn.f32 %f1870, %f1868, %f1865, %f1869; - mov.f32 %f1871, 0f3DE70331; - fma.rn.f32 %f1872, %f1870, %f1865, %f1871; - mov.f32 %f1873, 0fBEC09330; - fma.rn.f32 %f1874, %f1872, %f1865, %f1873; - mov.f32 %f1875, 0f3F906EBA; - fma.rn.f32 %f1876, %f1874, %f1865, %f1875; - mul.f32 %f3124, %f397, %f1876; - bra.uni BB5_135; - -BB5_133: - mov.f32 %f2934, 0f3F800000; - mov.f32 %f1847, 0f3A03BB71; - mov.f32 %f1848, 0fB7B730FB; - fma.rn.f32 %f1849, %f1848, %f398, %f1847; - mov.f32 %f1850, 0fBBACA3B3; - fma.rn.f32 %f1851, %f1849, %f398, %f1850; - mov.f32 %f1852, 0f3D0A7445; - fma.rn.f32 %f1853, %f1851, %f398, %f1852; - mov.f32 %f1854, 0fBE1B3B75; - fma.rn.f32 %f1855, %f1853, %f398, %f1854; - mov.f32 %f1856, 0fBF6B385A; - fma.rn.f32 %f1857, %f1855, %f398, %f1856; - mov.f32 %f1858, 0fBFD0316E; - fma.rn.f32 %f1859, %f1857, %f398, %f1858; - mov.f32 %f1860, 0fBA031CCE; - fma.rn.f32 %f1861, %f1859, %f398, %f1860; - ex2.approx.ftz.f32 %f1862, %f1861; - sub.f32 %f1864, %f2934, %f1862; - mov.b32 %r216, %f1864; - setp.ltu.f32 %p202, %f398, 0f407AD445; - selp.b32 %r217, %r216, 1065353216, %p202; - mov.b32 %r218, %f397; - and.b32 %r219, %r218, -2147483648; - or.b32 %r220, %r217, %r219; - mov.b32 %f3124, %r220; - -BB5_135: - sub.f32 %f1879, %f3123, %f3124; - mul.f32 %f402, %f1879, 0f3F000000; - mul.f32 %f1880, %f388, %f3100; - fma.rn.f32 %f403, %f402, %f1880, %f3013; - mad.lo.s32 %r221, %r319, %r63, %r318; - add.s32 %r222, %r221, %r4; - mul.wide.s32 %rd81, %r222, 4; - add.s64 %rd82, %rd1, %rd81; - ld.global.f32 %f404, [%rd82]; - // inline asm - rcp.approx.ftz.f32 %f1877,%f334; - // inline asm - mul.f32 %f1881, %f1877, %f335; - mul.f32 %f1882, %f1881, %f1881; - mov.f32 %f1883, 0f3C4CAF63; - mov.f32 %f1884, 0f3B18F0FE; - fma.rn.f32 %f1885, %f1884, %f1882, %f1883; - mov.f32 %f1886, 0f3DAAAABD; - fma.rn.f32 %f1887, %f1885, %f1882, %f1886; - mul.rn.f32 %f1888, %f1887, %f1882; - mul.rn.f32 %f1889, %f1888, %f1881; - sub.f32 %f1890, %f333, %f1881; - neg.f32 %f1891, %f1881; - add.f32 %f1892, %f1890, %f1890; - fma.rn.f32 %f1893, %f1891, %f333, %f1892; - mul.rn.f32 %f1894, %f1877, %f1893; - add.f32 %f1895, %f1889, %f1881; - sub.f32 %f1896, %f1881, %f1895; - add.f32 %f1897, %f1889, %f1896; - add.f32 %f1898, %f1894, %f1897; - add.f32 %f1899, %f1895, %f1898; - sub.f32 %f1900, %f1895, %f1899; - add.f32 %f1901, %f1898, %f1900; - add.f32 %f1902, %f336, %f1899; - sub.f32 %f1903, %f336, %f1902; - add.f32 %f1904, %f1899, %f1903; - add.f32 %f1905, %f1901, %f1904; - add.f32 %f1906, %f337, %f1905; - add.f32 %f1907, %f1902, %f1906; - sub.f32 %f1908, %f1902, %f1907; - add.f32 %f1909, %f1906, %f1908; - mul.rn.f32 %f1911, %f1730, %f1907; - neg.f32 %f1912, %f1911; - fma.rn.f32 %f1913, %f1730, %f1907, %f1912; - fma.rn.f32 %f1914, %f1730, %f1909, %f1913; - mov.f32 %f1915, 0f00000000; - fma.rn.f32 %f1916, %f1915, %f1907, %f1914; - add.rn.f32 %f1917, %f1911, %f1916; - neg.f32 %f1918, %f1917; - add.rn.f32 %f1919, %f1911, %f1918; - add.rn.f32 %f1920, %f1919, %f1916; - mov.b32 %r223, %f1917; - setp.eq.s32 %p203, %r223, 1118925336; - add.s32 %r224, %r223, -1; - mov.b32 %f1921, %r224; - add.f32 %f1922, %f1920, 0f37000000; - selp.f32 %f1923, %f1921, %f1917, %p203; - selp.f32 %f405, %f1922, %f1920, %p203; - mul.f32 %f1924, %f1923, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1925, %f1924; - mov.f32 %f1926, 0fBF317200; - fma.rn.f32 %f1927, %f1925, %f1926, %f1923; - mov.f32 %f1928, 0fB5BFBE8E; - fma.rn.f32 %f1929, %f1925, %f1928, %f1927; - mul.f32 %f1930, %f1929, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1931, %f1930; - add.f32 %f1932, %f1925, 0f00000000; - ex2.approx.f32 %f1933, %f1932; - mul.f32 %f1934, %f1931, %f1933; - setp.lt.f32 %p204, %f1923, 0fC2D20000; - selp.f32 %f1935, 0f00000000, %f1934, %p204; - setp.gt.f32 %p205, %f1923, 0f42D20000; - selp.f32 %f3125, 0f7F800000, %f1935, %p205; - setp.eq.f32 %p206, %f3125, 0f7F800000; - @%p206 bra BB5_137; - - fma.rn.f32 %f3125, %f3125, %f405, %f3125; - -BB5_137: - setp.geu.f32 %p366, %f330, 0f00000000; - mov.b32 %r225, %f3125; - xor.b32 %r226, %r225, -2147483648; - mov.b32 %f1936, %r226; - selp.f32 %f409, %f1936, %f3125, %p7; - setp.eq.f32 %p207, %f330, 0f00000000; - selp.f32 %f3126, %f338, %f409, %p207; - @%p366 bra BB5_139; - - cvt.rzi.f32.f32 %f1938, %f1730; - setp.neu.f32 %p208, %f1938, 0f40000000; - selp.f32 %f3126, 0f7FFFFFFF, %f409, %p208; - -BB5_139: - abs.f32 %f2944, %f330; - add.f32 %f2943, %f2944, 0f40000000; - mov.b32 %r299, %f2943; - mov.f32 %f2942, 0f00000000; - mov.f32 %f2941, 0f3DAAAABD; - mov.f32 %f2940, 0f3C4CAF63; - mov.f32 %f2939, 0f3B18F0FE; - mov.f32 %f2938, 0fB5BFBE8E; - mov.f32 %f2937, 0fBF317200; - selp.f32 %f2936, 0fFF800000, 0f7F800000, %p7; - add.f32 %f2935, %f330, 0f40000000; - setp.gtu.f32 %p209, %f2944, 0f7F800000; - selp.f32 %f1941, %f2935, %f3126, %p209; - setp.neu.f32 %p210, %f2944, 0f7F800000; - selp.f32 %f1942, %f1941, %f2936, %p210; - setp.gt.s32 %p211, %r299, 2139095039; - selp.f32 %f1943, %f1942, %f3126, %p211; - mul.f32 %f1944, %f1943, 0fBF000000; - setp.eq.f32 %p212, %f330, 0f3F800000; - selp.f32 %f1945, 0fBF000000, %f1944, %p212; - mul.f32 %f1946, %f1945, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1947, %f1946; - fma.rn.f32 %f1949, %f1947, %f2937, %f1945; - fma.rn.f32 %f1951, %f1947, %f2938, %f1949; - mul.f32 %f1952, %f1951, 0f3FB8AA3B; + sub.f32 %f1752, %f1747, %f1751; + add.f32 %f1753, %f1750, %f1752; + mul.rn.f32 %f1754, %f1730, %f1612; + mul.rn.f32 %f1755, %f1730, %f1614; + add.f32 %f1756, %f1754, %f1751; + sub.f32 %f1757, %f1754, %f1756; + add.f32 %f1758, %f1751, %f1757; + add.f32 %f1759, %f1753, %f1758; + add.f32 %f1760, %f1755, %f1759; + add.f32 %f1761, %f1756, %f1760; + sub.f32 %f1762, %f1756, %f1761; + add.f32 %f1763, %f1760, %f1762; + mul.rn.f32 %f1764, %f1533, %f1761; + neg.f32 %f1765, %f1764; + fma.rn.f32 %f1766, %f1533, %f1761, %f1765; + fma.rn.f32 %f1767, %f1533, %f1763, %f1766; + fma.rn.f32 %f1768, %f1509, %f1761, %f1767; + add.rn.f32 %f1769, %f1764, %f1768; + neg.f32 %f1770, %f1769; + add.rn.f32 %f1771, %f1764, %f1770; + add.rn.f32 %f1772, %f1771, %f1768; + mov.b32 %r638, %f1769; + setp.eq.s32 %p548, %r638, 1118925336; + add.s32 %r639, %r638, -1; + mov.b32 %f1773, %r639; + add.f32 %f1774, %f1772, 0f37000000; + selp.f32 %f300, %f1774, %f1772, %p548; + selp.f32 %f1775, %f1773, %f1769, %p548; + mul.rn.f32 %f1776, %f1775, %f1637; + cvt.rzi.f32.f32 %f1777, %f1776; + abs.f32 %f1778, %f1777; + setp.gt.f32 %p549, %f1778, 0f42FC0000; + mov.b32 %r640, %f1777; + and.b32 %r641, %r640, -2147483648; + or.b32 %r642, %r641, 1123811328; + mov.b32 %f1779, %r642; + selp.f32 %f1780, %f1779, %f1777, %p549; + fma.rn.f32 %f1781, %f1780, %f1643, %f1775; + fma.rn.f32 %f1782, %f1780, %f1645, %f1781; + mul.f32 %f1783, %f1782, 0f3FB8AA3B; + add.f32 %f1784, %f1780, 0f4B40007F; + mov.b32 %r643, %f1784; + shl.b32 %r644, %r643, 23; + mov.b32 %f1785, %r644; + ex2.approx.ftz.f32 %f1786, %f1783; + mul.f32 %f301, %f1786, %f1785; + setp.lt.f32 %p550, %f298, 0f00000000; + and.pred %p29, %p550, %p540; + add.f32 %f1787, %f298, %f298; + selp.f32 %f302, %f1787, 0f00000000, %p540; + add.f32 %f1788, %f299, 0f40000000; + mov.b32 %r97, %f1788; + div.rn.f32 %f303, %f279, %f2886; + abs.f32 %f304, %f303; + setp.lt.f32 %p551, %f304, 0f00800000; + mul.f32 %f1789, %f304, 0f4B800000; + selp.f32 %f1790, %f1789, %f304, %p551; + selp.f32 %f1791, 0fC3170000, 0fC2FE0000, %p551; + mov.b32 %r645, %f1790; + and.b32 %r646, %r645, 8388607; + or.b32 %r647, %r646, 1065353216; + mov.b32 %f1792, %r647; + shr.u32 %r648, %r645, 23; + cvt.rn.f32.u32 %f1793, %r648; + add.f32 %f1794, %f1791, %f1793; + setp.gt.f32 %p552, %f1792, 0f3FB504F3; + mul.f32 %f1795, %f1792, 0f3F000000; + add.f32 %f1796, %f1794, 0f3F800000; + selp.f32 %f1797, %f1796, %f1794, %p552; + selp.f32 %f1798, %f1795, %f1792, %p552; + add.f32 %f1799, %f1798, 0fBF800000; + add.f32 %f1800, %f1798, 0f3F800000; + rcp.approx.ftz.f32 %f1801, %f1800; + add.f32 %f1802, %f1799, %f1799; + mul.f32 %f1803, %f1802, %f1801; + mul.f32 %f1804, %f1803, %f1803; + fma.rn.f32 %f1805, %f1594, %f1804, %f1593; + fma.rn.f32 %f1806, %f1805, %f1804, %f1596; + mul.rn.f32 %f1807, %f1806, %f1804; + mul.rn.f32 %f1808, %f1807, %f1803; + sub.f32 %f1809, %f1799, %f1803; + add.f32 %f1810, %f1809, %f1809; + neg.f32 %f1811, %f1803; + fma.rn.f32 %f1812, %f1811, %f1799, %f1810; + mul.rn.f32 %f1813, %f1801, %f1812; + add.f32 %f1814, %f1808, %f1803; + sub.f32 %f1815, %f1803, %f1814; + add.f32 %f1816, %f1808, %f1815; + add.f32 %f1817, %f1813, %f1816; + add.f32 %f1818, %f1814, %f1817; + sub.f32 %f1819, %f1814, %f1818; + add.f32 %f1820, %f1817, %f1819; + mul.rn.f32 %f1821, %f1797, %f1612; + mul.rn.f32 %f1822, %f1797, %f1614; + add.f32 %f1823, %f1821, %f1818; + sub.f32 %f1824, %f1821, %f1823; + add.f32 %f1825, %f1818, %f1824; + add.f32 %f1826, %f1820, %f1825; + add.f32 %f1827, %f1822, %f1826; + add.f32 %f1828, %f1823, %f1827; + sub.f32 %f1829, %f1823, %f1828; + add.f32 %f1830, %f1827, %f1829; + mul.rn.f32 %f1831, %f1533, %f1828; + neg.f32 %f1832, %f1831; + fma.rn.f32 %f1833, %f1533, %f1828, %f1832; + fma.rn.f32 %f1834, %f1533, %f1830, %f1833; + fma.rn.f32 %f1835, %f1509, %f1828, %f1834; + add.rn.f32 %f1836, %f1831, %f1835; + neg.f32 %f1837, %f1836; + add.rn.f32 %f1838, %f1831, %f1837; + add.rn.f32 %f1839, %f1838, %f1835; + mov.b32 %r649, %f1836; + setp.eq.s32 %p553, %r649, 1118925336; + add.s32 %r650, %r649, -1; + mov.b32 %f1840, %r650; + add.f32 %f1841, %f1839, 0f37000000; + selp.f32 %f305, %f1841, %f1839, %p553; + selp.f32 %f1842, %f1840, %f1836, %p553; + mul.rn.f32 %f1843, %f1842, %f1637; + cvt.rzi.f32.f32 %f1844, %f1843; + abs.f32 %f1845, %f1844; + setp.gt.f32 %p554, %f1845, 0f42FC0000; + mov.b32 %r651, %f1844; + and.b32 %r652, %r651, -2147483648; + or.b32 %r653, %r652, 1123811328; + mov.b32 %f1846, %r653; + selp.f32 %f1847, %f1846, %f1844, %p554; + fma.rn.f32 %f1848, %f1847, %f1643, %f1842; + fma.rn.f32 %f1849, %f1847, %f1645, %f1848; + mul.f32 %f1850, %f1849, 0f3FB8AA3B; + add.f32 %f1851, %f1847, 0f4B40007F; + mov.b32 %r654, %f1851; + shl.b32 %r655, %r654, 23; + mov.b32 %f1852, %r655; + ex2.approx.ftz.f32 %f1853, %f1850; + mul.f32 %f306, %f1853, %f1852; + add.f32 %f307, %f298, 0f40000000; + setp.lt.f32 %p555, %f303, 0f00000000; + and.pred %p30, %p555, %p540; + selp.f32 %f308, 0fFF800000, 0f7F800000, %p29; + add.f32 %f1854, %f303, %f303; + selp.f32 %f309, %f1854, 0f00000000, %p540; + add.f32 %f1855, %f304, 0f40000000; + mov.b32 %r98, %f1855; + add.f32 %f310, %f279, 0f3F800000; + add.f32 %f311, %f303, 0f40000000; + selp.f32 %f312, 0fFF800000, 0f7F800000, %p30; + setp.geu.f32 %p31, %f284, 0f00000000; + setp.geu.f32 %p32, %f289, 0f00000000; + setp.geu.f32 %p33, %f298, 0f00000000; + setp.geu.f32 %p34, %f303, 0f00000000; + mov.u32 %r788, %r608; + +$L__BB5_340: + setp.ltu.f32 %p556, %f280, 0f3F8060FE; + mov.f32 %f2924, %f281; + @%p556 bra $L__BB5_342; + + ex2.approx.ftz.f32 %f1856, %f281; + sub.f32 %f1858, %f1530, %f1856; + mov.b32 %r656, %f1858; + or.b32 %r657, %r93, %r656; + mov.b32 %f2924, %r657; + +$L__BB5_342: + setp.ltu.f32 %p557, %f282, 0f3F8060FE; + mov.f32 %f2925, %f283; + @%p557 bra $L__BB5_344; + + ex2.approx.ftz.f32 %f1859, %f283; + sub.f32 %f1861, %f1530, %f1859; + mov.b32 %r658, %f1861; + or.b32 %r659, %r94, %r658; + mov.b32 %f2925, %r659; + +$L__BB5_344: + sub.f32 %f1862, %f2924, %f2925; + mul.f32 %f333, %f1862, 0f3F000000; + cvt.rn.f32.s32 %f334, %r788; + sub.f32 %f335, %f334, %f2889; + add.f32 %f1863, %f335, 0f3F000000; + mul.f32 %f336, %f261, %f1863; + abs.f32 %f1864, %f336; + setp.ltu.f32 %p558, %f1864, 0f3F8060FE; + setp.ge.f32 %p559, %f1864, 0f3F8060FE; + mul.f32 %f1865, %f336, %f336; + selp.f32 %f1866, %f1864, %f1865, %p559; + selp.f32 %f1867, 0f3789CA3C, 0f38B1E96A, %p559; + selp.f32 %f1868, 0fB9F560B9, 0fBA574D20, %p559; + fma.rn.f32 %f1869, %f1867, %f1866, %f1868; + selp.f32 %f1870, 0f3BAC840B, 0f3BAAD5EA, %p559; + fma.rn.f32 %f1871, %f1869, %f1866, %f1870; + selp.f32 %f1872, 0fBD0C8162, 0fBCDC1BE7, %p559; + fma.rn.f32 %f1873, %f1871, %f1866, %f1872; + selp.f32 %f1874, 0f3E1CF906, 0f3DE718AF, %p559; + fma.rn.f32 %f1875, %f1873, %f1866, %f1874; + selp.f32 %f1876, 0f3F6A937E, 0fBEC093AC, %p559; + fma.rn.f32 %f1877, %f1875, %f1866, %f1876; + selp.f32 %f1878, 0f3F20D842, 0f3E0375D3, %p559; + fma.rn.f32 %f1879, %f1877, %f1866, %f1878; + neg.f32 %f1880, %f1864; + selp.f32 %f1881, %f1880, %f336, %p559; + fma.rn.f32 %f2926, %f1879, %f1881, %f1881; + @%p558 bra $L__BB5_346; + + ex2.approx.ftz.f32 %f1882, %f2926; + sub.f32 %f1884, %f1530, %f1882; + mov.b32 %r660, %f1884; + mov.b32 %r661, %f336; + and.b32 %r662, %r661, -2147483648; + or.b32 %r663, %r662, %r660; + mov.b32 %f2926, %r663; + +$L__BB5_346: + add.f32 %f340, %f335, 0fBF000000; + mul.f32 %f341, %f261, %f340; + abs.f32 %f1885, %f341; + setp.ltu.f32 %p560, %f1885, 0f3F8060FE; + setp.ge.f32 %p561, %f1885, 0f3F8060FE; + mul.f32 %f1886, %f341, %f341; + selp.f32 %f1887, %f1885, %f1886, %p561; + selp.f32 %f1888, 0f3789CA3C, 0f38B1E96A, %p561; + selp.f32 %f1889, 0fB9F560B9, 0fBA574D20, %p561; + fma.rn.f32 %f1890, %f1888, %f1887, %f1889; + selp.f32 %f1891, 0f3BAC840B, 0f3BAAD5EA, %p561; + fma.rn.f32 %f1892, %f1890, %f1887, %f1891; + selp.f32 %f1893, 0fBD0C8162, 0fBCDC1BE7, %p561; + fma.rn.f32 %f1894, %f1892, %f1887, %f1893; + selp.f32 %f1895, 0f3E1CF906, 0f3DE718AF, %p561; + fma.rn.f32 %f1896, %f1894, %f1887, %f1895; + selp.f32 %f1897, 0f3F6A937E, 0fBEC093AC, %p561; + fma.rn.f32 %f1898, %f1896, %f1887, %f1897; + selp.f32 %f1899, 0f3F20D842, 0f3E0375D3, %p561; + fma.rn.f32 %f1900, %f1898, %f1887, %f1899; + neg.f32 %f1901, %f1885; + selp.f32 %f1902, %f1901, %f341, %p561; + fma.rn.f32 %f2927, %f1900, %f1902, %f1902; + @%p560 bra $L__BB5_348; + + ex2.approx.ftz.f32 %f1903, %f2927; + sub.f32 %f1905, %f1530, %f1903; + mov.b32 %r664, %f1905; + mov.b32 %r665, %f341; + and.b32 %r666, %r665, -2147483648; + or.b32 %r667, %r666, %r664; + mov.b32 %f2927, %r667; + +$L__BB5_348: + sub.f32 %f1907, %f2926, %f2927; + mul.f32 %f345, %f1907, 0f3F000000; + mul.f32 %f1908, %f333, %f2888; + fma.rn.f32 %f346, %f345, %f1908, %f2887; + mad.lo.s32 %r668, %r788, %r102, %r787; + add.s32 %r669, %r668, %r2; + mul.wide.s32 %rd36, %r669, 4; + add.s64 %rd37, %rd1, %rd36; + ld.global.f32 %f347, [%rd37]; + setp.eq.f32 %p562, %f287, 0f7F800000; + mov.f32 %f2928, 0f7F800000; + @%p562 bra $L__BB5_350; + + fma.rn.f32 %f2928, %f287, %f286, %f287; + +$L__BB5_350: + mov.b32 %r670, %f2928; + xor.b32 %r671, %r670, -2147483648; + mov.b32 %f1909, %r671; + selp.f32 %f350, %f1909, %f2928, %p27; + setp.eq.f32 %p563, %f284, 0f00000000; + selp.f32 %f2929, %f288, %f350, %p563; + @%p31 bra $L__BB5_353; + + cvt.rzi.f32.f32 %f1911, %f1533; + setp.eq.f32 %p564, %f1911, 0f40000000; + mov.f32 %f2929, %f350; + @%p564 bra $L__BB5_353; + + mov.f32 %f2929, 0f7FFFFFFF; + +$L__BB5_353: + setp.eq.f32 %p565, %f292, 0f7F800000; + mov.f32 %f2930, 0f7F800000; + @%p565 bra $L__BB5_355; + + fma.rn.f32 %f2930, %f292, %f291, %f292; + +$L__BB5_355: + mov.b32 %r672, %f2930; + xor.b32 %r673, %r672, -2147483648; + mov.b32 %f1914, %r673; + selp.f32 %f355, %f1914, %f2930, %p28; + setp.eq.f32 %p566, %f289, 0f00000000; + selp.f32 %f2931, %f295, %f355, %p566; + @%p32 bra $L__BB5_358; + + cvt.rzi.f32.f32 %f1916, %f1533; + setp.eq.f32 %p567, %f1916, 0f40000000; + mov.f32 %f2931, %f355; + @%p567 bra $L__BB5_358; + + mov.f32 %f2931, 0f7FFFFFFF; + +$L__BB5_358: + setp.gtu.f32 %p568, %f285, 0f7F800000; + mov.f32 %f2932, 0f7F800000; + selp.f32 %f1919, %f293, %f2929, %p568; + setp.neu.f32 %p569, %f285, 0f7F800000; + selp.f32 %f1920, %f1919, %f294, %p569; + setp.gt.s32 %p570, %r95, 2139095039; + selp.f32 %f1921, %f1920, %f2929, %p570; + mul.f32 %f1922, %f1921, 0fBF000000; + setp.eq.f32 %p571, %f284, 0f3F800000; + selp.f32 %f1923, 0fBF000000, %f1922, %p571; + mov.f32 %f1925, 0f3BBB989D; + fma.rn.f32 %f1926, %f1923, %f1925, %f1526; + mov.f32 %f1928, 0f437C0000; + cvt.sat.f32.f32 %f1929, %f1926; + mov.f32 %f1930, 0f4B400001; + fma.rm.f32 %f1931, %f1929, %f1928, %f1930; + setp.gtu.f32 %p572, %f290, 0f7F800000; + selp.f32 %f1932, %f296, %f2931, %p572; + setp.neu.f32 %p573, %f290, 0f7F800000; + selp.f32 %f1933, %f1932, %f297, %p573; + setp.gt.s32 %p574, %r96, 2139095039; + selp.f32 %f1934, %f1933, %f2931, %p574; + mul.f32 %f1935, %f1934, 0fBF000000; + setp.eq.f32 %p575, %f289, 0f3F800000; + selp.f32 %f1936, 0fBF000000, %f1935, %p575; + fma.rn.f32 %f1937, %f1936, %f1925, %f1526; + cvt.sat.f32.f32 %f1938, %f1937; + fma.rm.f32 %f1939, %f1938, %f1928, %f1930; + add.f32 %f1940, %f1939, 0fCB40007F; + neg.f32 %f1941, %f1940; + fma.rn.f32 %f1942, %f1936, %f1637, %f1941; + mov.f32 %f1943, 0f32A57060; + fma.rn.f32 %f1944, %f1936, %f1943, %f1942; + mov.b32 %r674, %f1939; + shl.b32 %r675, %r674, 23; + mov.b32 %f1945, %r675; + ex2.approx.ftz.f32 %f1946, %f1944; + mul.f32 %f1947, %f1946, %f1945; + mov.b32 %r676, %f1931; + shl.b32 %r677, %r676, 23; + mov.b32 %f1948, %r677; + add.f32 %f1949, %f1931, 0fCB40007F; + neg.f32 %f1950, %f1949; + fma.rn.f32 %f1951, %f1923, %f1637, %f1950; + fma.rn.f32 %f1952, %f1923, %f1943, %f1951; ex2.approx.ftz.f32 %f1953, %f1952; - add.f32 %f1954, %f1947, 0f00000000; - ex2.approx.f32 %f1955, %f1954; - mul.f32 %f1956, %f1953, %f1955; - setp.lt.f32 %p213, %f1945, 0fC2D20000; - selp.f32 %f1957, 0f00000000, %f1956, %p213; - setp.gt.f32 %p214, %f1945, 0f42D20000; - selp.f32 %f413, 0f7F800000, %f1957, %p214; - // inline asm - rcp.approx.ftz.f32 %f1939,%f344; - // inline asm - mul.f32 %f1958, %f1939, %f345; - mul.f32 %f1959, %f1958, %f1958; - fma.rn.f32 %f1962, %f2939, %f1959, %f2940; - fma.rn.f32 %f1964, %f1962, %f1959, %f2941; - mul.rn.f32 %f1965, %f1964, %f1959; - mul.rn.f32 %f1966, %f1965, %f1958; - sub.f32 %f1967, %f343, %f1958; - neg.f32 %f1968, %f1958; - add.f32 %f1969, %f1967, %f1967; - fma.rn.f32 %f1970, %f1968, %f343, %f1969; - mul.rn.f32 %f1971, %f1939, %f1970; - add.f32 %f1972, %f1966, %f1958; - sub.f32 %f1973, %f1958, %f1972; - add.f32 %f1974, %f1966, %f1973; - add.f32 %f1975, %f1971, %f1974; - add.f32 %f1976, %f1972, %f1975; - sub.f32 %f1977, %f1972, %f1976; - add.f32 %f1978, %f1975, %f1977; - add.f32 %f1979, %f346, %f1976; - sub.f32 %f1980, %f346, %f1979; - add.f32 %f1981, %f1976, %f1980; - add.f32 %f1982, %f1978, %f1981; - add.f32 %f1983, %f347, %f1982; - add.f32 %f1984, %f1979, %f1983; - sub.f32 %f1985, %f1979, %f1984; - add.f32 %f1986, %f1983, %f1985; - mul.rn.f32 %f1988, %f1730, %f1984; - neg.f32 %f1989, %f1988; - fma.rn.f32 %f1990, %f1730, %f1984, %f1989; - fma.rn.f32 %f1991, %f1730, %f1986, %f1990; - fma.rn.f32 %f1993, %f2942, %f1984, %f1991; - add.rn.f32 %f1994, %f1988, %f1993; - neg.f32 %f1995, %f1994; - add.rn.f32 %f1996, %f1988, %f1995; - add.rn.f32 %f1997, %f1996, %f1993; - mov.b32 %r227, %f1994; - setp.eq.s32 %p215, %r227, 1118925336; - add.s32 %r228, %r227, -1; - mov.b32 %f1998, %r228; - add.f32 %f1999, %f1997, 0f37000000; - selp.f32 %f2000, %f1998, %f1994, %p215; - selp.f32 %f414, %f1999, %f1997, %p215; - mul.f32 %f2001, %f2000, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2002, %f2001; - fma.rn.f32 %f2003, %f2002, %f2937, %f2000; - fma.rn.f32 %f2004, %f2002, %f2938, %f2003; - mul.f32 %f2005, %f2004, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2006, %f2005; - add.f32 %f2007, %f2002, 0f00000000; - ex2.approx.f32 %f2008, %f2007; - mul.f32 %f2009, %f2006, %f2008; - setp.lt.f32 %p216, %f2000, 0fC2D20000; - selp.f32 %f2010, 0f00000000, %f2009, %p216; - setp.gt.f32 %p217, %f2000, 0f42D20000; - selp.f32 %f3127, 0f7F800000, %f2010, %p217; - setp.eq.f32 %p218, %f3127, 0f7F800000; - @%p218 bra BB5_141; - - fma.rn.f32 %f3127, %f3127, %f414, %f3127; - -BB5_141: - setp.geu.f32 %p367, %f341, 0f00000000; - mov.b32 %r229, %f3127; - xor.b32 %r230, %r229, -2147483648; - mov.b32 %f2011, %r230; - selp.f32 %f418, %f2011, %f3127, %p8; - setp.eq.f32 %p219, %f341, 0f00000000; - selp.f32 %f3128, %f348, %f418, %p219; - @%p367 bra BB5_143; - - cvt.rzi.f32.f32 %f2013, %f1730; - setp.neu.f32 %p220, %f2013, 0f40000000; - selp.f32 %f3128, 0f7FFFFFFF, %f418, %p220; - -BB5_143: - abs.f32 %f2956, %f341; - add.f32 %f2955, %f2956, 0f40000000; - mov.b32 %r300, %f2955; - mov.f32 %f2954, 0f35BFBE8E; - mov.f32 %f2953, 0f3F317200; - selp.f32 %f2952, 0fFF800000, 0f7F800000, %p8; - add.f32 %f2951, %f341, 0f40000000; - mov.f32 %f2950, 0f00000000; - mov.f32 %f2949, 0f3DAAAABD; - mov.f32 %f2948, 0f3C4CAF63; - mov.f32 %f2947, 0f3B18F0FE; - mov.f32 %f2946, 0fB5BFBE8E; - mov.f32 %f2945, 0fBF317200; - setp.gtu.f32 %p221, %f2956, 0f7F800000; - selp.f32 %f2016, %f2951, %f3128, %p221; - setp.neu.f32 %p222, %f2956, 0f7F800000; - selp.f32 %f2017, %f2016, %f2952, %p222; - setp.gt.s32 %p223, %r300, 2139095039; - selp.f32 %f2018, %f2017, %f3128, %p223; - mul.f32 %f2019, %f2018, 0fBF000000; - setp.eq.f32 %p224, %f341, 0f3F800000; - selp.f32 %f2020, 0fBF000000, %f2019, %p224; - mul.f32 %f2021, %f2020, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2022, %f2021; - fma.rn.f32 %f2024, %f2022, %f2945, %f2020; - fma.rn.f32 %f2026, %f2022, %f2946, %f2024; - mul.f32 %f2027, %f2026, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2028, %f2027; - add.f32 %f2029, %f2022, 0f00000000; - ex2.approx.f32 %f2030, %f2029; - mul.f32 %f2031, %f2028, %f2030; - setp.lt.f32 %p225, %f2020, 0fC2D20000; - selp.f32 %f2032, 0f00000000, %f2031, %p225; - setp.gt.f32 %p226, %f2020, 0f42D20000; - selp.f32 %f2033, 0f7F800000, %f2032, %p226; - sub.f32 %f2034, %f413, %f2033; - mul.f32 %f2035, %f318, %f2034; - mul.f32 %f422, %f402, %f2035; - add.f32 %f2036, %f389, 0f3F800000; - sub.f32 %f2037, %f2036, %f3101; - div.rn.f32 %f423, %f2037, %f3098; - abs.f32 %f424, %f423; - setp.lt.f32 %p227, %f424, 0f00800000; - mul.f32 %f2038, %f424, 0f4B800000; - selp.f32 %f2039, 0fC3170000, 0fC2FE0000, %p227; - selp.f32 %f2040, %f2038, %f424, %p227; - mov.b32 %r231, %f2040; - and.b32 %r232, %r231, 8388607; - or.b32 %r233, %r232, 1065353216; - mov.b32 %f2041, %r233; - shr.u32 %r234, %r231, 23; - cvt.rn.f32.u32 %f2042, %r234; - add.f32 %f2043, %f2039, %f2042; - setp.gt.f32 %p228, %f2041, 0f3FB504F3; - mul.f32 %f2044, %f2041, 0f3F000000; - add.f32 %f2045, %f2043, 0f3F800000; - selp.f32 %f2046, %f2044, %f2041, %p228; - selp.f32 %f2047, %f2045, %f2043, %p228; - add.f32 %f425, %f2046, 0fBF800000; - add.f32 %f2015, %f2046, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f2014,%f2015; - // inline asm - add.f32 %f427, %f425, %f425; - mul.f32 %f2048, %f2014, %f427; - mul.f32 %f2049, %f2048, %f2048; - fma.rn.f32 %f2052, %f2947, %f2049, %f2948; - fma.rn.f32 %f2054, %f2052, %f2049, %f2949; - mul.rn.f32 %f2055, %f2054, %f2049; - mul.rn.f32 %f2056, %f2055, %f2048; - sub.f32 %f2057, %f425, %f2048; - neg.f32 %f2058, %f2048; - add.f32 %f2059, %f2057, %f2057; - fma.rn.f32 %f2060, %f2058, %f425, %f2059; - mul.rn.f32 %f2061, %f2014, %f2060; - add.f32 %f2062, %f2056, %f2048; - sub.f32 %f2063, %f2048, %f2062; - add.f32 %f2064, %f2056, %f2063; - add.f32 %f2065, %f2061, %f2064; - add.f32 %f2066, %f2062, %f2065; - sub.f32 %f2067, %f2062, %f2066; - add.f32 %f2068, %f2065, %f2067; - mul.rn.f32 %f428, %f2047, %f2953; - mul.rn.f32 %f429, %f2047, %f2954; - add.f32 %f2071, %f428, %f2066; - sub.f32 %f2072, %f428, %f2071; - add.f32 %f2073, %f2066, %f2072; - add.f32 %f2074, %f2068, %f2073; - add.f32 %f2075, %f429, %f2074; - add.f32 %f2076, %f2071, %f2075; - sub.f32 %f2077, %f2071, %f2076; - add.f32 %f2078, %f2075, %f2077; - mul.rn.f32 %f2080, %f1730, %f2076; - neg.f32 %f2081, %f2080; - fma.rn.f32 %f2082, %f1730, %f2076, %f2081; - fma.rn.f32 %f2083, %f1730, %f2078, %f2082; - fma.rn.f32 %f2085, %f2950, %f2076, %f2083; - add.rn.f32 %f2086, %f2080, %f2085; - neg.f32 %f2087, %f2086; - add.rn.f32 %f2088, %f2080, %f2087; - add.rn.f32 %f2089, %f2088, %f2085; - mov.b32 %r235, %f2086; - setp.eq.s32 %p229, %r235, 1118925336; - add.s32 %r236, %r235, -1; - mov.b32 %f2090, %r236; - add.f32 %f2091, %f2089, 0f37000000; - selp.f32 %f2092, %f2090, %f2086, %p229; - selp.f32 %f430, %f2091, %f2089, %p229; - mul.f32 %f2093, %f2092, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2094, %f2093; - fma.rn.f32 %f2095, %f2094, %f2945, %f2092; - fma.rn.f32 %f2096, %f2094, %f2946, %f2095; - mul.f32 %f2097, %f2096, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2098, %f2097; - add.f32 %f2099, %f2094, 0f00000000; - ex2.approx.f32 %f2100, %f2099; - mul.f32 %f2101, %f2098, %f2100; - setp.lt.f32 %p230, %f2092, 0fC2D20000; - selp.f32 %f2102, 0f00000000, %f2101, %p230; - setp.gt.f32 %p231, %f2092, 0f42D20000; - selp.f32 %f3129, 0f7F800000, %f2102, %p231; - setp.eq.f32 %p232, %f3129, 0f7F800000; - @%p232 bra BB5_145; - - fma.rn.f32 %f3129, %f3129, %f430, %f3129; - -BB5_145: - setp.lt.f32 %p233, %f423, 0f00000000; - and.pred %p11, %p233, %p188; - mov.b32 %r237, %f3129; - xor.b32 %r238, %r237, -2147483648; - mov.b32 %f2103, %r238; - selp.f32 %f3131, %f2103, %f3129, %p11; - setp.eq.f32 %p235, %f423, 0f00000000; - @%p235 bra BB5_148; - bra.uni BB5_146; - -BB5_148: - add.f32 %f2106, %f423, %f423; - selp.f32 %f3131, %f2106, 0f00000000, %p188; - bra.uni BB5_149; - -BB5_146: - setp.geu.f32 %p236, %f423, 0f00000000; - @%p236 bra BB5_149; - - cvt.rzi.f32.f32 %f2105, %f1730; - setp.neu.f32 %p237, %f2105, 0f40000000; - selp.f32 %f3131, 0f7FFFFFFF, %f3131, %p237; - -BB5_149: - abs.f32 %f2957, %f423; - add.f32 %f2107, %f2957, 0f40000000; - mov.b32 %r47, %f2107; - setp.lt.s32 %p239, %r47, 2139095040; - @%p239 bra BB5_154; - - abs.f32 %f2968, %f423; - setp.gtu.f32 %p240, %f2968, 0f7F800000; - @%p240 bra BB5_153; - bra.uni BB5_151; - -BB5_153: - add.f32 %f3131, %f423, 0f40000000; - bra.uni BB5_154; - -BB5_151: - abs.f32 %f2969, %f423; - setp.neu.f32 %p241, %f2969, 0f7F800000; - @%p241 bra BB5_154; - - selp.f32 %f3131, 0fFF800000, 0f7F800000, %p11; - -BB5_154: - mov.f32 %f2967, 0f35BFBE8E; - mov.f32 %f2966, 0f3F317200; - mov.f32 %f2965, 0f00000000; - mov.f32 %f2964, 0f3DAAAABD; - mov.f32 %f2963, 0f3C4CAF63; - mov.f32 %f2962, 0f3B18F0FE; - mov.f32 %f2961, 0fB5BFBE8E; - mov.f32 %f2960, 0fBF317200; - cvt.rn.f32.s32 %f2959, %r319; - sub.f32 %f2958, %f2959, %f3101; - mul.f32 %f2110, %f3131, 0fBF000000; - setp.eq.f32 %p242, %f423, 0f3F800000; - selp.f32 %f2111, 0fBF000000, %f2110, %p242; - mul.f32 %f2112, %f2111, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2113, %f2112; - fma.rn.f32 %f2115, %f2113, %f2960, %f2111; - fma.rn.f32 %f2117, %f2113, %f2961, %f2115; - mul.f32 %f2118, %f2117, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2119, %f2118; - add.f32 %f2120, %f2113, 0f00000000; - ex2.approx.f32 %f2121, %f2120; - mul.f32 %f2122, %f2119, %f2121; - setp.lt.f32 %p243, %f2111, 0fC2D20000; - selp.f32 %f2123, 0f00000000, %f2122, %p243; - setp.gt.f32 %p244, %f2111, 0f42D20000; - selp.f32 %f441, 0f7F800000, %f2123, %p244; - div.rn.f32 %f442, %f2958, %f3098; - abs.f32 %f443, %f442; - setp.lt.f32 %p245, %f443, 0f00800000; - mul.f32 %f2124, %f443, 0f4B800000; - selp.f32 %f2125, 0fC3170000, 0fC2FE0000, %p245; - selp.f32 %f2126, %f2124, %f443, %p245; - mov.b32 %r239, %f2126; - and.b32 %r240, %r239, 8388607; - or.b32 %r241, %r240, 1065353216; - mov.b32 %f2127, %r241; - shr.u32 %r242, %r239, 23; - cvt.rn.f32.u32 %f2128, %r242; - add.f32 %f2129, %f2125, %f2128; - setp.gt.f32 %p246, %f2127, 0f3FB504F3; - mul.f32 %f2130, %f2127, 0f3F000000; - add.f32 %f2131, %f2129, 0f3F800000; - selp.f32 %f2132, %f2130, %f2127, %p246; - selp.f32 %f2133, %f2131, %f2129, %p246; - add.f32 %f444, %f2132, 0fBF800000; - add.f32 %f2109, %f2132, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f2108,%f2109; - // inline asm - add.f32 %f446, %f444, %f444; - mul.f32 %f2134, %f2108, %f446; - mul.f32 %f2135, %f2134, %f2134; - fma.rn.f32 %f2138, %f2962, %f2135, %f2963; - fma.rn.f32 %f2140, %f2138, %f2135, %f2964; - mul.rn.f32 %f2141, %f2140, %f2135; - mul.rn.f32 %f2142, %f2141, %f2134; - sub.f32 %f2143, %f444, %f2134; - neg.f32 %f2144, %f2134; - add.f32 %f2145, %f2143, %f2143; - fma.rn.f32 %f2146, %f2144, %f444, %f2145; - mul.rn.f32 %f2147, %f2108, %f2146; - add.f32 %f2148, %f2142, %f2134; - sub.f32 %f2149, %f2134, %f2148; - add.f32 %f2150, %f2142, %f2149; - add.f32 %f2151, %f2147, %f2150; - add.f32 %f2152, %f2148, %f2151; - sub.f32 %f2153, %f2148, %f2152; - add.f32 %f2154, %f2151, %f2153; - mul.rn.f32 %f447, %f2133, %f2966; - mul.rn.f32 %f448, %f2133, %f2967; - add.f32 %f2157, %f447, %f2152; - sub.f32 %f2158, %f447, %f2157; - add.f32 %f2159, %f2152, %f2158; - add.f32 %f2160, %f2154, %f2159; - add.f32 %f2161, %f448, %f2160; - add.f32 %f2162, %f2157, %f2161; - sub.f32 %f2163, %f2157, %f2162; - add.f32 %f2164, %f2161, %f2163; - mul.rn.f32 %f2166, %f1730, %f2162; - neg.f32 %f2167, %f2166; - fma.rn.f32 %f2168, %f1730, %f2162, %f2167; - fma.rn.f32 %f2169, %f1730, %f2164, %f2168; - fma.rn.f32 %f2171, %f2965, %f2162, %f2169; - add.rn.f32 %f2172, %f2166, %f2171; - neg.f32 %f2173, %f2172; - add.rn.f32 %f2174, %f2166, %f2173; - add.rn.f32 %f2175, %f2174, %f2171; - mov.b32 %r243, %f2172; - setp.eq.s32 %p247, %r243, 1118925336; - add.s32 %r244, %r243, -1; - mov.b32 %f2176, %r244; - add.f32 %f2177, %f2175, 0f37000000; - selp.f32 %f2178, %f2176, %f2172, %p247; - selp.f32 %f449, %f2177, %f2175, %p247; - mul.f32 %f2179, %f2178, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2180, %f2179; - fma.rn.f32 %f2181, %f2180, %f2960, %f2178; - fma.rn.f32 %f2182, %f2180, %f2961, %f2181; - mul.f32 %f2183, %f2182, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2184, %f2183; - add.f32 %f2185, %f2180, 0f00000000; - ex2.approx.f32 %f2186, %f2185; - mul.f32 %f2187, %f2184, %f2186; - setp.lt.f32 %p248, %f2178, 0fC2D20000; - selp.f32 %f2188, 0f00000000, %f2187, %p248; - setp.gt.f32 %p249, %f2178, 0f42D20000; - selp.f32 %f3132, 0f7F800000, %f2188, %p249; - setp.eq.f32 %p250, %f3132, 0f7F800000; - @%p250 bra BB5_156; - - fma.rn.f32 %f3132, %f3132, %f449, %f3132; - -BB5_156: - setp.lt.f32 %p251, %f442, 0f00000000; - and.pred %p12, %p251, %p188; - mov.b32 %r245, %f3132; - xor.b32 %r246, %r245, -2147483648; - mov.b32 %f2189, %r246; - selp.f32 %f3134, %f2189, %f3132, %p12; - setp.eq.f32 %p253, %f442, 0f00000000; - @%p253 bra BB5_159; - bra.uni BB5_157; - -BB5_159: - add.f32 %f2192, %f442, %f442; - selp.f32 %f3134, %f2192, 0f00000000, %p188; - bra.uni BB5_160; - -BB5_157: - setp.geu.f32 %p254, %f442, 0f00000000; - @%p254 bra BB5_160; - - cvt.rzi.f32.f32 %f2191, %f1730; - setp.neu.f32 %p255, %f2191, 0f40000000; - selp.f32 %f3134, 0f7FFFFFFF, %f3134, %p255; - -BB5_160: - abs.f32 %f2882, %f442; - add.f32 %f2193, %f2882, 0f40000000; - mov.b32 %r48, %f2193; - setp.lt.s32 %p257, %r48, 2139095040; - @%p257 bra BB5_165; - - abs.f32 %f2983, %f442; - setp.gtu.f32 %p258, %f2983, 0f7F800000; - @%p258 bra BB5_164; - bra.uni BB5_162; - -BB5_164: - add.f32 %f3134, %f442, 0f40000000; - bra.uni BB5_165; - -BB5_162: - abs.f32 %f2984, %f442; - setp.neu.f32 %p259, %f2984, 0f7F800000; - @%p259 bra BB5_165; - - selp.f32 %f3134, 0fFF800000, 0f7F800000, %p12; - -BB5_165: - mov.f32 %f2888, 0f00000000; - mov.f32 %f2887, 0f3DAAAABD; - mov.f32 %f2886, 0f3C4CAF63; - mov.f32 %f2885, 0f3B18F0FE; - mov.f32 %f2884, 0fB5BFBE8E; - mov.f32 %f2883, 0fBF317200; - mul.f32 %f2196, %f3134, 0fBF000000; - setp.eq.f32 %p260, %f442, 0f3F800000; - selp.f32 %f2197, 0fBF000000, %f2196, %p260; - mul.f32 %f2198, %f2197, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2199, %f2198; - fma.rn.f32 %f2201, %f2199, %f2883, %f2197; - fma.rn.f32 %f2203, %f2199, %f2884, %f2201; - mul.f32 %f2204, %f2203, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2205, %f2204; - add.f32 %f2206, %f2199, 0f00000000; - ex2.approx.f32 %f2207, %f2206; - mul.f32 %f2208, %f2205, %f2207; - setp.lt.f32 %p261, %f2197, 0fC2D20000; - selp.f32 %f2209, 0f00000000, %f2208, %p261; - setp.gt.f32 %p262, %f2197, 0f42D20000; - selp.f32 %f2210, 0f7F800000, %f2209, %p262; - sub.f32 %f2211, %f441, %f2210; - mul.f32 %f2212, %f318, %f2211; - mul.f32 %f460, %f388, %f2212; - // inline asm - rcp.approx.ftz.f32 %f2194,%f334; - // inline asm - mul.f32 %f2213, %f2194, %f335; - mul.f32 %f2214, %f2213, %f2213; - fma.rn.f32 %f2217, %f2885, %f2214, %f2886; - fma.rn.f32 %f2219, %f2217, %f2214, %f2887; - mul.rn.f32 %f2220, %f2219, %f2214; - mul.rn.f32 %f2221, %f2220, %f2213; - sub.f32 %f2222, %f333, %f2213; - neg.f32 %f2223, %f2213; - add.f32 %f2224, %f2222, %f2222; - fma.rn.f32 %f2225, %f2223, %f333, %f2224; - mul.rn.f32 %f2226, %f2194, %f2225; - add.f32 %f2227, %f2221, %f2213; - sub.f32 %f2228, %f2213, %f2227; - add.f32 %f2229, %f2221, %f2228; - add.f32 %f2230, %f2226, %f2229; - add.f32 %f2231, %f2227, %f2230; - sub.f32 %f2232, %f2227, %f2231; - add.f32 %f2233, %f2230, %f2232; - add.f32 %f2234, %f336, %f2231; - sub.f32 %f2235, %f336, %f2234; - add.f32 %f2236, %f2231, %f2235; - add.f32 %f2237, %f2233, %f2236; - add.f32 %f2238, %f337, %f2237; - add.f32 %f2239, %f2234, %f2238; - sub.f32 %f2240, %f2234, %f2239; - add.f32 %f2241, %f2238, %f2240; - mul.rn.f32 %f2243, %f1730, %f2239; - neg.f32 %f2244, %f2243; - fma.rn.f32 %f2245, %f1730, %f2239, %f2244; - fma.rn.f32 %f2246, %f1730, %f2241, %f2245; - fma.rn.f32 %f2248, %f2888, %f2239, %f2246; - add.rn.f32 %f2249, %f2243, %f2248; - neg.f32 %f2250, %f2249; - add.rn.f32 %f2251, %f2243, %f2250; - add.rn.f32 %f2252, %f2251, %f2248; - mov.b32 %r247, %f2249; - setp.eq.s32 %p263, %r247, 1118925336; - add.s32 %r248, %r247, -1; - mov.b32 %f2253, %r248; - add.f32 %f2254, %f2252, 0f37000000; - selp.f32 %f2255, %f2253, %f2249, %p263; - selp.f32 %f461, %f2254, %f2252, %p263; - mul.f32 %f2256, %f2255, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2257, %f2256; - fma.rn.f32 %f2258, %f2257, %f2883, %f2255; - fma.rn.f32 %f2259, %f2257, %f2884, %f2258; - mul.f32 %f2260, %f2259, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2261, %f2260; - add.f32 %f2262, %f2257, 0f00000000; - ex2.approx.f32 %f2263, %f2262; - mul.f32 %f2264, %f2261, %f2263; - setp.lt.f32 %p264, %f2255, 0fC2D20000; - selp.f32 %f2265, 0f00000000, %f2264, %p264; - setp.gt.f32 %p265, %f2255, 0f42D20000; - selp.f32 %f3135, 0f7F800000, %f2265, %p265; - setp.eq.f32 %p266, %f3135, 0f7F800000; - @%p266 bra BB5_167; - - fma.rn.f32 %f3135, %f3135, %f461, %f3135; - -BB5_167: - setp.eq.f32 %p352, %f330, 0f00000000; - setp.geu.f32 %p351, %f330, 0f00000000; - mov.b32 %r249, %f3135; - xor.b32 %r250, %r249, -2147483648; - mov.b32 %f2266, %r250; - selp.f32 %f465, %f2266, %f3135, %p7; - selp.f32 %f3136, %f338, %f465, %p352; - @%p351 bra BB5_169; - - cvt.rzi.f32.f32 %f2268, %f1730; - setp.neu.f32 %p268, %f2268, 0f40000000; - selp.f32 %f3136, 0f7FFFFFFF, %f465, %p268; - -BB5_169: - abs.f32 %f2898, %f330; - setp.eq.f32 %p356, %f330, 0f3F800000; - add.f32 %f2897, %f2898, 0f40000000; - mov.b32 %r290, %f2897; - setp.gt.s32 %p355, %r290, 2139095039; - setp.neu.f32 %p354, %f2898, 0f7F800000; - setp.gtu.f32 %p353, %f2898, 0f7F800000; - mov.f32 %f2896, 0f00000000; - mov.f32 %f2895, 0f3DAAAABD; - mov.f32 %f2894, 0f3C4CAF63; - mov.f32 %f2893, 0f3B18F0FE; - mov.f32 %f2892, 0fB5BFBE8E; - mov.f32 %f2891, 0fBF317200; - selp.f32 %f2890, 0fFF800000, 0f7F800000, %p7; - add.f32 %f2889, %f330, 0f40000000; - selp.f32 %f2271, %f2889, %f3136, %p353; - selp.f32 %f2272, %f2271, %f2890, %p354; - selp.f32 %f2273, %f2272, %f3136, %p355; - mul.f32 %f2274, %f2273, 0fBF000000; - selp.f32 %f2275, 0fBF000000, %f2274, %p356; - mul.f32 %f2276, %f2275, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2277, %f2276; - fma.rn.f32 %f2279, %f2277, %f2891, %f2275; - fma.rn.f32 %f2281, %f2277, %f2892, %f2279; - mul.f32 %f2282, %f2281, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2283, %f2282; - add.f32 %f2284, %f2277, 0f00000000; - ex2.approx.f32 %f2285, %f2284; - mul.f32 %f2286, %f2283, %f2285; - setp.lt.f32 %p273, %f2275, 0fC2D20000; - selp.f32 %f2287, 0f00000000, %f2286, %p273; - setp.gt.f32 %p274, %f2275, 0f42D20000; - selp.f32 %f469, 0f7F800000, %f2287, %p274; - // inline asm - rcp.approx.ftz.f32 %f2269,%f344; - // inline asm - mul.f32 %f2288, %f2269, %f345; - mul.f32 %f2289, %f2288, %f2288; - fma.rn.f32 %f2292, %f2893, %f2289, %f2894; - fma.rn.f32 %f2294, %f2292, %f2289, %f2895; - mul.rn.f32 %f2295, %f2294, %f2289; - mul.rn.f32 %f2296, %f2295, %f2288; - sub.f32 %f2297, %f343, %f2288; - neg.f32 %f2298, %f2288; - add.f32 %f2299, %f2297, %f2297; - fma.rn.f32 %f2300, %f2298, %f343, %f2299; - mul.rn.f32 %f2301, %f2269, %f2300; - add.f32 %f2302, %f2296, %f2288; - sub.f32 %f2303, %f2288, %f2302; - add.f32 %f2304, %f2296, %f2303; - add.f32 %f2305, %f2301, %f2304; - add.f32 %f2306, %f2302, %f2305; - sub.f32 %f2307, %f2302, %f2306; - add.f32 %f2308, %f2305, %f2307; - add.f32 %f2309, %f346, %f2306; - sub.f32 %f2310, %f346, %f2309; - add.f32 %f2311, %f2306, %f2310; - add.f32 %f2312, %f2308, %f2311; - add.f32 %f2313, %f347, %f2312; - add.f32 %f2314, %f2309, %f2313; - sub.f32 %f2315, %f2309, %f2314; - add.f32 %f2316, %f2313, %f2315; - mul.rn.f32 %f2318, %f1730, %f2314; - neg.f32 %f2319, %f2318; - fma.rn.f32 %f2320, %f1730, %f2314, %f2319; - fma.rn.f32 %f2321, %f1730, %f2316, %f2320; - fma.rn.f32 %f2323, %f2896, %f2314, %f2321; - add.rn.f32 %f2324, %f2318, %f2323; - neg.f32 %f2325, %f2324; - add.rn.f32 %f2326, %f2318, %f2325; - add.rn.f32 %f2327, %f2326, %f2323; - mov.b32 %r251, %f2324; - setp.eq.s32 %p275, %r251, 1118925336; - add.s32 %r252, %r251, -1; - mov.b32 %f2328, %r252; - add.f32 %f2329, %f2327, 0f37000000; - selp.f32 %f2330, %f2328, %f2324, %p275; - selp.f32 %f470, %f2329, %f2327, %p275; - mul.f32 %f2331, %f2330, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2332, %f2331; - fma.rn.f32 %f2333, %f2332, %f2891, %f2330; - fma.rn.f32 %f2334, %f2332, %f2892, %f2333; - mul.f32 %f2335, %f2334, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2336, %f2335; - add.f32 %f2337, %f2332, 0f00000000; - ex2.approx.f32 %f2338, %f2337; - mul.f32 %f2339, %f2336, %f2338; - setp.lt.f32 %p276, %f2330, 0fC2D20000; - selp.f32 %f2340, 0f00000000, %f2339, %p276; - setp.gt.f32 %p277, %f2330, 0f42D20000; - selp.f32 %f3137, 0f7F800000, %f2340, %p277; - setp.eq.f32 %p278, %f3137, 0f7F800000; - @%p278 bra BB5_171; - - fma.rn.f32 %f3137, %f3137, %f470, %f3137; - -BB5_171: - setp.eq.f32 %p358, %f341, 0f00000000; - setp.geu.f32 %p357, %f341, 0f00000000; - mov.b32 %r253, %f3137; - xor.b32 %r254, %r253, -2147483648; - mov.b32 %f2341, %r254; - selp.f32 %f474, %f2341, %f3137, %p8; - selp.f32 %f3138, %f348, %f474, %p358; - @%p357 bra BB5_173; - - cvt.rzi.f32.f32 %f2343, %f1730; - setp.neu.f32 %p280, %f2343, 0f40000000; - selp.f32 %f3138, 0f7FFFFFFF, %f474, %p280; - -BB5_173: - abs.f32 %f2911, %f341; - cvt.rn.f32.s32 %f2910, %r318; - sub.f32 %f2909, %f2910, %f3102; - add.f32 %f2908, %f2909, 0f3F800000; - setp.eq.f32 %p362, %f341, 0f3F800000; - add.f32 %f2907, %f2911, 0f40000000; - mov.b32 %r291, %f2907; - setp.gt.s32 %p361, %r291, 2139095039; - setp.neu.f32 %p360, %f2911, 0f7F800000; - setp.gtu.f32 %p359, %f2911, 0f7F800000; - selp.f32 %f2906, 0fFF800000, 0f7F800000, %p8; - add.f32 %f2905, %f341, 0f40000000; - mov.f32 %f2904, 0f00000000; - mov.f32 %f2903, 0f3DAAAABD; - mov.f32 %f2902, 0f3C4CAF63; - mov.f32 %f2901, 0f3B18F0FE; - mov.f32 %f2900, 0fB5BFBE8E; - mov.f32 %f2899, 0fBF317200; - selp.f32 %f2346, %f2905, %f3138, %p359; - selp.f32 %f2347, %f2346, %f2906, %p360; - selp.f32 %f2348, %f2347, %f3138, %p361; - mul.f32 %f2349, %f2348, 0fBF000000; - selp.f32 %f2350, 0fBF000000, %f2349, %p362; - mul.f32 %f2351, %f2350, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2352, %f2351; - fma.rn.f32 %f2354, %f2352, %f2899, %f2350; - fma.rn.f32 %f2356, %f2352, %f2900, %f2354; - mul.f32 %f2357, %f2356, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2358, %f2357; - add.f32 %f2359, %f2352, 0f00000000; - ex2.approx.f32 %f2360, %f2359; - mul.f32 %f2361, %f2358, %f2360; - setp.lt.f32 %p285, %f2350, 0fC2D20000; - selp.f32 %f2362, 0f00000000, %f2361, %p285; - setp.gt.f32 %p286, %f2350, 0f42D20000; - selp.f32 %f2363, 0f7F800000, %f2362, %p286; - mul.f32 %f2364, %f2909, %f2363; - mul.f32 %f2365, %f2908, %f469; - sub.f32 %f2366, %f2365, %f2364; - mul.f32 %f2367, %f319, %f2366; - mul.f32 %f478, %f402, %f2367; - // inline asm - rcp.approx.ftz.f32 %f2344,%f2015; - // inline asm - mul.f32 %f2368, %f2344, %f427; - mul.f32 %f2369, %f2368, %f2368; - fma.rn.f32 %f2372, %f2901, %f2369, %f2902; - fma.rn.f32 %f2374, %f2372, %f2369, %f2903; - mul.rn.f32 %f2375, %f2374, %f2369; - mul.rn.f32 %f2376, %f2375, %f2368; - sub.f32 %f2377, %f425, %f2368; - neg.f32 %f2378, %f2368; - add.f32 %f2379, %f2377, %f2377; - fma.rn.f32 %f2380, %f2378, %f425, %f2379; - mul.rn.f32 %f2381, %f2344, %f2380; - add.f32 %f2382, %f2376, %f2368; - sub.f32 %f2383, %f2368, %f2382; - add.f32 %f2384, %f2376, %f2383; - add.f32 %f2385, %f2381, %f2384; - add.f32 %f2386, %f2382, %f2385; - sub.f32 %f2387, %f2382, %f2386; - add.f32 %f2388, %f2385, %f2387; - add.f32 %f2389, %f428, %f2386; - sub.f32 %f2390, %f428, %f2389; - add.f32 %f2391, %f2386, %f2390; - add.f32 %f2392, %f2388, %f2391; - add.f32 %f2393, %f429, %f2392; - add.f32 %f2394, %f2389, %f2393; - sub.f32 %f2395, %f2389, %f2394; - add.f32 %f2396, %f2393, %f2395; - mul.rn.f32 %f2398, %f1730, %f2394; - neg.f32 %f2399, %f2398; - fma.rn.f32 %f2400, %f1730, %f2394, %f2399; - fma.rn.f32 %f2401, %f1730, %f2396, %f2400; - fma.rn.f32 %f2403, %f2904, %f2394, %f2401; - add.rn.f32 %f2404, %f2398, %f2403; - neg.f32 %f2405, %f2404; - add.rn.f32 %f2406, %f2398, %f2405; - add.rn.f32 %f2407, %f2406, %f2403; - mov.b32 %r255, %f2404; - setp.eq.s32 %p287, %r255, 1118925336; - add.s32 %r256, %r255, -1; - mov.b32 %f2408, %r256; - add.f32 %f2409, %f2407, 0f37000000; - selp.f32 %f2410, %f2408, %f2404, %p287; - selp.f32 %f479, %f2409, %f2407, %p287; - mul.f32 %f2411, %f2410, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2412, %f2411; - fma.rn.f32 %f2413, %f2412, %f2899, %f2410; - fma.rn.f32 %f2414, %f2412, %f2900, %f2413; - mul.f32 %f2415, %f2414, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2416, %f2415; - add.f32 %f2417, %f2412, 0f00000000; - ex2.approx.f32 %f2418, %f2417; - mul.f32 %f2419, %f2416, %f2418; - setp.lt.f32 %p288, %f2410, 0fC2D20000; - selp.f32 %f2420, 0f00000000, %f2419, %p288; - setp.gt.f32 %p289, %f2410, 0f42D20000; - selp.f32 %f3139, 0f7F800000, %f2420, %p289; - setp.eq.f32 %p290, %f3139, 0f7F800000; - @%p290 bra BB5_175; - - fma.rn.f32 %f3139, %f3139, %f479, %f3139; - -BB5_175: - setp.eq.f32 %p363, %f423, 0f00000000; - mov.b32 %r257, %f3139; - xor.b32 %r258, %r257, -2147483648; - mov.b32 %f2421, %r258; - selp.f32 %f3141, %f2421, %f3139, %p11; - @%p363 bra BB5_178; - bra.uni BB5_176; - -BB5_178: - add.f32 %f2424, %f423, %f423; - selp.f32 %f3141, %f2424, 0f00000000, %p188; - bra.uni BB5_179; - -BB5_176: - setp.geu.f32 %p292, %f423, 0f00000000; - @%p292 bra BB5_179; - - cvt.rzi.f32.f32 %f2423, %f1730; - setp.neu.f32 %p293, %f2423, 0f40000000; - selp.f32 %f3141, 0f7FFFFFFF, %f3141, %p293; - -BB5_179: - abs.f32 %f2913, %f423; - add.f32 %f2912, %f2913, 0f40000000; - mov.b32 %r292, %f2912; - setp.lt.s32 %p364, %r292, 2139095040; - @%p364 bra BB5_184; - - abs.f32 %f2981, %f423; - setp.gtu.f32 %p296, %f2981, 0f7F800000; - @%p296 bra BB5_183; - bra.uni BB5_181; - -BB5_183: - add.f32 %f3141, %f423, 0f40000000; - bra.uni BB5_184; - -BB5_181: - abs.f32 %f2982, %f423; - setp.neu.f32 %p297, %f2982, 0f7F800000; - @%p297 bra BB5_184; - - selp.f32 %f3141, 0fFF800000, 0f7F800000, %p11; - -BB5_184: - setp.eq.f32 %p365, %f423, 0f3F800000; - mov.f32 %f2919, 0f00000000; - mov.f32 %f2918, 0f3DAAAABD; - mov.f32 %f2917, 0f3C4CAF63; - mov.f32 %f2916, 0f3B18F0FE; - mov.f32 %f2915, 0fB5BFBE8E; - mov.f32 %f2914, 0fBF317200; - mul.f32 %f2427, %f3141, 0fBF000000; - selp.f32 %f2428, 0fBF000000, %f2427, %p365; - mul.f32 %f2429, %f2428, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2430, %f2429; - fma.rn.f32 %f2432, %f2430, %f2914, %f2428; - fma.rn.f32 %f2434, %f2430, %f2915, %f2432; - mul.f32 %f2435, %f2434, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2436, %f2435; - add.f32 %f2437, %f2430, 0f00000000; - ex2.approx.f32 %f2438, %f2437; - mul.f32 %f2439, %f2436, %f2438; - setp.lt.f32 %p299, %f2428, 0fC2D20000; - selp.f32 %f2440, 0f00000000, %f2439, %p299; - setp.gt.f32 %p300, %f2428, 0f42D20000; - selp.f32 %f490, 0f7F800000, %f2440, %p300; - // inline asm - rcp.approx.ftz.f32 %f2425,%f2109; - // inline asm - mul.f32 %f2441, %f2425, %f446; - mul.f32 %f2442, %f2441, %f2441; - fma.rn.f32 %f2445, %f2916, %f2442, %f2917; - fma.rn.f32 %f2447, %f2445, %f2442, %f2918; - mul.rn.f32 %f2448, %f2447, %f2442; - mul.rn.f32 %f2449, %f2448, %f2441; - sub.f32 %f2450, %f444, %f2441; - neg.f32 %f2451, %f2441; - add.f32 %f2452, %f2450, %f2450; - fma.rn.f32 %f2453, %f2451, %f444, %f2452; - mul.rn.f32 %f2454, %f2425, %f2453; - add.f32 %f2455, %f2449, %f2441; - sub.f32 %f2456, %f2441, %f2455; - add.f32 %f2457, %f2449, %f2456; - add.f32 %f2458, %f2454, %f2457; - add.f32 %f2459, %f2455, %f2458; - sub.f32 %f2460, %f2455, %f2459; - add.f32 %f2461, %f2458, %f2460; - add.f32 %f2462, %f447, %f2459; - sub.f32 %f2463, %f447, %f2462; - add.f32 %f2464, %f2459, %f2463; - add.f32 %f2465, %f2461, %f2464; - add.f32 %f2466, %f448, %f2465; - add.f32 %f2467, %f2462, %f2466; - sub.f32 %f2468, %f2462, %f2467; - add.f32 %f2469, %f2466, %f2468; - mul.rn.f32 %f2471, %f1730, %f2467; - neg.f32 %f2472, %f2471; - fma.rn.f32 %f2473, %f1730, %f2467, %f2472; - fma.rn.f32 %f2474, %f1730, %f2469, %f2473; - fma.rn.f32 %f2476, %f2919, %f2467, %f2474; - add.rn.f32 %f2477, %f2471, %f2476; - neg.f32 %f2478, %f2477; - add.rn.f32 %f2479, %f2471, %f2478; - add.rn.f32 %f2480, %f2479, %f2476; - mov.b32 %r259, %f2477; - setp.eq.s32 %p301, %r259, 1118925336; - add.s32 %r260, %r259, -1; - mov.b32 %f2481, %r260; - add.f32 %f2482, %f2480, 0f37000000; - selp.f32 %f2483, %f2481, %f2477, %p301; - selp.f32 %f491, %f2482, %f2480, %p301; - mul.f32 %f2484, %f2483, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2485, %f2484; - fma.rn.f32 %f2486, %f2485, %f2914, %f2483; - fma.rn.f32 %f2487, %f2485, %f2915, %f2486; - mul.f32 %f2488, %f2487, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2489, %f2488; - add.f32 %f2490, %f2485, 0f00000000; - ex2.approx.f32 %f2491, %f2490; - mul.f32 %f2492, %f2489, %f2491; - setp.lt.f32 %p302, %f2483, 0fC2D20000; - selp.f32 %f2493, 0f00000000, %f2492, %p302; - setp.gt.f32 %p303, %f2483, 0f42D20000; - selp.f32 %f3142, 0f7F800000, %f2493, %p303; - setp.eq.f32 %p304, %f3142, 0f7F800000; - @%p304 bra BB5_186; - - fma.rn.f32 %f3142, %f3142, %f491, %f3142; - -BB5_186: - setp.eq.f32 %p368, %f442, 0f00000000; - mov.b32 %r261, %f3142; - xor.b32 %r262, %r261, -2147483648; - mov.b32 %f2494, %r262; - selp.f32 %f3144, %f2494, %f3142, %p12; - @%p368 bra BB5_189; - bra.uni BB5_187; - -BB5_189: - add.f32 %f2497, %f442, %f442; - selp.f32 %f3144, %f2497, 0f00000000, %p188; - bra.uni BB5_190; - -BB5_187: - setp.geu.f32 %p306, %f442, 0f00000000; - @%p306 bra BB5_190; - - cvt.rzi.f32.f32 %f2496, %f1730; - setp.neu.f32 %p307, %f2496, 0f40000000; - selp.f32 %f3144, 0f7FFFFFFF, %f3144, %p307; - -BB5_190: - abs.f32 %f2986, %f442; - add.f32 %f2985, %f2986, 0f40000000; - mov.b32 %r301, %f2985; - setp.lt.s32 %p369, %r301, 2139095040; - @%p369 bra BB5_195; - - abs.f32 %f2979, %f442; - setp.gtu.f32 %p310, %f2979, 0f7F800000; - @%p310 bra BB5_194; - bra.uni BB5_192; - -BB5_194: - add.f32 %f3144, %f442, 0f40000000; - bra.uni BB5_195; - -BB5_192: - abs.f32 %f2980, %f442; - setp.neu.f32 %p311, %f2980, 0f7F800000; - @%p311 bra BB5_195; - - selp.f32 %f3144, 0fFF800000, 0f7F800000, %p12; - -BB5_195: - setp.eq.f32 %p370, %f442, 0f3F800000; - cvt.rn.f32.s32 %f2924, %r319; - sub.f32 %f2923, %f2924, %f3101; - add.f32 %f2922, %f2923, 0f3F800000; - mov.f32 %f2921, 0fB5BFBE8E; - mov.f32 %f2920, 0fBF317200; - mul.f32 %f2498, %f3144, 0fBF000000; - selp.f32 %f2499, 0fBF000000, %f2498, %p370; - mul.f32 %f2500, %f2499, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2501, %f2500; - fma.rn.f32 %f2503, %f2501, %f2920, %f2499; - fma.rn.f32 %f2505, %f2501, %f2921, %f2503; - mul.f32 %f2506, %f2505, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2507, %f2506; - add.f32 %f2508, %f2501, 0f00000000; - ex2.approx.f32 %f2509, %f2508; - mul.f32 %f2510, %f2507, %f2509; - setp.lt.f32 %p313, %f2499, 0fC2D20000; - selp.f32 %f2511, 0f00000000, %f2510, %p313; - setp.gt.f32 %p314, %f2499, 0f42D20000; - selp.f32 %f2512, 0f7F800000, %f2511, %p314; - mul.f32 %f2513, %f2923, %f2512; - mul.f32 %f2514, %f2922, %f490; - sub.f32 %f2515, %f2514, %f2513; - mul.f32 %f2516, %f319, %f2515; - fma.rn.f32 %f2517, %f388, %f2516, %f478; - mul.f32 %f2518, %f422, %f422; - add.f32 %f502, %f3097, %f403; - div.rn.f32 %f2519, %f2518, %f502; - add.f32 %f3119, %f2519, %f3119; - mul.f32 %f2520, %f460, %f422; - div.rn.f32 %f2521, %f2520, %f502; - add.f32 %f3118, %f2521, %f3118; - mul.f32 %f2522, %f388, %f402; - mul.f32 %f2523, %f2522, %f422; - div.rn.f32 %f2524, %f2523, %f502; - add.f32 %f3117, %f2524, %f3117; - div.rn.f32 %f2525, %f422, %f502; - add.f32 %f3116, %f2525, %f3116; - mul.f32 %f2526, %f2517, %f422; - div.rn.f32 %f2527, %f2526, %f502; - add.f32 %f3115, %f2527, %f3115; - mul.f32 %f2528, %f460, %f460; - div.rn.f32 %f2529, %f2528, %f502; - add.f32 %f3114, %f2529, %f3114; - mul.f32 %f2530, %f2522, %f460; - div.rn.f32 %f2531, %f2530, %f502; - add.f32 %f3113, %f2531, %f3113; - div.rn.f32 %f2532, %f460, %f502; - add.f32 %f3112, %f2532, %f3112; - mul.f32 %f2533, %f2517, %f460; - div.rn.f32 %f2534, %f2533, %f502; - add.f32 %f3111, %f2534, %f3111; - mul.f32 %f2535, %f2522, %f2522; - div.rn.f32 %f2536, %f2535, %f502; - add.f32 %f3110, %f2536, %f3110; - div.rn.f32 %f2537, %f2522, %f502; - add.f32 %f3109, %f2537, %f3109; - mul.f32 %f2538, %f2517, %f2522; - div.rn.f32 %f2539, %f2538, %f502; - add.f32 %f3108, %f2539, %f3108; - rcp.rn.f32 %f2540, %f502; - add.f32 %f3107, %f2540, %f3107; - div.rn.f32 %f2541, %f2517, %f502; - add.f32 %f3106, %f2541, %f3106; - mul.f32 %f2542, %f2517, %f2517; - div.rn.f32 %f2543, %f2542, %f502; - add.f32 %f3105, %f2543, %f3105; - setp.leu.f32 %p315, %f502, 0f00000000; - @%p315 bra BB5_204; - - add.f32 %f518, %f3097, %f404; - setp.gt.f32 %p316, %f518, 0f00000000; - @%p316 bra BB5_198; - bra.uni BB5_197; - -BB5_198: - setp.lt.f32 %p317, %f502, 0f00800000; - mul.f32 %f2545, %f502, 0f4B000000; - selp.f32 %f520, %f2545, %f502, %p317; - selp.f32 %f2546, 0fC1B80000, 0f00000000, %p317; - mov.b32 %r263, %f520; - add.s32 %r264, %r263, -1059760811; - and.b32 %r265, %r264, -8388608; - sub.s32 %r266, %r263, %r265; - mov.b32 %f2547, %r266; - cvt.rn.f32.s32 %f2548, %r265; - mov.f32 %f2549, 0f34000000; - fma.rn.f32 %f2550, %f2548, %f2549, %f2546; - add.f32 %f2551, %f2547, 0fBF800000; - mov.f32 %f2552, 0f3E1039F6; - mov.f32 %f2553, 0fBE055027; - fma.rn.f32 %f2554, %f2553, %f2551, %f2552; - mov.f32 %f2555, 0fBDF8CDCC; - fma.rn.f32 %f2556, %f2554, %f2551, %f2555; - mov.f32 %f2557, 0f3E0F2955; - fma.rn.f32 %f2558, %f2556, %f2551, %f2557; - mov.f32 %f2559, 0fBE2AD8B9; - fma.rn.f32 %f2560, %f2558, %f2551, %f2559; - mov.f32 %f2561, 0f3E4CED0B; - fma.rn.f32 %f2562, %f2560, %f2551, %f2561; - mov.f32 %f2563, 0fBE7FFF22; - fma.rn.f32 %f2564, %f2562, %f2551, %f2563; - mov.f32 %f2565, 0f3EAAAA78; - fma.rn.f32 %f2566, %f2564, %f2551, %f2565; - mov.f32 %f2567, 0fBF000000; - fma.rn.f32 %f2568, %f2566, %f2551, %f2567; - mul.f32 %f2569, %f2551, %f2568; - fma.rn.f32 %f2570, %f2569, %f2551, %f2551; - mov.f32 %f2571, 0f3F317218; - fma.rn.f32 %f3145, %f2550, %f2571, %f2570; - setp.lt.u32 %p318, %r263, 2139095040; - @%p318 bra BB5_200; - - mov.f32 %f2572, 0f7F800000; - fma.rn.f32 %f3145, %f520, %f2572, %f2572; - -BB5_200: - setp.eq.f32 %p319, %f520, 0f00000000; - selp.f32 %f2573, 0fFF800000, %f3145, %p319; - mul.f32 %f2574, %f518, %f2573; - sub.f32 %f524, %f2574, %f403; - mul.f32 %f2575, %f518, 0f4B000000; - setp.lt.f32 %p320, %f518, 0f00800000; - selp.f32 %f525, %f2575, %f518, %p320; - selp.f32 %f2576, 0fC1B80000, 0f00000000, %p320; - mov.b32 %r267, %f525; - add.s32 %r268, %r267, -1059760811; - and.b32 %r269, %r268, -8388608; - sub.s32 %r270, %r267, %r269; - mov.b32 %f2577, %r270; - cvt.rn.f32.s32 %f2578, %r269; - fma.rn.f32 %f2580, %f2578, %f2549, %f2576; - add.f32 %f2581, %f2577, 0fBF800000; - fma.rn.f32 %f2584, %f2553, %f2581, %f2552; - fma.rn.f32 %f2586, %f2584, %f2581, %f2555; - fma.rn.f32 %f2588, %f2586, %f2581, %f2557; - fma.rn.f32 %f2590, %f2588, %f2581, %f2559; - fma.rn.f32 %f2592, %f2590, %f2581, %f2561; - fma.rn.f32 %f2594, %f2592, %f2581, %f2563; - fma.rn.f32 %f2596, %f2594, %f2581, %f2565; - fma.rn.f32 %f2598, %f2596, %f2581, %f2567; - mul.f32 %f2599, %f2581, %f2598; - fma.rn.f32 %f2600, %f2599, %f2581, %f2581; - fma.rn.f32 %f3146, %f2580, %f2571, %f2600; - setp.lt.u32 %p321, %r267, 2139095040; - @%p321 bra BB5_202; - - mov.f32 %f2602, 0f7F800000; - fma.rn.f32 %f3146, %f525, %f2602, %f2602; - -BB5_202: - setp.eq.f32 %p322, %f525, 0f00000000; - selp.f32 %f2603, 0fFF800000, %f3146, %p322; - mul.f32 %f2604, %f518, %f2603; - sub.f32 %f2605, %f524, %f2604; - add.f32 %f3147, %f404, %f2605; - bra.uni BB5_203; - -BB5_197: - neg.f32 %f2544, %f403; - sub.f32 %f3147, %f2544, %f3097; - -BB5_203: - add.f32 %f3120, %f3120, %f3147; - -BB5_204: - add.s32 %r319, %r319, 1; - setp.lt.s32 %p323, %r319, %r63; - @%p323 bra BB5_123; - - st.local.f32 [%rd2], %f3119; - st.local.f32 [%rd2+4], %f3118; - st.local.f32 [%rd2+20], %f3118; - st.local.f32 [%rd2+8], %f3117; - st.local.f32 [%rd2+40], %f3117; - st.local.f32 [%rd2+12], %f3116; - st.local.f32 [%rd2+60], %f3116; - st.local.f32 [%rd2+16], %f3115; - st.local.f32 [%rd2+80], %f3115; - st.local.f32 [%rd2+24], %f3114; - st.local.f32 [%rd2+28], %f3113; - st.local.f32 [%rd2+44], %f3113; - st.local.f32 [%rd2+32], %f3112; - st.local.f32 [%rd2+64], %f3112; - st.local.f32 [%rd2+36], %f3111; - st.local.f32 [%rd2+84], %f3111; - st.local.f32 [%rd2+48], %f3110; - st.local.f32 [%rd2+52], %f3109; - st.local.f32 [%rd2+68], %f3109; - st.local.f32 [%rd2+56], %f3108; - st.local.f32 [%rd2+88], %f3108; - st.local.f32 [%rd2+72], %f3107; - st.local.f32 [%rd2+76], %f3106; - st.local.f32 [%rd2+92], %f3106; - st.local.f32 [%rd2+96], %f3105; - add.s32 %r318, %r318, 1; - setp.lt.s32 %p324, %r318, %r63; - @%p324 bra BB5_122; - -BB5_206: - mov.f32 %f3150, 0f00000000; - ld.local.f32 %f2607, [%rd2]; - rcp.rn.f32 %f534, %f2607; - ld.local.f32 %f2608, [%rd2+4]; - mul.f32 %f535, %f534, %f2608; - ld.local.f32 %f2609, [%rd2+8]; - ld.local.f32 %f2610, [%rd2+12]; - ld.local.f32 %f2611, [%rd2+16]; - ld.local.f32 %f2612, [%rd2+20]; - ld.local.f32 %f2613, [%rd2+24]; - ld.local.f32 %f2614, [%rd2+28]; - ld.local.f32 %f2615, [%rd2+32]; - ld.local.f32 %f2616, [%rd2+36]; - ld.local.f32 %f2617, [%rd2+40]; - ld.local.f32 %f2618, [%rd2+44]; - st.local.f32 [%rd2+4], %f535; - mul.f32 %f536, %f534, %f2609; - st.local.f32 [%rd2+8], %f536; - mul.f32 %f537, %f534, %f2610; - st.local.f32 [%rd2+12], %f537; - mul.f32 %f538, %f534, %f2611; - st.local.f32 [%rd2+16], %f538; - ld.local.f32 %f2619, [%rd2+4]; - fma.rn.f32 %f2620, %f2619, %f2612, 0f00000000; - sub.f32 %f2621, %f2613, %f2620; - ld.local.f32 %f539, [%rd2+20]; - st.local.f32 [%rd2+24], %f2621; - fma.rn.f32 %f2622, %f536, %f539, 0f00000000; - rcp.rn.f32 %f540, %f2621; - sub.f32 %f2623, %f2614, %f2622; - mul.f32 %f541, %f540, %f2623; - st.local.f32 [%rd2+28], %f541; - fma.rn.f32 %f2624, %f537, %f539, 0f00000000; - sub.f32 %f2625, %f2615, %f2624; - mul.f32 %f542, %f540, %f2625; - st.local.f32 [%rd2+32], %f542; - fma.rn.f32 %f2626, %f538, %f539, 0f00000000; - sub.f32 %f2627, %f2616, %f2626; - mul.f32 %f543, %f540, %f2627; - st.local.f32 [%rd2+36], %f543; - ld.local.f32 %f2628, [%rd2+4]; - fma.rn.f32 %f2629, %f2628, %f2617, 0f00000000; - sub.f32 %f544, %f2618, %f2629; - st.local.f32 [%rd2+44], %f544; - add.s64 %rd104, %rd2, 40; - add.s64 %rd103, %rd2, 8; - mov.u32 %r320, -1; - -BB5_207: - ld.local.f32 %f2630, [%rd104]; - ld.local.f32 %f2631, [%rd103]; - fma.rn.f32 %f3150, %f2631, %f2630, %f3150; - add.s64 %rd104, %rd104, 4; - add.s64 %rd103, %rd103, 20; - add.s32 %r320, %r320, 1; - setp.lt.s32 %p325, %r320, 1; - @%p325 bra BB5_207; - - ld.local.f32 %f2633, [%rd2+48]; - sub.f32 %f2634, %f2633, %f3150; - ld.local.f32 %f547, [%rd2+40]; - ld.local.f32 %f2635, [%rd2+52]; - ld.local.f32 %f2636, [%rd2+56]; - ld.local.f32 %f2637, [%rd2+60]; - ld.local.f32 %f2638, [%rd2+4]; - ld.local.f32 %f2639, [%rd2+64]; - st.local.f32 [%rd2+48], %f2634; - fma.rn.f32 %f2640, %f537, %f547, 0f00000000; - fma.rn.f32 %f2641, %f542, %f544, %f2640; - rcp.rn.f32 %f548, %f2634; - sub.f32 %f2642, %f2635, %f2641; - mul.f32 %f549, %f548, %f2642; - st.local.f32 [%rd2+52], %f549; - fma.rn.f32 %f2643, %f538, %f547, 0f00000000; - fma.rn.f32 %f2644, %f543, %f544, %f2643; - sub.f32 %f2645, %f2636, %f2644; - mul.f32 %f550, %f548, %f2645; - st.local.f32 [%rd2+56], %f550; - fma.rn.f32 %f2646, %f2638, %f2637, 0f00000000; - sub.f32 %f551, %f2639, %f2646; - st.local.f32 [%rd2+64], %f551; - add.s64 %rd106, %rd2, 60; - add.s64 %rd105, %rd2, 8; - mov.f32 %f3151, 0f00000000; - mov.u32 %r321, -1; - -BB5_209: - ld.local.f32 %f2647, [%rd106]; - ld.local.f32 %f2648, [%rd105]; - fma.rn.f32 %f3151, %f2648, %f2647, %f3151; - add.s64 %rd106, %rd106, 4; - add.s64 %rd105, %rd105, 20; - add.s32 %r321, %r321, 1; - setp.lt.s32 %p326, %r321, 1; - @%p326 bra BB5_209; - - ld.local.f32 %f2650, [%rd2+68]; - sub.f32 %f554, %f2650, %f3151; - st.local.f32 [%rd2+68], %f554; - add.s64 %rd108, %rd2, 60; - add.s64 %rd107, %rd2, 12; - mov.f32 %f3152, 0f00000000; - mov.u32 %r322, -1; - -BB5_211: - ld.local.f32 %f2651, [%rd108]; - ld.local.f32 %f2652, [%rd107]; - fma.rn.f32 %f3152, %f2652, %f2651, %f3152; - add.s64 %rd108, %rd108, 4; - add.s64 %rd107, %rd107, 20; - add.s32 %r322, %r322, 1; - setp.lt.s32 %p327, %r322, 2; - @%p327 bra BB5_211; - - ld.local.f32 %f2654, [%rd2+72]; - sub.f32 %f2655, %f2654, %f3152; - ld.local.f32 %f557, [%rd2+60]; - ld.local.f32 %f2656, [%rd2+76]; - ld.local.f32 %f2657, [%rd2+80]; - ld.local.f32 %f2658, [%rd2+4]; - ld.local.f32 %f2659, [%rd2+84]; - st.local.f32 [%rd2+72], %f2655; - fma.rn.f32 %f2660, %f538, %f557, 0f00000000; - fma.rn.f32 %f2661, %f543, %f551, %f2660; - fma.rn.f32 %f2662, %f550, %f554, %f2661; - rcp.rn.f32 %f558, %f2655; - sub.f32 %f2663, %f2656, %f2662; - mul.f32 %f559, %f558, %f2663; - st.local.f32 [%rd2+76], %f559; - fma.rn.f32 %f2664, %f2658, %f2657, 0f00000000; - sub.f32 %f560, %f2659, %f2664; - st.local.f32 [%rd2+84], %f560; - add.s64 %rd110, %rd2, 80; - add.s64 %rd109, %rd2, 8; - mov.f32 %f3153, 0f00000000; - mov.u32 %r323, -1; - -BB5_213: - ld.local.f32 %f2665, [%rd110]; - ld.local.f32 %f2666, [%rd109]; - fma.rn.f32 %f3153, %f2666, %f2665, %f3153; - add.s64 %rd110, %rd110, 4; - add.s64 %rd109, %rd109, 20; - add.s32 %r323, %r323, 1; - setp.lt.s32 %p328, %r323, 1; - @%p328 bra BB5_213; - - ld.local.f32 %f2668, [%rd2+88]; - sub.f32 %f563, %f2668, %f3153; - st.local.f32 [%rd2+88], %f563; - add.s64 %rd112, %rd2, 80; - add.s64 %rd111, %rd2, 12; - mov.f32 %f3154, 0f00000000; - mov.u32 %r324, -1; - -BB5_215: - ld.local.f32 %f2669, [%rd112]; - ld.local.f32 %f2670, [%rd111]; - fma.rn.f32 %f3154, %f2670, %f2669, %f3154; - add.s64 %rd112, %rd112, 4; - add.s64 %rd111, %rd111, 20; - add.s32 %r324, %r324, 1; - setp.lt.s32 %p329, %r324, 2; - @%p329 bra BB5_215; - - ld.local.f32 %f2672, [%rd2+92]; - sub.f32 %f566, %f2672, %f3154; - st.local.f32 [%rd2+92], %f566; - add.s64 %rd114, %rd2, 80; - add.s64 %rd113, %rd2, 16; - mov.f32 %f3155, 0f00000000; - mov.u32 %r325, -1; - -BB5_217: - ld.local.f32 %f2673, [%rd114]; - ld.local.f32 %f2674, [%rd113]; - fma.rn.f32 %f3155, %f2674, %f2673, %f3155; - add.s64 %rd114, %rd114, 4; - add.s64 %rd113, %rd113, 20; - add.s32 %r325, %r325, 1; - setp.lt.s32 %p330, %r325, 3; - @%p330 bra BB5_217; - - mov.u32 %r296, %tid.x; - mov.u32 %r295, %ctaid.x; - mov.u32 %r294, %ntid.x; - mad.lo.s32 %r293, %r294, %r295, %r296; - ld.param.u64 %rd101, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_9]; - ld.param.u64 %rd100, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_8]; - ld.param.u32 %r285, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_10]; - ld.param.u64 %rd99, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_7]; - ld.local.f32 %f2675, [%rd2+96]; - sub.f32 %f2676, %f2675, %f3155; - ld.local.f32 %f2677, [%rd2+80]; - st.local.f32 [%rd2+96], %f2676; - add.f32 %f2678, %f535, 0f00000000; - mov.f32 %f2679, 0f00000000; - sub.f32 %f2680, %f2679, %f2678; - add.f32 %f2681, %f536, 0f00000000; - fma.rn.f32 %f2682, %f541, %f2680, %f2681; - sub.f32 %f2683, %f2679, %f2682; - add.f32 %f2684, %f537, 0f00000000; - fma.rn.f32 %f2685, %f542, %f2680, %f2684; - fma.rn.f32 %f2686, %f549, %f2683, %f2685; - sub.f32 %f2687, %f2679, %f2686; - add.f32 %f2688, %f538, 0f00000000; - fma.rn.f32 %f2689, %f543, %f2680, %f2688; - fma.rn.f32 %f2690, %f550, %f2683, %f2689; - fma.rn.f32 %f2691, %f559, %f2687, %f2690; - sub.f32 %f2692, %f2679, %f2691; - div.rn.f32 %f2693, %f2692, %f2676; - fma.rn.f32 %f2694, %f566, %f2693, 0f00000000; - sub.f32 %f2695, %f2687, %f2694; - mul.f32 %f2696, %f558, %f2695; - fma.rn.f32 %f2697, %f554, %f2696, 0f00000000; - fma.rn.f32 %f2698, %f563, %f2693, %f2697; - sub.f32 %f2699, %f2683, %f2698; - mul.f32 %f2700, %f548, %f2699; - fma.rn.f32 %f2701, %f544, %f2700, 0f00000000; - fma.rn.f32 %f2702, %f551, %f2696, %f2701; - fma.rn.f32 %f2703, %f560, %f2693, %f2702; - sub.f32 %f2704, %f2680, %f2703; - mul.f32 %f2705, %f540, %f2704; - fma.rn.f32 %f2706, %f539, %f2705, 0f00000000; - fma.rn.f32 %f2707, %f547, %f2700, %f2706; - fma.rn.f32 %f2708, %f557, %f2696, %f2707; - fma.rn.f32 %f2709, %f2677, %f2693, %f2708; - mov.f32 %f2710, 0f3F800000; - sub.f32 %f2711, %f2710, %f2709; - mul.f32 %f2712, %f534, %f2711; - fma.rn.f32 %f2713, %f535, 0f00000000, 0f00000000; - sub.f32 %f2714, %f2710, %f2713; - fma.rn.f32 %f2715, %f536, 0f00000000, 0f00000000; - fma.rn.f32 %f2716, %f541, %f2714, %f2715; - sub.f32 %f2717, %f2679, %f2716; - fma.rn.f32 %f2718, %f537, 0f00000000, 0f00000000; - fma.rn.f32 %f2719, %f542, %f2714, %f2718; - fma.rn.f32 %f2720, %f549, %f2717, %f2719; - sub.f32 %f2721, %f2679, %f2720; - fma.rn.f32 %f2722, %f538, 0f00000000, 0f00000000; - fma.rn.f32 %f2723, %f543, %f2714, %f2722; - fma.rn.f32 %f2724, %f550, %f2717, %f2723; - fma.rn.f32 %f2725, %f559, %f2721, %f2724; - sub.f32 %f2726, %f2679, %f2725; - div.rn.f32 %f2727, %f2726, %f2676; - fma.rn.f32 %f2728, %f566, %f2727, 0f00000000; - sub.f32 %f2729, %f2721, %f2728; - mul.f32 %f2730, %f558, %f2729; - fma.rn.f32 %f2731, %f554, %f2730, 0f00000000; - fma.rn.f32 %f2732, %f563, %f2727, %f2731; - sub.f32 %f2733, %f2717, %f2732; - mul.f32 %f2734, %f548, %f2733; - fma.rn.f32 %f2735, %f544, %f2734, 0f00000000; - fma.rn.f32 %f2736, %f551, %f2730, %f2735; - fma.rn.f32 %f2737, %f560, %f2727, %f2736; - sub.f32 %f2738, %f2714, %f2737; - mul.f32 %f2739, %f540, %f2738; - sub.f32 %f2740, %f2679, %f2713; - fma.rn.f32 %f2741, %f541, %f2740, %f2715; - sub.f32 %f2742, %f2710, %f2741; - fma.rn.f32 %f2743, %f542, %f2740, %f2718; - fma.rn.f32 %f2744, %f549, %f2742, %f2743; - sub.f32 %f2745, %f2679, %f2744; - fma.rn.f32 %f2746, %f543, %f2740, %f2722; - fma.rn.f32 %f2747, %f550, %f2742, %f2746; - fma.rn.f32 %f2748, %f559, %f2745, %f2747; - sub.f32 %f2749, %f2679, %f2748; - div.rn.f32 %f2750, %f2749, %f2676; - fma.rn.f32 %f2751, %f566, %f2750, 0f00000000; - sub.f32 %f2752, %f2745, %f2751; - mul.f32 %f2753, %f558, %f2752; - fma.rn.f32 %f2754, %f554, %f2753, 0f00000000; - fma.rn.f32 %f2755, %f563, %f2750, %f2754; - sub.f32 %f2756, %f2742, %f2755; - mul.f32 %f2757, %f548, %f2756; - sub.f32 %f2758, %f2679, %f2741; - fma.rn.f32 %f2759, %f549, %f2758, %f2743; - sub.f32 %f2760, %f2710, %f2759; - fma.rn.f32 %f2761, %f550, %f2758, %f2746; - fma.rn.f32 %f2762, %f559, %f2760, %f2761; - sub.f32 %f2763, %f2679, %f2762; - div.rn.f32 %f2764, %f2763, %f2676; - fma.rn.f32 %f2765, %f566, %f2764, 0f00000000; - sub.f32 %f2766, %f2760, %f2765; - mul.f32 %f2767, %f558, %f2766; - sub.f32 %f2768, %f2679, %f2759; - fma.rn.f32 %f2769, %f559, %f2768, %f2761; - sub.f32 %f2770, %f2710, %f2769; - div.rn.f32 %f2771, %f2770, %f2676; - cvta.to.global.u64 %rd83, %rd99; - mul.wide.s32 %rd84, %r293, 4; - add.s64 %rd85, %rd83, %rd84; - st.global.f32 [%rd85], %f3102; - shl.b32 %r281, %r285, 2; - cvt.s64.s32 %rd86, %r281; - add.s64 %rd87, %rd85, %rd86; - st.global.f32 [%rd87], %f3101; - add.s64 %rd88, %rd87, %rd86; - st.global.f32 [%rd88], %f3100; - add.s64 %rd89, %rd88, %rd86; - st.global.f32 [%rd89], %f3013; - add.s64 %rd90, %rd89, %rd86; - st.global.f32 [%rd90], %f3098; - cvta.to.global.u64 %rd91, %rd100; - add.s64 %rd92, %rd91, %rd84; - st.global.f32 [%rd92], %f2712; - add.s64 %rd93, %rd92, %rd86; - st.global.f32 [%rd93], %f2739; - add.s64 %rd94, %rd93, %rd86; - st.global.f32 [%rd94], %f2757; - add.s64 %rd95, %rd94, %rd86; - st.global.f32 [%rd95], %f2767; - add.s64 %rd96, %rd95, %rd86; - st.global.f32 [%rd96], %f2771; - cvta.to.global.u64 %rd97, %rd101; - add.s64 %rd98, %rd97, %rd84; - st.global.f32 [%rd98], %f3120; - -BB5_219: + mul.f32 %f1954, %f1953, %f1948; + sub.f32 %f1955, %f1954, %f1947; + mul.f32 %f1956, %f259, %f1955; + mul.f32 %f358, %f345, %f1956; + add.f32 %f1957, %f334, 0f3F000000; + sub.f32 %f1958, %f1957, %f2889; + div.rn.f32 %f359, %f1958, %f2886; + abs.f32 %f360, %f359; + setp.lt.f32 %p576, %f360, 0f00800000; + mul.f32 %f1959, %f360, 0f4B800000; + selp.f32 %f1960, %f1959, %f360, %p576; + selp.f32 %f1961, 0fC3170000, 0fC2FE0000, %p576; + mov.b32 %r678, %f1960; + and.b32 %r679, %r678, 8388607; + or.b32 %r680, %r679, 1065353216; + mov.b32 %f1962, %r680; + shr.u32 %r681, %r678, 23; + cvt.rn.f32.u32 %f1963, %r681; + add.f32 %f1964, %f1961, %f1963; + setp.gt.f32 %p577, %f1962, 0f3FB504F3; + mul.f32 %f1965, %f1962, 0f3F000000; + add.f32 %f1966, %f1964, 0f3F800000; + selp.f32 %f1967, %f1966, %f1964, %p577; + selp.f32 %f1968, %f1965, %f1962, %p577; + add.f32 %f1969, %f1968, 0fBF800000; + add.f32 %f1970, %f1968, 0f3F800000; + rcp.approx.ftz.f32 %f1971, %f1970; + add.f32 %f1972, %f1969, %f1969; + mul.f32 %f1974, %f1972, %f1971; + mul.f32 %f1975, %f1974, %f1974; + fma.rn.f32 %f1978, %f1594, %f1975, %f1593; + fma.rn.f32 %f1980, %f1978, %f1975, %f1596; + mul.rn.f32 %f1981, %f1980, %f1975; + mul.rn.f32 %f1982, %f1981, %f1974; + sub.f32 %f1983, %f1969, %f1974; + add.f32 %f1984, %f1983, %f1983; + neg.f32 %f1985, %f1974; + fma.rn.f32 %f1986, %f1985, %f1969, %f1984; + mul.rn.f32 %f1987, %f1971, %f1986; + add.f32 %f1988, %f1982, %f1974; + sub.f32 %f1989, %f1974, %f1988; + add.f32 %f1990, %f1982, %f1989; + add.f32 %f1991, %f1987, %f1990; + add.f32 %f1992, %f1988, %f1991; + sub.f32 %f1993, %f1988, %f1992; + add.f32 %f1994, %f1991, %f1993; + mul.rn.f32 %f1996, %f1967, %f1612; + mul.rn.f32 %f1998, %f1967, %f1614; + add.f32 %f1999, %f1996, %f1992; + sub.f32 %f2000, %f1996, %f1999; + add.f32 %f2001, %f1992, %f2000; + add.f32 %f2002, %f1994, %f2001; + add.f32 %f2003, %f1998, %f2002; + add.f32 %f2004, %f1999, %f2003; + sub.f32 %f2005, %f1999, %f2004; + add.f32 %f2006, %f2003, %f2005; + mul.rn.f32 %f2007, %f1533, %f2004; + neg.f32 %f2008, %f2007; + fma.rn.f32 %f2009, %f1533, %f2004, %f2008; + fma.rn.f32 %f2010, %f1533, %f2006, %f2009; + mov.f32 %f2011, 0f00000000; + fma.rn.f32 %f2012, %f2011, %f2004, %f2010; + add.rn.f32 %f2013, %f2007, %f2012; + neg.f32 %f2014, %f2013; + add.rn.f32 %f2015, %f2007, %f2014; + add.rn.f32 %f2016, %f2015, %f2012; + mov.b32 %r682, %f2013; + setp.eq.s32 %p578, %r682, 1118925336; + add.s32 %r683, %r682, -1; + mov.b32 %f2017, %r683; + add.f32 %f2018, %f2016, 0f37000000; + selp.f32 %f361, %f2018, %f2016, %p578; + selp.f32 %f2019, %f2017, %f2013, %p578; + mul.rn.f32 %f2020, %f2019, %f1637; + cvt.rzi.f32.f32 %f2021, %f2020; + abs.f32 %f2022, %f2021; + setp.gt.f32 %p579, %f2022, 0f42FC0000; + mov.b32 %r684, %f2021; + and.b32 %r685, %r684, -2147483648; + or.b32 %r686, %r685, 1123811328; + mov.b32 %f2023, %r686; + selp.f32 %f2024, %f2023, %f2021, %p579; + fma.rn.f32 %f2026, %f2024, %f1643, %f2019; + fma.rn.f32 %f2028, %f2024, %f1645, %f2026; + mul.f32 %f2029, %f2028, 0f3FB8AA3B; + add.f32 %f2030, %f2024, 0f4B40007F; + mov.b32 %r687, %f2030; + shl.b32 %r688, %r687, 23; + mov.b32 %f2031, %r688; + ex2.approx.ftz.f32 %f2032, %f2029; + mul.f32 %f362, %f2032, %f2031; + setp.eq.f32 %p580, %f362, 0f7F800000; + @%p580 bra $L__BB5_360; + + fma.rn.f32 %f2932, %f362, %f361, %f362; + +$L__BB5_360: + setp.lt.f32 %p581, %f359, 0f00000000; + and.pred %p35, %p581, %p540; + setp.eq.f32 %p583, %f359, 0f00000000; + @%p583 bra $L__BB5_364; + bra.uni $L__BB5_361; + +$L__BB5_364: + add.f32 %f2037, %f359, %f359; + selp.f32 %f2934, %f2037, 0f00000000, %p540; + bra.uni $L__BB5_365; + +$L__BB5_361: + mov.b32 %r689, %f2932; + xor.b32 %r690, %r689, -2147483648; + mov.b32 %f2033, %r690; + selp.f32 %f2934, %f2033, %f2932, %p35; + setp.geu.f32 %p584, %f359, 0f00000000; + @%p584 bra $L__BB5_365; + + cvt.rzi.f32.f32 %f2035, %f1533; + setp.eq.f32 %p585, %f2035, 0f40000000; + @%p585 bra $L__BB5_365; + + mov.f32 %f2934, 0f7FFFFFFF; + +$L__BB5_365: + add.f32 %f2038, %f360, 0f40000000; + mov.b32 %r691, %f2038; + setp.lt.s32 %p587, %r691, 2139095040; + @%p587 bra $L__BB5_370; + + setp.gtu.f32 %p588, %f360, 0f7F800000; + @%p588 bra $L__BB5_369; + bra.uni $L__BB5_367; + +$L__BB5_369: + add.f32 %f2934, %f359, 0f40000000; + bra.uni $L__BB5_370; + +$L__BB5_367: + setp.neu.f32 %p589, %f360, 0f7F800000; + @%p589 bra $L__BB5_370; + + selp.f32 %f2934, 0fFF800000, 0f7F800000, %p35; + +$L__BB5_370: + mul.f32 %f2040, %f2934, 0fBF000000; + setp.eq.f32 %p590, %f359, 0f3F800000; + selp.f32 %f2041, 0fBF000000, %f2040, %p590; + fma.rn.f32 %f2044, %f2041, %f1925, %f1526; + cvt.sat.f32.f32 %f2047, %f2044; + fma.rm.f32 %f2049, %f2047, %f1928, %f1930; + add.f32 %f2050, %f2049, 0fCB40007F; + neg.f32 %f2051, %f2050; + fma.rn.f32 %f2052, %f2041, %f1637, %f2051; + fma.rn.f32 %f2054, %f2041, %f1943, %f2052; + mov.b32 %r692, %f2049; + shl.b32 %r693, %r692, 23; + mov.b32 %f2055, %r693; + ex2.approx.ftz.f32 %f2056, %f2054; + mul.f32 %f371, %f2056, %f2055; + div.rn.f32 %f372, %f340, %f2886; + abs.f32 %f373, %f372; + setp.lt.f32 %p591, %f373, 0f00800000; + mul.f32 %f2057, %f373, 0f4B800000; + selp.f32 %f2058, %f2057, %f373, %p591; + selp.f32 %f2059, 0fC3170000, 0fC2FE0000, %p591; + mov.b32 %r694, %f2058; + and.b32 %r695, %r694, 8388607; + or.b32 %r696, %r695, 1065353216; + mov.b32 %f2060, %r696; + shr.u32 %r697, %r694, 23; + cvt.rn.f32.u32 %f2061, %r697; + add.f32 %f2062, %f2059, %f2061; + setp.gt.f32 %p592, %f2060, 0f3FB504F3; + mul.f32 %f2063, %f2060, 0f3F000000; + add.f32 %f2064, %f2062, 0f3F800000; + selp.f32 %f2065, %f2064, %f2062, %p592; + selp.f32 %f2066, %f2063, %f2060, %p592; + add.f32 %f2067, %f2066, 0fBF800000; + add.f32 %f2068, %f2066, 0f3F800000; + rcp.approx.ftz.f32 %f2069, %f2068; + add.f32 %f2070, %f2067, %f2067; + mul.f32 %f2072, %f2070, %f2069; + mul.f32 %f2073, %f2072, %f2072; + fma.rn.f32 %f2076, %f1594, %f2073, %f1593; + fma.rn.f32 %f2078, %f2076, %f2073, %f1596; + mul.rn.f32 %f2079, %f2078, %f2073; + mul.rn.f32 %f2080, %f2079, %f2072; + sub.f32 %f2081, %f2067, %f2072; + add.f32 %f2082, %f2081, %f2081; + neg.f32 %f2083, %f2072; + fma.rn.f32 %f2084, %f2083, %f2067, %f2082; + mul.rn.f32 %f2085, %f2069, %f2084; + add.f32 %f2086, %f2080, %f2072; + sub.f32 %f2087, %f2072, %f2086; + add.f32 %f2088, %f2080, %f2087; + add.f32 %f2089, %f2085, %f2088; + add.f32 %f2090, %f2086, %f2089; + sub.f32 %f2091, %f2086, %f2090; + add.f32 %f2092, %f2089, %f2091; + mul.rn.f32 %f2094, %f2065, %f1612; + mul.rn.f32 %f2096, %f2065, %f1614; + add.f32 %f2097, %f2094, %f2090; + sub.f32 %f2098, %f2094, %f2097; + add.f32 %f2099, %f2090, %f2098; + add.f32 %f2100, %f2092, %f2099; + add.f32 %f2101, %f2096, %f2100; + add.f32 %f2102, %f2097, %f2101; + sub.f32 %f2103, %f2097, %f2102; + add.f32 %f2104, %f2101, %f2103; + mul.rn.f32 %f2105, %f1533, %f2102; + neg.f32 %f2106, %f2105; + fma.rn.f32 %f2107, %f1533, %f2102, %f2106; + fma.rn.f32 %f2108, %f1533, %f2104, %f2107; + fma.rn.f32 %f2110, %f2011, %f2102, %f2108; + add.rn.f32 %f2111, %f2105, %f2110; + neg.f32 %f2112, %f2111; + add.rn.f32 %f2113, %f2105, %f2112; + add.rn.f32 %f2114, %f2113, %f2110; + mov.b32 %r698, %f2111; + setp.eq.s32 %p593, %r698, 1118925336; + add.s32 %r699, %r698, -1; + mov.b32 %f2115, %r699; + add.f32 %f2116, %f2114, 0f37000000; + selp.f32 %f374, %f2116, %f2114, %p593; + selp.f32 %f2117, %f2115, %f2111, %p593; + mul.rn.f32 %f2118, %f2117, %f1637; + cvt.rzi.f32.f32 %f2119, %f2118; + abs.f32 %f2120, %f2119; + setp.gt.f32 %p594, %f2120, 0f42FC0000; + mov.b32 %r700, %f2119; + and.b32 %r701, %r700, -2147483648; + or.b32 %r702, %r701, 1123811328; + mov.b32 %f2121, %r702; + selp.f32 %f2122, %f2121, %f2119, %p594; + fma.rn.f32 %f2124, %f2122, %f1643, %f2117; + fma.rn.f32 %f2126, %f2122, %f1645, %f2124; + mul.f32 %f2127, %f2126, 0f3FB8AA3B; + add.f32 %f2128, %f2122, 0f4B40007F; + mov.b32 %r703, %f2128; + shl.b32 %r704, %r703, 23; + mov.b32 %f2129, %r704; + ex2.approx.ftz.f32 %f2130, %f2127; + mul.f32 %f375, %f2130, %f2129; + setp.eq.f32 %p595, %f375, 0f7F800000; + mov.f32 %f2935, 0f7F800000; + @%p595 bra $L__BB5_372; + + fma.rn.f32 %f2935, %f375, %f374, %f375; + +$L__BB5_372: + setp.lt.f32 %p596, %f372, 0f00000000; + and.pred %p36, %p596, %p540; + setp.eq.f32 %p598, %f372, 0f00000000; + @%p598 bra $L__BB5_376; + bra.uni $L__BB5_373; + +$L__BB5_376: + add.f32 %f2135, %f372, %f372; + selp.f32 %f2937, %f2135, 0f00000000, %p540; + bra.uni $L__BB5_377; + +$L__BB5_373: + mov.b32 %r705, %f2935; + xor.b32 %r706, %r705, -2147483648; + mov.b32 %f2131, %r706; + selp.f32 %f2937, %f2131, %f2935, %p36; + setp.geu.f32 %p599, %f372, 0f00000000; + @%p599 bra $L__BB5_377; + + cvt.rzi.f32.f32 %f2133, %f1533; + setp.eq.f32 %p600, %f2133, 0f40000000; + @%p600 bra $L__BB5_377; + + mov.f32 %f2937, 0f7FFFFFFF; + +$L__BB5_377: + add.f32 %f2136, %f373, 0f40000000; + mov.b32 %r707, %f2136; + setp.lt.s32 %p602, %r707, 2139095040; + @%p602 bra $L__BB5_382; + + setp.gtu.f32 %p603, %f373, 0f7F800000; + @%p603 bra $L__BB5_381; + bra.uni $L__BB5_379; + +$L__BB5_381: + add.f32 %f2937, %f372, 0f40000000; + bra.uni $L__BB5_382; + +$L__BB5_379: + setp.neu.f32 %p604, %f373, 0f7F800000; + @%p604 bra $L__BB5_382; + + selp.f32 %f2937, 0fFF800000, 0f7F800000, %p36; + +$L__BB5_382: + mul.f32 %f2138, %f2937, 0fBF000000; + setp.eq.f32 %p605, %f372, 0f3F800000; + selp.f32 %f2139, 0fBF000000, %f2138, %p605; + fma.rn.f32 %f2142, %f2139, %f1925, %f1526; + cvt.sat.f32.f32 %f2145, %f2142; + fma.rm.f32 %f2147, %f2145, %f1928, %f1930; + add.f32 %f2148, %f2147, 0fCB40007F; + neg.f32 %f2149, %f2148; + fma.rn.f32 %f2150, %f2139, %f1637, %f2149; + fma.rn.f32 %f2152, %f2139, %f1943, %f2150; + mov.b32 %r708, %f2147; + shl.b32 %r709, %r708, 23; + mov.b32 %f2153, %r709; + ex2.approx.ftz.f32 %f2154, %f2152; + mul.f32 %f2155, %f2154, %f2153; + sub.f32 %f384, %f371, %f2155; + setp.eq.f32 %p606, %f301, 0f7F800000; + mov.f32 %f2938, 0f7F800000; + @%p606 bra $L__BB5_384; + + fma.rn.f32 %f2938, %f301, %f300, %f301; + +$L__BB5_384: + mov.b32 %r710, %f2938; + xor.b32 %r711, %r710, -2147483648; + mov.b32 %f2156, %r711; + selp.f32 %f387, %f2156, %f2938, %p29; + setp.eq.f32 %p607, %f298, 0f00000000; + selp.f32 %f2939, %f302, %f387, %p607; + @%p33 bra $L__BB5_387; + + cvt.rzi.f32.f32 %f2158, %f1533; + setp.eq.f32 %p608, %f2158, 0f40000000; + mov.f32 %f2939, %f387; + @%p608 bra $L__BB5_387; + + mov.f32 %f2939, 0f7FFFFFFF; + +$L__BB5_387: + setp.eq.f32 %p609, %f306, 0f7F800000; + mov.f32 %f2940, 0f7F800000; + @%p609 bra $L__BB5_389; + + fma.rn.f32 %f2940, %f306, %f305, %f306; + +$L__BB5_389: + mov.b32 %r712, %f2940; + xor.b32 %r713, %r712, -2147483648; + mov.b32 %f2161, %r713; + selp.f32 %f392, %f2161, %f2940, %p30; + setp.eq.f32 %p610, %f303, 0f00000000; + selp.f32 %f2941, %f309, %f392, %p610; + @%p34 bra $L__BB5_392; + + cvt.rzi.f32.f32 %f2163, %f1533; + setp.eq.f32 %p611, %f2163, 0f40000000; + mov.f32 %f2941, %f392; + @%p611 bra $L__BB5_392; + + mov.f32 %f2941, 0f7FFFFFFF; + +$L__BB5_392: + mul.f32 %f2166, %f259, %f384; + mul.f32 %f395, %f333, %f2166; + setp.gtu.f32 %p612, %f299, 0f7F800000; + mov.f32 %f2942, 0f7F800000; + selp.f32 %f2167, %f307, %f2939, %p612; + setp.neu.f32 %p613, %f299, 0f7F800000; + selp.f32 %f2168, %f2167, %f308, %p613; + setp.gt.s32 %p614, %r97, 2139095039; + selp.f32 %f2169, %f2168, %f2939, %p614; + mul.f32 %f2170, %f2169, 0fBF000000; + setp.eq.f32 %p615, %f298, 0f3F800000; + selp.f32 %f2171, 0fBF000000, %f2170, %p615; + fma.rn.f32 %f2174, %f2171, %f1925, %f1526; + cvt.sat.f32.f32 %f2177, %f2174; + fma.rm.f32 %f2179, %f2177, %f1928, %f1930; + setp.gtu.f32 %p616, %f304, 0f7F800000; + selp.f32 %f2180, %f311, %f2941, %p616; + setp.neu.f32 %p617, %f304, 0f7F800000; + selp.f32 %f2181, %f2180, %f312, %p617; + setp.gt.s32 %p618, %r98, 2139095039; + selp.f32 %f2182, %f2181, %f2941, %p618; + mul.f32 %f2183, %f2182, 0fBF000000; + setp.eq.f32 %p619, %f303, 0f3F800000; + selp.f32 %f2184, 0fBF000000, %f2183, %p619; + fma.rn.f32 %f2185, %f2184, %f1925, %f1526; + cvt.sat.f32.f32 %f2186, %f2185; + fma.rm.f32 %f2187, %f2186, %f1928, %f1930; + add.f32 %f2188, %f2187, 0fCB40007F; + neg.f32 %f2189, %f2188; + fma.rn.f32 %f2190, %f2184, %f1637, %f2189; + fma.rn.f32 %f2192, %f2184, %f1943, %f2190; + mov.b32 %r714, %f2187; + shl.b32 %r715, %r714, 23; + mov.b32 %f2193, %r715; + ex2.approx.ftz.f32 %f2194, %f2192; + mul.f32 %f2195, %f2194, %f2193; + mul.f32 %f2196, %f279, %f2195; + mov.b32 %r716, %f2179; + shl.b32 %r717, %r716, 23; + mov.b32 %f2197, %r717; + add.f32 %f2198, %f2179, 0fCB40007F; + neg.f32 %f2199, %f2198; + fma.rn.f32 %f2200, %f2171, %f1637, %f2199; + fma.rn.f32 %f2201, %f2171, %f1943, %f2200; + ex2.approx.ftz.f32 %f2202, %f2201; + mul.f32 %f2203, %f2202, %f2197; + mul.f32 %f2204, %f310, %f2203; + sub.f32 %f2205, %f2204, %f2196; + mul.f32 %f2206, %f260, %f2205; + mul.f32 %f396, %f345, %f2206; + add.f32 %f2207, %f334, 0f3F800000; + sub.f32 %f2208, %f2207, %f2889; + div.rn.f32 %f397, %f2208, %f2886; + abs.f32 %f398, %f397; + setp.lt.f32 %p620, %f398, 0f00800000; + mul.f32 %f2209, %f398, 0f4B800000; + selp.f32 %f2210, %f2209, %f398, %p620; + selp.f32 %f2211, 0fC3170000, 0fC2FE0000, %p620; + mov.b32 %r718, %f2210; + and.b32 %r719, %r718, 8388607; + or.b32 %r720, %r719, 1065353216; + mov.b32 %f2212, %r720; + shr.u32 %r721, %r718, 23; + cvt.rn.f32.u32 %f2213, %r721; + add.f32 %f2214, %f2211, %f2213; + setp.gt.f32 %p621, %f2212, 0f3FB504F3; + mul.f32 %f2215, %f2212, 0f3F000000; + add.f32 %f2216, %f2214, 0f3F800000; + selp.f32 %f2217, %f2216, %f2214, %p621; + selp.f32 %f2218, %f2215, %f2212, %p621; + add.f32 %f2219, %f2218, 0fBF800000; + add.f32 %f2220, %f2218, 0f3F800000; + rcp.approx.ftz.f32 %f2221, %f2220; + add.f32 %f2222, %f2219, %f2219; + mul.f32 %f2224, %f2222, %f2221; + mul.f32 %f2225, %f2224, %f2224; + fma.rn.f32 %f2228, %f1594, %f2225, %f1593; + fma.rn.f32 %f2230, %f2228, %f2225, %f1596; + mul.rn.f32 %f2231, %f2230, %f2225; + mul.rn.f32 %f2232, %f2231, %f2224; + sub.f32 %f2233, %f2219, %f2224; + add.f32 %f2234, %f2233, %f2233; + neg.f32 %f2235, %f2224; + fma.rn.f32 %f2236, %f2235, %f2219, %f2234; + mul.rn.f32 %f2237, %f2221, %f2236; + add.f32 %f2238, %f2232, %f2224; + sub.f32 %f2239, %f2224, %f2238; + add.f32 %f2240, %f2232, %f2239; + add.f32 %f2241, %f2237, %f2240; + add.f32 %f2242, %f2238, %f2241; + sub.f32 %f2243, %f2238, %f2242; + add.f32 %f2244, %f2241, %f2243; + mul.rn.f32 %f2246, %f2217, %f1612; + mul.rn.f32 %f2248, %f2217, %f1614; + add.f32 %f2249, %f2246, %f2242; + sub.f32 %f2250, %f2246, %f2249; + add.f32 %f2251, %f2242, %f2250; + add.f32 %f2252, %f2244, %f2251; + add.f32 %f2253, %f2248, %f2252; + add.f32 %f2254, %f2249, %f2253; + sub.f32 %f2255, %f2249, %f2254; + add.f32 %f2256, %f2253, %f2255; + mul.rn.f32 %f2257, %f1533, %f2254; + neg.f32 %f2258, %f2257; + fma.rn.f32 %f2259, %f1533, %f2254, %f2258; + fma.rn.f32 %f2260, %f1533, %f2256, %f2259; + fma.rn.f32 %f2262, %f2011, %f2254, %f2260; + add.rn.f32 %f2263, %f2257, %f2262; + neg.f32 %f2264, %f2263; + add.rn.f32 %f2265, %f2257, %f2264; + add.rn.f32 %f2266, %f2265, %f2262; + mov.b32 %r722, %f2263; + setp.eq.s32 %p622, %r722, 1118925336; + add.s32 %r723, %r722, -1; + mov.b32 %f2267, %r723; + add.f32 %f2268, %f2266, 0f37000000; + selp.f32 %f399, %f2268, %f2266, %p622; + selp.f32 %f2269, %f2267, %f2263, %p622; + mul.rn.f32 %f2270, %f2269, %f1637; + cvt.rzi.f32.f32 %f2271, %f2270; + abs.f32 %f2272, %f2271; + setp.gt.f32 %p623, %f2272, 0f42FC0000; + mov.b32 %r724, %f2271; + and.b32 %r725, %r724, -2147483648; + or.b32 %r726, %r725, 1123811328; + mov.b32 %f2273, %r726; + selp.f32 %f2274, %f2273, %f2271, %p623; + fma.rn.f32 %f2276, %f2274, %f1643, %f2269; + fma.rn.f32 %f2278, %f2274, %f1645, %f2276; + mul.f32 %f2279, %f2278, 0f3FB8AA3B; + add.f32 %f2280, %f2274, 0f4B40007F; + mov.b32 %r727, %f2280; + shl.b32 %r728, %r727, 23; + mov.b32 %f2281, %r728; + ex2.approx.ftz.f32 %f2282, %f2279; + mul.f32 %f400, %f2282, %f2281; + setp.eq.f32 %p624, %f400, 0f7F800000; + @%p624 bra $L__BB5_394; + + fma.rn.f32 %f2942, %f400, %f399, %f400; + +$L__BB5_394: + setp.lt.f32 %p625, %f397, 0f00000000; + and.pred %p37, %p625, %p540; + setp.eq.f32 %p627, %f397, 0f00000000; + @%p627 bra $L__BB5_398; + bra.uni $L__BB5_395; + +$L__BB5_398: + add.f32 %f2287, %f397, %f397; + selp.f32 %f2944, %f2287, 0f00000000, %p540; + bra.uni $L__BB5_399; + +$L__BB5_395: + mov.b32 %r729, %f2942; + xor.b32 %r730, %r729, -2147483648; + mov.b32 %f2283, %r730; + selp.f32 %f2944, %f2283, %f2942, %p37; + setp.geu.f32 %p628, %f397, 0f00000000; + @%p628 bra $L__BB5_399; + + cvt.rzi.f32.f32 %f2285, %f1533; + setp.eq.f32 %p629, %f2285, 0f40000000; + @%p629 bra $L__BB5_399; + + mov.f32 %f2944, 0f7FFFFFFF; + +$L__BB5_399: + add.f32 %f2288, %f398, 0f40000000; + mov.b32 %r731, %f2288; + setp.lt.s32 %p631, %r731, 2139095040; + @%p631 bra $L__BB5_404; + + setp.gtu.f32 %p632, %f398, 0f7F800000; + @%p632 bra $L__BB5_403; + bra.uni $L__BB5_401; + +$L__BB5_403: + add.f32 %f2944, %f397, 0f40000000; + bra.uni $L__BB5_404; + +$L__BB5_401: + setp.neu.f32 %p633, %f398, 0f7F800000; + @%p633 bra $L__BB5_404; + + selp.f32 %f2944, 0fFF800000, 0f7F800000, %p37; + +$L__BB5_404: + mul.f32 %f2290, %f2944, 0fBF000000; + setp.eq.f32 %p634, %f397, 0f3F800000; + selp.f32 %f2291, 0fBF000000, %f2290, %p634; + fma.rn.f32 %f2294, %f2291, %f1925, %f1526; + cvt.sat.f32.f32 %f2297, %f2294; + fma.rm.f32 %f2299, %f2297, %f1928, %f1930; + add.f32 %f2300, %f2299, 0fCB40007F; + neg.f32 %f2301, %f2300; + fma.rn.f32 %f2302, %f2291, %f1637, %f2301; + fma.rn.f32 %f2304, %f2291, %f1943, %f2302; + mov.b32 %r732, %f2299; + shl.b32 %r733, %r732, 23; + mov.b32 %f2305, %r733; + ex2.approx.ftz.f32 %f2306, %f2304; + mul.f32 %f409, %f2306, %f2305; + div.rn.f32 %f410, %f335, %f2886; + abs.f32 %f411, %f410; + setp.lt.f32 %p635, %f411, 0f00800000; + mul.f32 %f2307, %f411, 0f4B800000; + selp.f32 %f2308, %f2307, %f411, %p635; + selp.f32 %f2309, 0fC3170000, 0fC2FE0000, %p635; + mov.b32 %r734, %f2308; + and.b32 %r735, %r734, 8388607; + or.b32 %r736, %r735, 1065353216; + mov.b32 %f2310, %r736; + shr.u32 %r737, %r734, 23; + cvt.rn.f32.u32 %f2311, %r737; + add.f32 %f2312, %f2309, %f2311; + setp.gt.f32 %p636, %f2310, 0f3FB504F3; + mul.f32 %f2313, %f2310, 0f3F000000; + add.f32 %f2314, %f2312, 0f3F800000; + selp.f32 %f2315, %f2314, %f2312, %p636; + selp.f32 %f2316, %f2313, %f2310, %p636; + add.f32 %f2317, %f2316, 0fBF800000; + add.f32 %f2318, %f2316, 0f3F800000; + rcp.approx.ftz.f32 %f2319, %f2318; + add.f32 %f2320, %f2317, %f2317; + mul.f32 %f2322, %f2320, %f2319; + mul.f32 %f2323, %f2322, %f2322; + fma.rn.f32 %f2326, %f1594, %f2323, %f1593; + fma.rn.f32 %f2328, %f2326, %f2323, %f1596; + mul.rn.f32 %f2329, %f2328, %f2323; + mul.rn.f32 %f2330, %f2329, %f2322; + sub.f32 %f2331, %f2317, %f2322; + add.f32 %f2332, %f2331, %f2331; + neg.f32 %f2333, %f2322; + fma.rn.f32 %f2334, %f2333, %f2317, %f2332; + mul.rn.f32 %f2335, %f2319, %f2334; + add.f32 %f2336, %f2330, %f2322; + sub.f32 %f2337, %f2322, %f2336; + add.f32 %f2338, %f2330, %f2337; + add.f32 %f2339, %f2335, %f2338; + add.f32 %f2340, %f2336, %f2339; + sub.f32 %f2341, %f2336, %f2340; + add.f32 %f2342, %f2339, %f2341; + mul.rn.f32 %f2344, %f2315, %f1612; + mul.rn.f32 %f2346, %f2315, %f1614; + add.f32 %f2347, %f2344, %f2340; + sub.f32 %f2348, %f2344, %f2347; + add.f32 %f2349, %f2340, %f2348; + add.f32 %f2350, %f2342, %f2349; + add.f32 %f2351, %f2346, %f2350; + add.f32 %f2352, %f2347, %f2351; + sub.f32 %f2353, %f2347, %f2352; + add.f32 %f2354, %f2351, %f2353; + mul.rn.f32 %f2355, %f1533, %f2352; + neg.f32 %f2356, %f2355; + fma.rn.f32 %f2357, %f1533, %f2352, %f2356; + fma.rn.f32 %f2358, %f1533, %f2354, %f2357; + fma.rn.f32 %f2360, %f2011, %f2352, %f2358; + add.rn.f32 %f2361, %f2355, %f2360; + neg.f32 %f2362, %f2361; + add.rn.f32 %f2363, %f2355, %f2362; + add.rn.f32 %f2364, %f2363, %f2360; + mov.b32 %r738, %f2361; + setp.eq.s32 %p637, %r738, 1118925336; + add.s32 %r739, %r738, -1; + mov.b32 %f2365, %r739; + add.f32 %f2366, %f2364, 0f37000000; + selp.f32 %f412, %f2366, %f2364, %p637; + selp.f32 %f2367, %f2365, %f2361, %p637; + mul.rn.f32 %f2368, %f2367, %f1637; + cvt.rzi.f32.f32 %f2369, %f2368; + abs.f32 %f2370, %f2369; + setp.gt.f32 %p638, %f2370, 0f42FC0000; + mov.b32 %r740, %f2369; + and.b32 %r741, %r740, -2147483648; + or.b32 %r742, %r741, 1123811328; + mov.b32 %f2371, %r742; + selp.f32 %f2372, %f2371, %f2369, %p638; + fma.rn.f32 %f2374, %f2372, %f1643, %f2367; + fma.rn.f32 %f2376, %f2372, %f1645, %f2374; + mul.f32 %f2377, %f2376, 0f3FB8AA3B; + add.f32 %f2378, %f2372, 0f4B40007F; + mov.b32 %r743, %f2378; + shl.b32 %r744, %r743, 23; + mov.b32 %f2379, %r744; + ex2.approx.ftz.f32 %f2380, %f2377; + mul.f32 %f413, %f2380, %f2379; + setp.eq.f32 %p639, %f413, 0f7F800000; + mov.f32 %f2945, 0f7F800000; + @%p639 bra $L__BB5_406; + + fma.rn.f32 %f2945, %f413, %f412, %f413; + +$L__BB5_406: + setp.lt.f32 %p640, %f410, 0f00000000; + and.pred %p38, %p640, %p540; + setp.eq.f32 %p642, %f410, 0f00000000; + @%p642 bra $L__BB5_410; + bra.uni $L__BB5_407; + +$L__BB5_410: + add.f32 %f2385, %f410, %f410; + selp.f32 %f2947, %f2385, 0f00000000, %p540; + bra.uni $L__BB5_411; + +$L__BB5_407: + mov.b32 %r745, %f2945; + xor.b32 %r746, %r745, -2147483648; + mov.b32 %f2381, %r746; + selp.f32 %f2947, %f2381, %f2945, %p38; + setp.geu.f32 %p643, %f410, 0f00000000; + @%p643 bra $L__BB5_411; + + cvt.rzi.f32.f32 %f2383, %f1533; + setp.eq.f32 %p644, %f2383, 0f40000000; + @%p644 bra $L__BB5_411; + + mov.f32 %f2947, 0f7FFFFFFF; + +$L__BB5_411: + add.f32 %f2386, %f411, 0f40000000; + mov.b32 %r747, %f2386; + setp.lt.s32 %p646, %r747, 2139095040; + @%p646 bra $L__BB5_416; + + setp.gtu.f32 %p647, %f411, 0f7F800000; + @%p647 bra $L__BB5_415; + bra.uni $L__BB5_413; + +$L__BB5_415: + add.f32 %f2947, %f410, 0f40000000; + bra.uni $L__BB5_416; + +$L__BB5_413: + setp.neu.f32 %p648, %f411, 0f7F800000; + @%p648 bra $L__BB5_416; + + selp.f32 %f2947, 0fFF800000, 0f7F800000, %p38; + +$L__BB5_416: + mul.f32 %f2387, %f2947, 0fBF000000; + setp.eq.f32 %p649, %f410, 0f3F800000; + selp.f32 %f2388, 0fBF000000, %f2387, %p649; + fma.rn.f32 %f2391, %f2388, %f1925, %f1526; + cvt.sat.f32.f32 %f2394, %f2391; + fma.rm.f32 %f2396, %f2394, %f1928, %f1930; + add.f32 %f2397, %f2396, 0fCB40007F; + neg.f32 %f2398, %f2397; + fma.rn.f32 %f2399, %f2388, %f1637, %f2398; + fma.rn.f32 %f2401, %f2388, %f1943, %f2399; + mov.b32 %r748, %f2396; + shl.b32 %r749, %r748, 23; + mov.b32 %f2402, %r749; + ex2.approx.ftz.f32 %f2403, %f2401; + mul.f32 %f2404, %f2403, %f2402; + add.f32 %f2405, %f335, 0f3F800000; + mul.f32 %f2406, %f2405, %f409; + mul.f32 %f2407, %f335, %f2404; + sub.f32 %f2408, %f2406, %f2407; + mul.f32 %f2409, %f260, %f2408; + fma.rn.f32 %f2410, %f333, %f2409, %f396; + mul.f32 %f2411, %f358, %f358; + add.f32 %f422, %f2885, %f346; + div.rn.f32 %f2412, %f2411, %f422; + add.f32 %f2919, %f2919, %f2412; + mul.f32 %f2413, %f395, %f358; + div.rn.f32 %f2414, %f2413, %f422; + add.f32 %f2918, %f2918, %f2414; + mul.f32 %f2415, %f333, %f345; + mul.f32 %f2416, %f2415, %f358; + div.rn.f32 %f2417, %f2416, %f422; + add.f32 %f2917, %f2917, %f2417; + div.rn.f32 %f2418, %f358, %f422; + add.f32 %f2916, %f2916, %f2418; + mul.f32 %f2419, %f2410, %f358; + div.rn.f32 %f2420, %f2419, %f422; + add.f32 %f2915, %f2915, %f2420; + mul.f32 %f2421, %f395, %f395; + div.rn.f32 %f2422, %f2421, %f422; + add.f32 %f2914, %f2914, %f2422; + mul.f32 %f2423, %f2415, %f395; + div.rn.f32 %f2424, %f2423, %f422; + add.f32 %f2913, %f2913, %f2424; + div.rn.f32 %f2425, %f395, %f422; + add.f32 %f2912, %f2912, %f2425; + mul.f32 %f2426, %f2410, %f395; + div.rn.f32 %f2427, %f2426, %f422; + add.f32 %f2911, %f2911, %f2427; + mul.f32 %f2428, %f2415, %f2415; + div.rn.f32 %f2429, %f2428, %f422; + add.f32 %f2910, %f2910, %f2429; + div.rn.f32 %f2430, %f2415, %f422; + add.f32 %f2909, %f2909, %f2430; + mul.f32 %f2431, %f2410, %f2415; + div.rn.f32 %f2432, %f2431, %f422; + add.f32 %f2908, %f2908, %f2432; + rcp.rn.f32 %f2433, %f422; + add.f32 %f2920, %f2920, %f2433; + div.rn.f32 %f2434, %f2410, %f422; + add.f32 %f2921, %f2921, %f2434; + mul.f32 %f2435, %f2410, %f2410; + div.rn.f32 %f2436, %f2435, %f422; + add.f32 %f2922, %f2922, %f2436; + setp.leu.f32 %p650, %f422, 0f00000000; + @%p650 bra $L__BB5_424; + + add.f32 %f438, %f2885, %f347; + setp.gt.f32 %p651, %f438, 0f00000000; + @%p651 bra $L__BB5_419; + bra.uni $L__BB5_418; + +$L__BB5_419: + setp.lt.f32 %p652, %f422, 0f00800000; + mul.f32 %f2439, %f422, 0f4B000000; + selp.f32 %f440, %f2439, %f422, %p652; + selp.f32 %f2440, 0fC1B80000, 0f00000000, %p652; + mov.b32 %r750, %f440; + add.s32 %r751, %r750, -1059760811; + and.b32 %r752, %r751, -8388608; + sub.s32 %r753, %r750, %r752; + mov.b32 %f2441, %r753; + cvt.rn.f32.s32 %f2442, %r752; + mov.f32 %f2443, 0f34000000; + fma.rn.f32 %f2444, %f2442, %f2443, %f2440; + add.f32 %f2445, %f2441, 0fBF800000; + mov.f32 %f2446, 0f3E1039F6; + mov.f32 %f2447, 0fBE055027; + fma.rn.f32 %f2448, %f2447, %f2445, %f2446; + mov.f32 %f2449, 0fBDF8CDCC; + fma.rn.f32 %f2450, %f2448, %f2445, %f2449; + mov.f32 %f2451, 0f3E0F2955; + fma.rn.f32 %f2452, %f2450, %f2445, %f2451; + mov.f32 %f2453, 0fBE2AD8B9; + fma.rn.f32 %f2454, %f2452, %f2445, %f2453; + mov.f32 %f2455, 0f3E4CED0B; + fma.rn.f32 %f2456, %f2454, %f2445, %f2455; + mov.f32 %f2457, 0fBE7FFF22; + fma.rn.f32 %f2458, %f2456, %f2445, %f2457; + mov.f32 %f2459, 0f3EAAAA78; + fma.rn.f32 %f2460, %f2458, %f2445, %f2459; + mov.f32 %f2461, 0fBF000000; + fma.rn.f32 %f2462, %f2460, %f2445, %f2461; + mul.f32 %f2463, %f2445, %f2462; + fma.rn.f32 %f2464, %f2463, %f2445, %f2445; + mov.f32 %f2465, 0f3F317218; + fma.rn.f32 %f2948, %f2444, %f2465, %f2464; + setp.lt.u32 %p653, %r750, 2139095040; + @%p653 bra $L__BB5_421; + + mov.f32 %f2466, 0f7F800000; + fma.rn.f32 %f2948, %f440, %f2466, %f2466; + +$L__BB5_421: + setp.eq.f32 %p654, %f440, 0f00000000; + selp.f32 %f2467, 0fFF800000, %f2948, %p654; + mul.f32 %f2468, %f438, %f2467; + sub.f32 %f444, %f2468, %f346; + mul.f32 %f2469, %f438, 0f4B000000; + setp.lt.f32 %p655, %f438, 0f00800000; + selp.f32 %f445, %f2469, %f438, %p655; + selp.f32 %f2470, 0fC1B80000, 0f00000000, %p655; + mov.b32 %r754, %f445; + add.s32 %r755, %r754, -1059760811; + and.b32 %r756, %r755, -8388608; + sub.s32 %r757, %r754, %r756; + mov.b32 %f2471, %r757; + cvt.rn.f32.s32 %f2472, %r756; + fma.rn.f32 %f2474, %f2472, %f2443, %f2470; + add.f32 %f2475, %f2471, 0fBF800000; + fma.rn.f32 %f2478, %f2447, %f2475, %f2446; + fma.rn.f32 %f2480, %f2478, %f2475, %f2449; + fma.rn.f32 %f2482, %f2480, %f2475, %f2451; + fma.rn.f32 %f2484, %f2482, %f2475, %f2453; + fma.rn.f32 %f2486, %f2484, %f2475, %f2455; + fma.rn.f32 %f2488, %f2486, %f2475, %f2457; + fma.rn.f32 %f2490, %f2488, %f2475, %f2459; + fma.rn.f32 %f2492, %f2490, %f2475, %f2461; + mul.f32 %f2493, %f2475, %f2492; + fma.rn.f32 %f2494, %f2493, %f2475, %f2475; + fma.rn.f32 %f2949, %f2474, %f2465, %f2494; + setp.lt.u32 %p656, %r754, 2139095040; + @%p656 bra $L__BB5_423; + + mov.f32 %f2496, 0f7F800000; + fma.rn.f32 %f2949, %f445, %f2496, %f2496; + +$L__BB5_423: + setp.eq.f32 %p657, %f445, 0f00000000; + selp.f32 %f2497, 0fFF800000, %f2949, %p657; + mul.f32 %f2498, %f438, %f2497; + sub.f32 %f2499, %f444, %f2498; + add.f32 %f2500, %f347, %f2499; + add.f32 %f2950, %f2950, %f2500; + bra.uni $L__BB5_424; + +$L__BB5_418: + neg.f32 %f2437, %f346; + sub.f32 %f2438, %f2437, %f2885; + add.f32 %f2950, %f2950, %f2438; + +$L__BB5_424: + add.s32 %r788, %r788, 1; + setp.lt.s32 %p658, %r788, %r102; + @%p658 bra $L__BB5_340; + + add.s32 %r787, %r787, 1; + setp.lt.s32 %p659, %r787, %r102; + @%p659 bra $L__BB5_339; + +$L__BB5_426: + ld.param.u64 %rd58, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_9]; + ld.param.u64 %rd57, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_8]; + ld.param.u32 %r767, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_10]; + ld.param.u64 %rd56, [_Z25kernel_MLEFit_SCMOSXYNBS_PKfS0_S0_fiiiPfS1_S1_i_param_7]; + rcp.rn.f32 %f2501, %f2919; + mov.f32 %f2502, 0f3F800000; + mul.f32 %f2503, %f2501, %f2918; + mul.f32 %f2504, %f2501, %f2917; + mul.f32 %f2505, %f2501, %f2916; + mul.f32 %f2506, %f2501, %f2915; + fma.rn.f32 %f2507, %f2503, %f2918, 0f00000000; + sub.f32 %f2509, %f2914, %f2507; + fma.rn.f32 %f2510, %f2504, %f2918, 0f00000000; + rcp.rn.f32 %f2511, %f2509; + sub.f32 %f2512, %f2913, %f2510; + mul.f32 %f2513, %f2511, %f2512; + fma.rn.f32 %f2514, %f2505, %f2918, 0f00000000; + sub.f32 %f2515, %f2912, %f2514; + mul.f32 %f2516, %f2511, %f2515; + fma.rn.f32 %f2517, %f2506, %f2918, 0f00000000; + sub.f32 %f2518, %f2911, %f2517; + mul.f32 %f2519, %f2511, %f2518; + fma.rn.f32 %f2520, %f2503, %f2917, 0f00000000; + sub.f32 %f2521, %f2913, %f2520; + fma.rn.f32 %f2522, %f2504, %f2917, 0f00000000; + fma.rn.f32 %f2523, %f2513, %f2521, %f2522; + sub.f32 %f2524, %f2910, %f2523; + fma.rn.f32 %f2525, %f2505, %f2917, 0f00000000; + fma.rn.f32 %f2526, %f2516, %f2521, %f2525; + rcp.rn.f32 %f2527, %f2524; + sub.f32 %f2528, %f2909, %f2526; + mul.f32 %f2529, %f2527, %f2528; + fma.rn.f32 %f2530, %f2506, %f2917, 0f00000000; + fma.rn.f32 %f2531, %f2519, %f2521, %f2530; + sub.f32 %f2532, %f2908, %f2531; + mul.f32 %f2533, %f2527, %f2532; + fma.rn.f32 %f2534, %f2503, %f2916, 0f00000000; + sub.f32 %f2535, %f2912, %f2534; + fma.rn.f32 %f2536, %f2504, %f2916, 0f00000000; + fma.rn.f32 %f2537, %f2513, %f2535, %f2536; + sub.f32 %f2538, %f2909, %f2537; + fma.rn.f32 %f2539, %f2505, %f2916, 0f00000000; + fma.rn.f32 %f2540, %f2516, %f2535, %f2539; + fma.rn.f32 %f2541, %f2529, %f2538, %f2540; + sub.f32 %f2542, %f2920, %f2541; + fma.rn.f32 %f2543, %f2506, %f2916, 0f00000000; + fma.rn.f32 %f2544, %f2519, %f2535, %f2543; + fma.rn.f32 %f2545, %f2533, %f2538, %f2544; + rcp.rn.f32 %f2546, %f2542; + sub.f32 %f2547, %f2921, %f2545; + mul.f32 %f2548, %f2546, %f2547; + fma.rn.f32 %f2549, %f2503, %f2915, 0f00000000; + sub.f32 %f2550, %f2911, %f2549; + fma.rn.f32 %f2551, %f2504, %f2915, 0f00000000; + fma.rn.f32 %f2552, %f2513, %f2550, %f2551; + sub.f32 %f2553, %f2908, %f2552; + fma.rn.f32 %f2554, %f2505, %f2915, 0f00000000; + fma.rn.f32 %f2555, %f2516, %f2550, %f2554; + fma.rn.f32 %f2556, %f2529, %f2553, %f2555; + sub.f32 %f2557, %f2921, %f2556; + fma.rn.f32 %f2558, %f2506, %f2915, 0f00000000; + fma.rn.f32 %f2559, %f2519, %f2550, %f2558; + fma.rn.f32 %f2560, %f2533, %f2553, %f2559; + fma.rn.f32 %f2561, %f2548, %f2557, %f2560; + sub.f32 %f2562, %f2922, %f2561; + add.f32 %f2563, %f2503, 0f00000000; + sub.f32 %f2564, %f1509, %f2563; + add.f32 %f2565, %f2504, 0f00000000; + fma.rn.f32 %f2566, %f2513, %f2564, %f2565; + sub.f32 %f2567, %f1509, %f2566; + add.f32 %f2568, %f2505, 0f00000000; + fma.rn.f32 %f2569, %f2516, %f2564, %f2568; + fma.rn.f32 %f2570, %f2529, %f2567, %f2569; + sub.f32 %f2571, %f1509, %f2570; + add.f32 %f2572, %f2506, 0f00000000; + fma.rn.f32 %f2573, %f2519, %f2564, %f2572; + fma.rn.f32 %f2574, %f2533, %f2567, %f2573; + fma.rn.f32 %f2575, %f2548, %f2571, %f2574; + sub.f32 %f2576, %f1509, %f2575; + div.rn.f32 %f2577, %f2576, %f2562; + fma.rn.f32 %f2578, %f2557, %f2577, 0f00000000; + sub.f32 %f2579, %f2571, %f2578; + mul.f32 %f2580, %f2546, %f2579; + fma.rn.f32 %f2581, %f2538, %f2580, 0f00000000; + fma.rn.f32 %f2582, %f2553, %f2577, %f2581; + sub.f32 %f2583, %f2567, %f2582; + mul.f32 %f2584, %f2527, %f2583; + fma.rn.f32 %f2585, %f2521, %f2584, 0f00000000; + fma.rn.f32 %f2586, %f2535, %f2580, %f2585; + fma.rn.f32 %f2587, %f2550, %f2577, %f2586; + sub.f32 %f2588, %f2564, %f2587; + mul.f32 %f2589, %f2511, %f2588; + fma.rn.f32 %f2590, %f2918, %f2589, 0f00000000; + fma.rn.f32 %f2591, %f2917, %f2584, %f2590; + fma.rn.f32 %f2592, %f2916, %f2580, %f2591; + fma.rn.f32 %f2593, %f2915, %f2577, %f2592; + sub.f32 %f2594, %f2502, %f2593; + mul.f32 %f2595, %f2501, %f2594; + fma.rn.f32 %f2596, %f2503, 0f00000000, 0f00000000; + sub.f32 %f2597, %f2502, %f2596; + fma.rn.f32 %f2598, %f2504, 0f00000000, 0f00000000; + fma.rn.f32 %f2599, %f2513, %f2597, %f2598; + sub.f32 %f2600, %f1509, %f2599; + fma.rn.f32 %f2601, %f2505, 0f00000000, 0f00000000; + fma.rn.f32 %f2602, %f2516, %f2597, %f2601; + fma.rn.f32 %f2603, %f2529, %f2600, %f2602; + sub.f32 %f2604, %f1509, %f2603; + fma.rn.f32 %f2605, %f2506, 0f00000000, 0f00000000; + fma.rn.f32 %f2606, %f2519, %f2597, %f2605; + fma.rn.f32 %f2607, %f2533, %f2600, %f2606; + fma.rn.f32 %f2608, %f2548, %f2604, %f2607; + sub.f32 %f2609, %f1509, %f2608; + div.rn.f32 %f2610, %f2609, %f2562; + fma.rn.f32 %f2611, %f2557, %f2610, 0f00000000; + sub.f32 %f2612, %f2604, %f2611; + mul.f32 %f2613, %f2546, %f2612; + fma.rn.f32 %f2614, %f2538, %f2613, 0f00000000; + fma.rn.f32 %f2615, %f2553, %f2610, %f2614; + sub.f32 %f2616, %f2600, %f2615; + mul.f32 %f2617, %f2527, %f2616; + fma.rn.f32 %f2618, %f2521, %f2617, 0f00000000; + fma.rn.f32 %f2619, %f2535, %f2613, %f2618; + fma.rn.f32 %f2620, %f2550, %f2610, %f2619; + sub.f32 %f2621, %f2597, %f2620; + mul.f32 %f2622, %f2511, %f2621; + sub.f32 %f2623, %f1509, %f2596; + fma.rn.f32 %f2624, %f2513, %f2623, %f2598; + sub.f32 %f2625, %f2502, %f2624; + fma.rn.f32 %f2626, %f2516, %f2623, %f2601; + fma.rn.f32 %f2627, %f2529, %f2625, %f2626; + sub.f32 %f2628, %f1509, %f2627; + fma.rn.f32 %f2629, %f2519, %f2623, %f2605; + fma.rn.f32 %f2630, %f2533, %f2625, %f2629; + fma.rn.f32 %f2631, %f2548, %f2628, %f2630; + sub.f32 %f2632, %f1509, %f2631; + div.rn.f32 %f2633, %f2632, %f2562; + fma.rn.f32 %f2634, %f2557, %f2633, 0f00000000; + sub.f32 %f2635, %f2628, %f2634; + mul.f32 %f2636, %f2546, %f2635; + fma.rn.f32 %f2637, %f2538, %f2636, 0f00000000; + fma.rn.f32 %f2638, %f2553, %f2633, %f2637; + sub.f32 %f2639, %f2625, %f2638; + mul.f32 %f2640, %f2527, %f2639; + sub.f32 %f2641, %f1509, %f2624; + fma.rn.f32 %f2642, %f2529, %f2641, %f2626; + sub.f32 %f2643, %f2502, %f2642; + fma.rn.f32 %f2644, %f2533, %f2641, %f2629; + fma.rn.f32 %f2645, %f2548, %f2643, %f2644; + sub.f32 %f2646, %f1509, %f2645; + div.rn.f32 %f2647, %f2646, %f2562; + fma.rn.f32 %f2648, %f2557, %f2647, 0f00000000; + sub.f32 %f2649, %f2643, %f2648; + mul.f32 %f2650, %f2546, %f2649; + sub.f32 %f2651, %f1509, %f2642; + fma.rn.f32 %f2652, %f2548, %f2651, %f2644; + sub.f32 %f2653, %f2502, %f2652; + div.rn.f32 %f2654, %f2653, %f2562; + cvta.to.global.u64 %rd38, %rd56; + mul.wide.s32 %rd39, %r1, 4; + add.s64 %rd40, %rd38, %rd39; + st.global.f32 [%rd40], %f2890; + add.s32 %r762, %r1, %r767; + mul.wide.s32 %rd41, %r767, 4; + add.s64 %rd42, %rd40, %rd41; + st.global.f32 [%rd42], %f2889; + add.s32 %r763, %r762, %r767; + shl.b32 %r764, %r767, 3; + cvt.s64.s32 %rd43, %r764; + add.s64 %rd44, %rd40, %rd43; + st.global.f32 [%rd44], %f2888; + add.s32 %r765, %r763, %r767; + mul.wide.s32 %rd45, %r765, 4; + add.s64 %rd46, %rd38, %rd45; + st.global.f32 [%rd46], %f2887; + add.s64 %rd47, %rd44, %rd43; + st.global.f32 [%rd47], %f2886; + cvta.to.global.u64 %rd48, %rd57; + add.s64 %rd49, %rd48, %rd39; + st.global.f32 [%rd49], %f2595; + add.s64 %rd50, %rd49, %rd41; + st.global.f32 [%rd50], %f2622; + add.s64 %rd51, %rd49, %rd43; + st.global.f32 [%rd51], %f2640; + add.s64 %rd52, %rd48, %rd45; + st.global.f32 [%rd52], %f2650; + add.s64 %rd53, %rd51, %rd43; + st.global.f32 [%rd53], %f2654; + cvta.to.global.u64 %rd54, %rd58; + add.s64 %rd55, %rd54, %rd39; + st.global.f32 [%rd55], %f2950; + +$L__BB5_427: ret; -} +} // .globl _Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i .visible .entry _Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i( .param .u64 _Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_0, @@ -21662,4239 +34941,9688 @@ BB5_219: .param .u32 _Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_18 ) { - .local .align 4 .b8 __local_depot6[100]; - .reg .b64 %SP; - .reg .b64 %SPL; - .reg .pred %p<384>; - .reg .f32 %f<3424>; - .reg .b32 %r<332>; - .reg .b64 %rd<120>; - - - mov.u64 %SPL, __local_depot6; - ld.param.u64 %rd46, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_0]; - ld.param.u64 %rd47, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_1]; - ld.param.u64 %rd48, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_2]; - ld.param.u64 %rd49, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_3]; - ld.param.f32 %f656, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_4]; - ld.param.f32 %f657, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_5]; - ld.param.f32 %f658, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_6]; - ld.param.f32 %f659, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_7]; - ld.param.f32 %f660, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_8]; - ld.param.f32 %f661, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_9]; - ld.param.f32 %f662, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_10]; - ld.param.f32 %f663, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_11]; - ld.param.u32 %r63, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_12]; - ld.param.u32 %r64, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_13]; - ld.param.u32 %r65, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_14]; - ld.param.u32 %r66, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_18]; - cvta.to.global.u64 %rd1, %rd46; - add.u64 %rd2, %SPL, 0; - mov.u32 %r67, %ntid.x; - mov.u32 %r68, %ctaid.x; - mov.u32 %r69, %tid.x; - mad.lo.s32 %r1, %r67, %r68, %r69; - setp.ge.s32 %p17, %r1, %r66; - @%p17 bra BB6_227; - - cvta.to.global.u64 %rd3, %rd49; - mov.u32 %r70, 0; - mov.u64 %rd107, %rd2; - mov.u32 %r308, %r70; - -BB6_2: - st.local.u32 [%rd107], %r70; - add.s64 %rd107, %rd107, 4; - add.s32 %r308, %r308, 1; - setp.lt.u32 %p18, %r308, 25; - @%p18 bra BB6_2; - - mul.lo.s32 %r72, %r63, %r63; - mul.lo.s32 %r4, %r72, %r1; - mul.wide.s32 %rd55, %r1, 4; - add.s64 %rd7, %rd3, %rd55; - mov.f32 %f666, 0f00000000; - setp.lt.s32 %p19, %r63, 1; - mov.f32 %f1, %f666; - mov.f32 %f2, %f666; - mov.f32 %f3, %f666; - @%p19 bra BB6_17; - - and.b32 %r5, %r63, 3; - shl.b32 %r6, %r63, 2; - mov.f32 %f669, 0f00000000; - mov.u32 %r73, 0; - mov.u32 %r309, %r73; - mov.f32 %f1, %f669; - mov.f32 %f2, %f669; - mov.f32 %f3, %f669; - -BB6_5: - cvt.rn.f32.s32 %f4, %r309; - setp.eq.s32 %p20, %r5, 0; - @%p20 bra BB6_6; - - setp.eq.s32 %p21, %r5, 1; - @%p21 bra BB6_8; - bra.uni BB6_9; - -BB6_8: - mov.u32 %r311, %r73; - bra.uni BB6_13; - -BB6_6: - mov.u32 %r313, %r73; - mov.f32 %f3266, %f1; - mov.f32 %f3267, %f2; - mov.f32 %f3268, %f3; - mov.f32 %f1, %f669; - mov.f32 %f2, %f669; - mov.f32 %f3, %f669; - bra.uni BB6_14; - -BB6_9: - setp.eq.s32 %p22, %r5, 2; - @%p22 bra BB6_10; - bra.uni BB6_11; - -BB6_10: - mov.u32 %r310, %r73; - bra.uni BB6_12; - -BB6_11: - add.s32 %r78, %r309, %r4; - mul.wide.s32 %rd56, %r78, 4; - add.s64 %rd57, %rd1, %rd56; - ld.global.f32 %f673, [%rd57]; - fma.rn.f32 %f3, %f4, %f673, %f3; - fma.rn.f32 %f2, %f673, 0f00000000, %f2; - add.f32 %f1, %f1, %f673; - mov.u32 %r310, 1; - -BB6_12: - neg.s32 %r79, %r310; - and.b32 %r80, %r79, %r63; - add.s32 %r81, %r80, %r309; - add.s32 %r82, %r81, %r4; - mul.wide.s32 %rd58, %r82, 4; - add.s64 %rd59, %rd1, %rd58; - ld.global.f32 %f674, [%rd59]; - fma.rn.f32 %f3, %f4, %f674, %f3; - cvt.rn.f32.s32 %f675, %r310; - fma.rn.f32 %f2, %f675, %f674, %f2; - add.f32 %f1, %f1, %f674; - add.s32 %r311, %r310, 1; - -BB6_13: - mad.lo.s32 %r83, %r311, %r63, %r309; - add.s32 %r84, %r83, %r4; - mul.wide.s32 %rd60, %r84, 4; - add.s64 %rd61, %rd1, %rd60; - ld.global.f32 %f676, [%rd61]; - fma.rn.f32 %f3268, %f4, %f676, %f3; - cvt.rn.f32.s32 %f677, %r311; - fma.rn.f32 %f3267, %f677, %f676, %f2; - add.f32 %f3266, %f1, %f676; - add.s32 %r313, %r311, 1; - mov.f32 %f1, %f3266; - mov.f32 %f2, %f3267; - mov.f32 %f3, %f3268; - -BB6_14: - setp.lt.u32 %p23, %r63, 4; - @%p23 bra BB6_16; - -BB6_15: - mad.lo.s32 %r85, %r313, %r63, %r309; - add.s32 %r86, %r85, %r4; - mul.wide.s32 %rd62, %r86, 4; - add.s64 %rd63, %rd1, %rd62; - ld.global.f32 %f678, [%rd63]; - fma.rn.f32 %f679, %f4, %f678, %f3268; - cvt.rn.f32.s32 %f680, %r313; - fma.rn.f32 %f681, %f680, %f678, %f3267; - add.f32 %f682, %f3266, %f678; - cvt.s64.s32 %rd64, %r6; - add.s64 %rd65, %rd63, %rd64; - ld.global.f32 %f683, [%rd65]; - fma.rn.f32 %f684, %f4, %f683, %f679; - add.s32 %r87, %r313, 1; - cvt.rn.f32.s32 %f685, %r87; - fma.rn.f32 %f686, %f685, %f683, %f681; - add.f32 %f687, %f682, %f683; - add.s64 %rd66, %rd65, %rd64; - ld.global.f32 %f688, [%rd66]; - fma.rn.f32 %f689, %f4, %f688, %f684; - add.s32 %r88, %r313, 2; - cvt.rn.f32.s32 %f690, %r88; - fma.rn.f32 %f691, %f690, %f688, %f686; - add.f32 %f692, %f687, %f688; - add.s64 %rd67, %rd66, %rd64; - ld.global.f32 %f693, [%rd67]; - fma.rn.f32 %f3268, %f4, %f693, %f689; - add.s32 %r89, %r313, 3; - cvt.rn.f32.s32 %f694, %r89; - fma.rn.f32 %f3267, %f694, %f693, %f691; - add.f32 %f3266, %f692, %f693; - add.s32 %r313, %r313, 4; - setp.lt.s32 %p24, %r313, %r63; - mov.f32 %f1, %f3266; - mov.f32 %f2, %f3267; - mov.f32 %f3, %f3268; - @%p24 bra BB6_15; - -BB6_16: - add.s32 %r309, %r309, 1; - setp.lt.s32 %p25, %r309, %r63; - @%p25 bra BB6_5; - -BB6_17: - div.rn.f32 %f3370, %f3, %f1; - div.rn.f32 %f3369, %f2, %f1; - mov.f32 %f697, 0f3F000000; - div.rn.f32 %f698, %f697, %f656; - div.rn.f32 %f40, %f698, %f656; - mov.f32 %f3277, 0f51BA43B7; - mov.f32 %f3278, %f666; - @%p19 bra BB6_36; - - and.b32 %r16, %r63, 3; - mov.f32 %f3278, 0f00000000; - mov.u32 %r90, 0; - mov.f32 %f3277, 0f51BA43B7; - mov.u32 %r314, %r90; - -BB6_19: - mov.u32 %r315, %r90; - -BB6_20: - cvt.rn.f32.s32 %f703, %r315; - mul.f32 %f704, %f703, %f703; - mul.f32 %f45, %f40, %f704; - neg.f32 %f46, %f45; - mul.f32 %f47, %f45, 0fBFB8AA3B; - mov.f32 %f702, 0f00000000; - mov.f32 %f3295, %f702; - mov.f32 %f3296, %f702; - mov.u32 %r316, %r90; - -BB6_21: - sub.s32 %r94, %r316, %r314; - cvt.rn.f32.s32 %f50, %r94; - mul.lo.s32 %r20, %r316, %r63; - setp.eq.s32 %p27, %r16, 0; - @%p27 bra BB6_22; - - setp.eq.s32 %p28, %r16, 1; - @%p28 bra BB6_26; - bra.uni BB6_24; - -BB6_26: - mul.f32 %f719, %f50, %f50; - mul.f32 %f3286, %f40, %f719; - neg.f32 %f720, %f3286; - mul.f32 %f721, %f3286, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f722, %f721; - mov.f32 %f723, 0fBF317200; - fma.rn.f32 %f724, %f722, %f723, %f720; - mov.f32 %f725, 0fB5BFBE8E; - fma.rn.f32 %f726, %f722, %f725, %f724; - mul.f32 %f727, %f726, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f728, %f727; - add.f32 %f729, %f722, 0f00000000; - ex2.approx.f32 %f730, %f729; - mul.f32 %f3285, %f728, %f730; - mov.u32 %r318, 0; - bra.uni BB6_29; - -BB6_22: - mov.f32 %f3289, %f3295; - mov.f32 %f3290, %f3296; - mov.u32 %r320, %r90; - mov.f32 %f3295, %f702; - mov.f32 %f3296, %f702; - bra.uni BB6_30; - -BB6_24: - setp.ne.s32 %p29, %r16, 2; - @%p29 bra BB6_27; - - mul.f32 %f707, %f50, %f50; - mul.f32 %f3286, %f40, %f707; - neg.f32 %f708, %f3286; - mul.f32 %f709, %f3286, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f710, %f709; - mov.f32 %f711, 0fBF317200; - fma.rn.f32 %f712, %f710, %f711, %f708; - mov.f32 %f713, 0fB5BFBE8E; - fma.rn.f32 %f714, %f710, %f713, %f712; - mul.f32 %f715, %f714, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f716, %f715; - add.f32 %f717, %f710, 0f00000000; - ex2.approx.f32 %f718, %f717; - mul.f32 %f3285, %f716, %f718; - mov.u32 %r317, 0; - bra.uni BB6_28; - -BB6_27: - setp.lt.f32 %p30, %f45, 0fC2D20000; - mul.f32 %f731, %f50, %f50; - mul.f32 %f3286, %f40, %f731; - neg.f32 %f732, %f3286; - mul.f32 %f733, %f3286, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f734, %f733; - mov.f32 %f735, 0fBF317200; - fma.rn.f32 %f736, %f734, %f735, %f732; - mov.f32 %f737, 0fB5BFBE8E; - fma.rn.f32 %f738, %f734, %f737, %f736; - mul.f32 %f739, %f738, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f740, %f739; - add.f32 %f741, %f734, 0f00000000; - ex2.approx.f32 %f742, %f741; - mul.f32 %f3285, %f740, %f742; - setp.gt.f32 %p31, %f3286, 0f42D20000; - selp.f32 %f743, 0f00000000, %f3285, %p31; - setp.lt.f32 %p32, %f3286, 0fC2D20000; - selp.f32 %f744, 0f7F800000, %f743, %p32; - cvt.rzi.f32.f32 %f745, %f47; - fma.rn.f32 %f746, %f745, %f735, %f46; - fma.rn.f32 %f747, %f745, %f737, %f746; - mul.f32 %f748, %f747, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f749, %f748; - add.f32 %f750, %f745, 0f00000000; - ex2.approx.f32 %f751, %f750; - mul.f32 %f752, %f749, %f751; - setp.gt.f32 %p33, %f45, 0f42D20000; - selp.f32 %f753, 0f00000000, %f752, %p33; - selp.f32 %f754, 0f7F800000, %f753, %p30; - mul.f32 %f755, %f744, %f754; - add.s32 %r98, %r20, %r4; - mul.wide.s32 %rd68, %r98, 4; - add.s64 %rd69, %rd1, %rd68; - ld.global.f32 %f756, [%rd69]; - fma.rn.f32 %f3296, %f756, %f755, %f3296; - add.f32 %f3295, %f3295, %f755; - mov.u32 %r317, 1; - -BB6_28: - sub.s32 %r99, %r315, %r317; - cvt.rn.f32.s32 %f757, %r99; - mul.f32 %f758, %f757, %f757; - setp.gt.f32 %p34, %f3286, 0f42D20000; - selp.f32 %f759, 0f00000000, %f3285, %p34; - setp.lt.f32 %p35, %f3286, 0fC2D20000; - selp.f32 %f760, 0f7F800000, %f759, %p35; - mul.f32 %f761, %f40, %f758; - neg.f32 %f762, %f761; - mul.f32 %f763, %f761, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f764, %f763; - mov.f32 %f765, 0fBF317200; - fma.rn.f32 %f766, %f764, %f765, %f762; - mov.f32 %f767, 0fB5BFBE8E; - fma.rn.f32 %f768, %f764, %f767, %f766; - mul.f32 %f769, %f768, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f770, %f769; - add.f32 %f771, %f764, 0f00000000; - ex2.approx.f32 %f772, %f771; - mul.f32 %f773, %f770, %f772; - setp.gt.f32 %p36, %f761, 0f42D20000; - selp.f32 %f774, 0f00000000, %f773, %p36; - setp.lt.f32 %p37, %f761, 0fC2D20000; - selp.f32 %f775, 0f7F800000, %f774, %p37; - mul.f32 %f776, %f760, %f775; - add.s32 %r100, %r317, %r20; - add.s32 %r101, %r100, %r4; - mul.wide.s32 %rd70, %r101, 4; - add.s64 %rd71, %rd1, %rd70; - ld.global.f32 %f777, [%rd71]; - fma.rn.f32 %f3296, %f777, %f776, %f3296; - add.f32 %f3295, %f3295, %f776; - add.s32 %r318, %r317, 1; - -BB6_29: - sub.s32 %r102, %r315, %r318; - cvt.rn.f32.s32 %f778, %r102; - mul.f32 %f779, %f778, %f778; - setp.gt.f32 %p38, %f3286, 0f42D20000; - selp.f32 %f780, 0f00000000, %f3285, %p38; - setp.lt.f32 %p39, %f3286, 0fC2D20000; - selp.f32 %f781, 0f7F800000, %f780, %p39; - mul.f32 %f782, %f40, %f779; - neg.f32 %f783, %f782; - mul.f32 %f784, %f782, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f785, %f784; - mov.f32 %f786, 0fBF317200; - fma.rn.f32 %f787, %f785, %f786, %f783; - mov.f32 %f788, 0fB5BFBE8E; - fma.rn.f32 %f789, %f785, %f788, %f787; - mul.f32 %f790, %f789, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f791, %f790; - add.f32 %f792, %f785, 0f00000000; - ex2.approx.f32 %f793, %f792; - mul.f32 %f794, %f791, %f793; - setp.gt.f32 %p40, %f782, 0f42D20000; - selp.f32 %f795, 0f00000000, %f794, %p40; - setp.lt.f32 %p41, %f782, 0fC2D20000; - selp.f32 %f796, 0f7F800000, %f795, %p41; - mul.f32 %f797, %f781, %f796; - add.s32 %r103, %r318, %r20; - add.s32 %r104, %r103, %r4; - mul.wide.s32 %rd72, %r104, 4; - add.s64 %rd73, %rd1, %rd72; - ld.global.f32 %f798, [%rd73]; - fma.rn.f32 %f3290, %f798, %f797, %f3296; - add.f32 %f3289, %f3295, %f797; - add.s32 %r320, %r318, 1; - mov.f32 %f3295, %f3289; - mov.f32 %f3296, %f3290; - -BB6_30: - setp.lt.u32 %p42, %r63, 4; - @%p42 bra BB6_33; - - mul.f32 %f799, %f50, %f50; - mul.f32 %f800, %f40, %f799; - neg.f32 %f801, %f800; - mul.f32 %f802, %f800, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f803, %f802; - mov.f32 %f804, 0fBF317200; - fma.rn.f32 %f805, %f803, %f804, %f801; - mov.f32 %f806, 0fB5BFBE8E; - fma.rn.f32 %f807, %f803, %f806, %f805; - mul.f32 %f808, %f807, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f809, %f808; - add.f32 %f810, %f803, 0f00000000; - ex2.approx.f32 %f811, %f810; - mul.f32 %f812, %f809, %f811; - setp.gt.f32 %p43, %f800, 0f42D20000; - selp.f32 %f813, 0f00000000, %f812, %p43; - setp.lt.f32 %p44, %f800, 0fC2D20000; - selp.f32 %f75, 0f7F800000, %f813, %p44; - mov.f32 %f3295, %f3289; - mov.f32 %f3296, %f3290; - -BB6_32: - sub.s32 %r105, %r315, %r320; - cvt.rn.f32.s32 %f814, %r105; - mul.f32 %f815, %f814, %f814; - mul.f32 %f816, %f40, %f815; - neg.f32 %f817, %f816; - mul.f32 %f818, %f816, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f819, %f818; - fma.rn.f32 %f821, %f819, %f804, %f817; - fma.rn.f32 %f823, %f819, %f806, %f821; - mul.f32 %f824, %f823, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f825, %f824; - add.f32 %f826, %f819, 0f00000000; - ex2.approx.f32 %f827, %f826; - mul.f32 %f828, %f825, %f827; - setp.gt.f32 %p45, %f816, 0f42D20000; - selp.f32 %f829, 0f00000000, %f828, %p45; - setp.lt.f32 %p46, %f816, 0fC2D20000; - selp.f32 %f830, 0f7F800000, %f829, %p46; - mul.f32 %f831, %f75, %f830; - add.s32 %r106, %r320, %r20; - add.s32 %r107, %r106, %r4; - mul.wide.s32 %rd74, %r107, 4; - add.s64 %rd75, %rd1, %rd74; - ld.global.f32 %f832, [%rd75]; - fma.rn.f32 %f833, %f832, %f831, %f3296; - add.f32 %f834, %f3295, %f831; - add.s32 %r108, %r320, 1; - sub.s32 %r109, %r315, %r108; - cvt.rn.f32.s32 %f835, %r109; - mul.f32 %f836, %f835, %f835; - mul.f32 %f837, %f40, %f836; - neg.f32 %f838, %f837; - mul.f32 %f839, %f837, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f840, %f839; - fma.rn.f32 %f841, %f840, %f804, %f838; - fma.rn.f32 %f842, %f840, %f806, %f841; - mul.f32 %f843, %f842, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f844, %f843; - add.f32 %f845, %f840, 0f00000000; - ex2.approx.f32 %f846, %f845; - mul.f32 %f847, %f844, %f846; - setp.gt.f32 %p47, %f837, 0f42D20000; - selp.f32 %f848, 0f00000000, %f847, %p47; - setp.lt.f32 %p48, %f837, 0fC2D20000; - selp.f32 %f849, 0f7F800000, %f848, %p48; - mul.f32 %f850, %f75, %f849; - ld.global.f32 %f851, [%rd75+4]; - fma.rn.f32 %f852, %f851, %f850, %f833; - add.f32 %f853, %f834, %f850; - add.s32 %r110, %r320, 2; - sub.s32 %r111, %r315, %r110; - cvt.rn.f32.s32 %f854, %r111; - mul.f32 %f855, %f854, %f854; - mul.f32 %f856, %f40, %f855; - neg.f32 %f857, %f856; - mul.f32 %f858, %f856, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f859, %f858; - fma.rn.f32 %f860, %f859, %f804, %f857; - fma.rn.f32 %f861, %f859, %f806, %f860; - mul.f32 %f862, %f861, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f863, %f862; - add.f32 %f864, %f859, 0f00000000; - ex2.approx.f32 %f865, %f864; - mul.f32 %f866, %f863, %f865; - setp.gt.f32 %p49, %f856, 0f42D20000; - selp.f32 %f867, 0f00000000, %f866, %p49; - setp.lt.f32 %p50, %f856, 0fC2D20000; - selp.f32 %f868, 0f7F800000, %f867, %p50; - mul.f32 %f869, %f75, %f868; - ld.global.f32 %f870, [%rd75+8]; - fma.rn.f32 %f871, %f870, %f869, %f852; - add.f32 %f872, %f853, %f869; - add.s32 %r112, %r320, 3; - sub.s32 %r113, %r315, %r112; - cvt.rn.f32.s32 %f873, %r113; - mul.f32 %f874, %f873, %f873; - mul.f32 %f875, %f40, %f874; - neg.f32 %f876, %f875; - mul.f32 %f877, %f875, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f878, %f877; - fma.rn.f32 %f879, %f878, %f804, %f876; - fma.rn.f32 %f880, %f878, %f806, %f879; - mul.f32 %f881, %f880, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f882, %f881; - add.f32 %f883, %f878, 0f00000000; - ex2.approx.f32 %f884, %f883; - mul.f32 %f885, %f882, %f884; - setp.gt.f32 %p51, %f875, 0f42D20000; - selp.f32 %f886, 0f00000000, %f885, %p51; - setp.lt.f32 %p52, %f875, 0fC2D20000; - selp.f32 %f887, 0f7F800000, %f886, %p52; - mul.f32 %f888, %f75, %f887; - ld.global.f32 %f889, [%rd75+12]; - fma.rn.f32 %f3296, %f889, %f888, %f871; - add.f32 %f3295, %f872, %f888; - add.s32 %r320, %r320, 4; - setp.lt.s32 %p53, %r320, %r63; - @%p53 bra BB6_32; - -BB6_33: - add.s32 %r316, %r316, 1; - setp.lt.s32 %p54, %r316, %r63; - @%p54 bra BB6_21; - - div.rn.f32 %f890, %f3296, %f3295; - max.f32 %f3278, %f3278, %f890; - min.f32 %f3277, %f3277, %f890; - add.s32 %r315, %r315, 1; - setp.lt.s32 %p55, %r315, %r63; - @%p55 bra BB6_20; - - add.s32 %r314, %r314, 1; - setp.lt.s32 %p56, %r314, %r63; - @%p56 bra BB6_19; - -BB6_36: - sub.f32 %f892, %f3278, %f3277; - add.f32 %f893, %f892, %f892; - mul.f32 %f894, %f893, 0f40490FD8; - mul.f32 %f895, %f894, %f656; - mul.f32 %f896, %f895, %f663; - mul.f32 %f897, %f896, 0f3FB504F3; - max.f32 %f3368, %f666, %f897; - ld.global.f32 %f3366, [%rd7]; - setp.lt.s32 %p57, %r65, 1; - @%p57 bra BB6_128; - - mul.f32 %f88, %f656, 0f3F000000; - mul.f32 %f89, %f663, 0f3F000000; - mul.f32 %f90, %f657, 0f40400000; - mul.f32 %f91, %f662, %f662; - mul.f32 %f92, %f91, %f662; - mul.f32 %f93, %f659, 0f40800000; - mul.f32 %f94, %f91, %f91; - mul.f32 %f95, %f658, 0f40400000; - mul.f32 %f96, %f660, 0f40800000; - mul.f32 %f97, %f656, 0fBE800000; - mul.f32 %f98, %f663, 0fBE800000; - mov.f32 %f900, 0f40000000; - div.rn.f32 %f99, %f900, %f91; - mul.f32 %f100, %f657, 0f40C00000; - mul.f32 %f101, %f659, 0f41400000; - mul.f32 %f102, %f658, 0f40C00000; - mul.f32 %f103, %f660, 0f41400000; - cvt.rn.f32.s32 %f104, %r64; - mov.u32 %r321, 0; - cvta.to.global.u64 %rd76, %rd47; - cvta.to.global.u64 %rd82, %rd48; - -BB6_38: - mov.f32 %f3315, 0f00000000; - mov.f32 %f3316, %f3315; - mov.f32 %f3317, %f3315; - mov.f32 %f3318, %f3315; - mov.f32 %f3319, %f3315; - mov.f32 %f3320, %f3315; - mov.f32 %f3321, %f3315; - mov.f32 %f3322, %f3315; - mov.f32 %f3323, %f3315; - mov.f32 %f3324, %f3315; - @%p19 bra BB6_127; - - sub.f32 %f921, %f3366, %f661; - div.rn.f32 %f922, %f921, %f662; - mul.f32 %f923, %f922, %f922; - mul.f32 %f924, %f922, %f923; - add.f32 %f925, %f923, 0f3F800000; - fma.rn.f32 %f926, %f924, %f657, %f925; - mul.f32 %f927, %f923, %f923; - fma.rn.f32 %f111, %f927, %f659, %f926; - add.f32 %f928, %f3366, %f661; - div.rn.f32 %f929, %f928, %f662; - mul.f32 %f930, %f929, %f929; - mul.f32 %f931, %f929, %f930; - add.f32 %f932, %f930, 0f3F800000; - fma.rn.f32 %f933, %f931, %f658, %f932; - mul.f32 %f934, %f930, %f930; - fma.rn.f32 %f112, %f934, %f660, %f933; - neg.f32 %f935, %f3368; - div.rn.f32 %f113, %f935, 0f40206C98; - div.rn.f32 %f114, %f3368, 0f40206C98; - add.f32 %f936, %f921, %f921; - div.rn.f32 %f937, %f936, %f91; - mul.f32 %f938, %f921, %f921; - mul.f32 %f939, %f90, %f938; - div.rn.f32 %f940, %f939, %f92; - add.f32 %f941, %f937, %f940; - mul.f32 %f942, %f921, %f938; - mul.f32 %f943, %f93, %f942; - div.rn.f32 %f944, %f943, %f94; - add.f32 %f115, %f941, %f944; - add.f32 %f945, %f928, %f928; - div.rn.f32 %f946, %f945, %f91; - mul.f32 %f947, %f928, %f928; - mul.f32 %f948, %f95, %f947; - div.rn.f32 %f949, %f948, %f92; - add.f32 %f950, %f946, %f949; - mul.f32 %f951, %f928, %f947; - mul.f32 %f952, %f96, %f951; - div.rn.f32 %f953, %f952, %f94; - add.f32 %f116, %f950, %f953; - mul.f32 %f954, %f100, %f921; - div.rn.f32 %f955, %f954, %f92; - add.f32 %f956, %f99, %f955; - mul.f32 %f957, %f101, %f938; - div.rn.f32 %f958, %f957, %f94; - add.f32 %f121, %f956, %f958; - mul.f32 %f959, %f102, %f928; - div.rn.f32 %f960, %f959, %f92; - add.f32 %f961, %f99, %f960; - mul.f32 %f962, %f103, %f947; - div.rn.f32 %f963, %f962, %f94; - add.f32 %f122, %f961, %f963; - mov.u32 %r322, 0; - mov.f32 %f3315, 0f00000000; - mov.f32 %f3316, %f3315; - mov.f32 %f3317, %f3315; - mov.f32 %f3318, %f3315; - mov.f32 %f3319, %f3315; - mov.f32 %f3320, %f3315; - mov.f32 %f3321, %f3315; - mov.f32 %f3322, %f3315; - mov.f32 %f3323, %f3315; - mov.f32 %f3324, %f3315; - -BB6_40: - mov.u32 %r323, 0; - add.f32 %f3194, %f112, %f112; - add.f32 %f3193, %f111, %f111; - sqrt.rn.f32 %f964, %f111; - mul.f32 %f965, %f964, %f656; - sqrt.rn.f32 %f966, %f112; - mul.f32 %f135, %f966, %f663; - div.rn.f32 %f968, %f697, %f965; - div.rn.f32 %f969, %f968, %f965; - cvt.rn.f32.s32 %f136, %r322; - sub.f32 %f137, %f136, %f3370; - add.f32 %f138, %f137, 0f3F800000; - sqrt.rn.f32 %f970, %f969; - mul.f32 %f139, %f138, %f970; - abs.f32 %f140, %f139; - mul.f32 %f141, %f139, %f139; - mul.f32 %f142, %f137, %f970; - abs.f32 %f143, %f142; - div.rn.f32 %f971, %f697, %f135; - div.rn.f32 %f972, %f971, %f135; - sqrt.rn.f32 %f145, %f972; - add.f32 %f973, %f136, 0f3F800000; - sub.f32 %f146, %f973, %f3370; - div.rn.f32 %f147, %f146, %f965; - mov.f32 %f974, 0f3F800000; - cvt.rzi.f32.f32 %f975, %f974; - add.f32 %f976, %f975, %f975; - sub.f32 %f978, %f900, %f976; - abs.f32 %f148, %f978; - setp.eq.f32 %p59, %f148, 0f3F800000; - abs.f32 %f149, %f147; - setp.lt.f32 %p60, %f149, 0f00800000; - mul.f32 %f979, %f149, 0f4B800000; - selp.f32 %f980, 0fC3170000, 0fC2FE0000, %p60; - selp.f32 %f981, %f979, %f149, %p60; - mov.b32 %r117, %f981; - and.b32 %r118, %r117, 8388607; - or.b32 %r119, %r118, 1065353216; - mov.b32 %f982, %r119; - shr.u32 %r120, %r117, 23; - cvt.rn.f32.u32 %f983, %r120; - add.f32 %f984, %f980, %f983; - setp.gt.f32 %p61, %f982, 0f3FB504F3; - mul.f32 %f985, %f982, 0f3F000000; - add.f32 %f986, %f984, 0f3F800000; - selp.f32 %f987, %f985, %f982, %p61; - selp.f32 %f988, %f986, %f984, %p61; - add.f32 %f150, %f987, 0fBF800000; - add.f32 %f151, %f987, 0f3F800000; - add.f32 %f152, %f150, %f150; - mov.f32 %f989, 0f3F317200; - mul.rn.f32 %f153, %f988, %f989; - mov.f32 %f990, 0f35BFBE8E; - mul.rn.f32 %f154, %f988, %f990; - setp.lt.f32 %p62, %f147, 0f00000000; - and.pred %p3, %p62, %p59; - add.f32 %f991, %f147, %f147; - selp.f32 %f155, %f991, 0f00000000, %p59; - div.rn.f32 %f156, %f137, %f965; - abs.f32 %f157, %f156; - setp.lt.f32 %p63, %f157, 0f00800000; - mul.f32 %f993, %f157, 0f4B800000; - selp.f32 %f994, 0fC3170000, 0fC2FE0000, %p63; - selp.f32 %f995, %f993, %f157, %p63; - mov.b32 %r121, %f995; - and.b32 %r122, %r121, 8388607; - or.b32 %r123, %r122, 1065353216; - mov.b32 %f996, %r123; - shr.u32 %r124, %r121, 23; - cvt.rn.f32.u32 %f997, %r124; - add.f32 %f998, %f994, %f997; - setp.gt.f32 %p64, %f996, 0f3FB504F3; - mul.f32 %f999, %f996, 0f3F000000; - add.f32 %f1000, %f998, 0f3F800000; - selp.f32 %f1001, %f999, %f996, %p64; - selp.f32 %f1002, %f1000, %f998, %p64; - add.f32 %f158, %f1001, 0fBF800000; - add.f32 %f159, %f1001, 0f3F800000; - add.f32 %f160, %f158, %f158; - mul.rn.f32 %f161, %f1002, %f989; - mul.rn.f32 %f162, %f1002, %f990; - setp.lt.f32 %p65, %f156, 0f00000000; - and.pred %p4, %p65, %p59; - add.f32 %f1003, %f156, %f156; - selp.f32 %f163, %f1003, 0f00000000, %p59; - div.rn.f32 %f164, %f113, %f965; - div.rn.f32 %f165, %f113, %f135; - div.rn.f32 %f166, %f164, %f965; - mov.f32 %f1005, 0fC0000000; - div.rn.f32 %f167, %f1005, %f965; - div.rn.f32 %f168, %f165, %f135; - div.rn.f32 %f169, %f1005, %f135; - div.rn.f32 %f1006, %f88, %f964; - div.rn.f32 %f1007, %f89, %f966; - mul.f32 %f170, %f1006, %f115; - mul.f32 %f171, %f1007, %f116; - mov.f32 %f1008, 0f3F400000; - cvt.rzi.f32.f32 %f1009, %f1008; - add.f32 %f1010, %f1009, %f1009; - mov.f32 %f1011, 0f3FC00000; - sub.f32 %f1012, %f1011, %f1010; - abs.f32 %f1013, %f1012; - setp.eq.f32 %p66, %f1013, 0f3F800000; - abs.f32 %f172, %f111; - setp.lt.f32 %p67, %f172, 0f00800000; - mul.f32 %f1014, %f172, 0f4B800000; - selp.f32 %f1015, 0fC3170000, 0fC2FE0000, %p67; - selp.f32 %f1016, %f1014, %f172, %p67; - mov.b32 %r125, %f1016; - and.b32 %r126, %r125, 8388607; - or.b32 %r127, %r126, 1065353216; - mov.b32 %f1017, %r127; - shr.u32 %r128, %r125, 23; - cvt.rn.f32.u32 %f1018, %r128; - add.f32 %f1019, %f1015, %f1018; - setp.gt.f32 %p68, %f1017, 0f3FB504F3; - mul.f32 %f1020, %f1017, 0f3F000000; - add.f32 %f1021, %f1019, 0f3F800000; - selp.f32 %f1022, %f1020, %f1017, %p68; - selp.f32 %f1023, %f1021, %f1019, %p68; - add.f32 %f173, %f1022, 0fBF800000; - add.f32 %f174, %f1022, 0f3F800000; - add.f32 %f175, %f173, %f173; - mul.rn.f32 %f176, %f1023, %f989; - mul.rn.f32 %f177, %f1023, %f990; - setp.lt.f32 %p69, %f111, 0f00000000; - and.pred %p5, %p69, %p66; - selp.f32 %f178, %f3193, 0f00000000, %p66; - abs.f32 %f180, %f112; - setp.lt.f32 %p70, %f180, 0f00800000; - mul.f32 %f1024, %f180, 0f4B800000; - selp.f32 %f1025, 0fC3170000, 0fC2FE0000, %p70; - selp.f32 %f1026, %f1024, %f180, %p70; - mov.b32 %r129, %f1026; - and.b32 %r130, %r129, 8388607; - or.b32 %r131, %r130, 1065353216; - mov.b32 %f1027, %r131; - shr.u32 %r132, %r129, 23; - cvt.rn.f32.u32 %f1028, %r132; - add.f32 %f1029, %f1025, %f1028; - setp.gt.f32 %p71, %f1027, 0f3FB504F3; - mul.f32 %f1030, %f1027, 0f3F000000; - add.f32 %f1031, %f1029, 0f3F800000; - selp.f32 %f1032, %f1030, %f1027, %p71; - selp.f32 %f1033, %f1031, %f1029, %p71; - add.f32 %f181, %f1032, 0fBF800000; - add.f32 %f182, %f1032, 0f3F800000; - add.f32 %f183, %f181, %f181; - mul.rn.f32 %f184, %f1033, %f989; - mul.rn.f32 %f185, %f1033, %f990; - setp.lt.f32 %p72, %f112, 0f00000000; - and.pred %p6, %p72, %p66; - selp.f32 %f186, %f3194, 0f00000000, %p66; - mul.f32 %f188, %f1006, %f121; - mul.f32 %f189, %f1007, %f122; - shl.b32 %r137, %r1, 1; - mul.wide.s32 %rd77, %r137, 4; - add.s64 %rd78, %rd76, %rd77; - ld.global.f32 %f192, [%rd78+4]; - ld.global.f32 %f193, [%rd78]; - mul.f32 %f1034, %f965, %f965; - mul.f32 %f1035, %f965, %f1034; - div.rn.f32 %f194, %f113, %f1035; - mul.f32 %f1036, %f135, %f135; - mul.f32 %f1037, %f135, %f1036; - div.rn.f32 %f195, %f113, %f1037; - mul.f32 %f1038, %f1034, %f1034; - mul.f32 %f1039, %f965, %f1038; - div.rn.f32 %f196, %f114, %f1039; - mul.f32 %f1040, %f138, %f138; - mul.f32 %f197, %f138, %f1040; - mul.f32 %f1042, %f1036, %f1036; - mul.f32 %f1043, %f135, %f1042; - div.rn.f32 %f199, %f114, %f1043; - -BB6_41: - setp.ltu.f32 %p73, %f140, 0f3F800000; - @%p73 bra BB6_43; - bra.uni BB6_42; - -BB6_43: - mov.f32 %f1062, 0f3BA0C9F8; - mov.f32 %f1063, 0fBA1268FB; - fma.rn.f32 %f1064, %f1063, %f141, %f1062; - mov.f32 %f1065, 0fBCDABFD4; - fma.rn.f32 %f1066, %f1064, %f141, %f1065; - mov.f32 %f1067, 0f3DE70331; - fma.rn.f32 %f1068, %f1066, %f141, %f1067; - mov.f32 %f1069, 0fBEC09330; - fma.rn.f32 %f1070, %f1068, %f141, %f1069; - mov.f32 %f1071, 0f3F906EBA; - fma.rn.f32 %f1072, %f1070, %f141, %f1071; - mul.f32 %f3325, %f139, %f1072; - bra.uni BB6_44; - -BB6_42: - mov.f32 %f3195, 0f3F800000; - setp.ltu.f32 %p74, %f140, 0f407AD445; - mov.f32 %f1044, 0f3A03BB71; - mov.f32 %f1045, 0fB7B730FB; - fma.rn.f32 %f1046, %f1045, %f140, %f1044; - mov.f32 %f1047, 0fBBACA3B3; - fma.rn.f32 %f1048, %f1046, %f140, %f1047; - mov.f32 %f1049, 0f3D0A7445; - fma.rn.f32 %f1050, %f1048, %f140, %f1049; - mov.f32 %f1051, 0fBE1B3B75; - fma.rn.f32 %f1052, %f1050, %f140, %f1051; - mov.f32 %f1053, 0fBF6B385A; - fma.rn.f32 %f1054, %f1052, %f140, %f1053; - mov.f32 %f1055, 0fBFD0316E; - fma.rn.f32 %f1056, %f1054, %f140, %f1055; - mov.f32 %f1057, 0fBA031CCE; - fma.rn.f32 %f1058, %f1056, %f140, %f1057; - ex2.approx.ftz.f32 %f1059, %f1058; - sub.f32 %f1061, %f3195, %f1059; - mov.b32 %r138, %f1061; - selp.b32 %r139, %r138, 1065353216, %p74; - mov.b32 %r140, %f139; - and.b32 %r141, %r140, -2147483648; - or.b32 %r142, %r139, %r141; - mov.b32 %f3325, %r142; - -BB6_44: - setp.ltu.f32 %p75, %f143, 0f3F800000; - @%p75 bra BB6_46; - bra.uni BB6_45; - -BB6_46: - cvt.rn.f32.s32 %f3233, %r322; - sub.f32 %f3232, %f3233, %f3370; - mul.f32 %f3231, %f3232, %f970; - mul.f32 %f3230, %f3231, %f3231; - mov.f32 %f1091, 0f3BA0C9F8; - mov.f32 %f1092, 0fBA1268FB; - fma.rn.f32 %f1093, %f1092, %f3230, %f1091; - mov.f32 %f1094, 0fBCDABFD4; - fma.rn.f32 %f1095, %f1093, %f3230, %f1094; - mov.f32 %f1096, 0f3DE70331; - fma.rn.f32 %f1097, %f1095, %f3230, %f1096; - mov.f32 %f1098, 0fBEC09330; - fma.rn.f32 %f1099, %f1097, %f3230, %f1098; - mov.f32 %f1100, 0f3F906EBA; - fma.rn.f32 %f1101, %f1099, %f3230, %f1100; - mul.f32 %f3326, %f3231, %f1101; - bra.uni BB6_47; - -BB6_45: - cvt.rn.f32.s32 %f3246, %r322; - sub.f32 %f3245, %f3246, %f3370; - mul.f32 %f3244, %f3245, %f970; - mov.f32 %f3196, 0f3F800000; - setp.ltu.f32 %p76, %f143, 0f407AD445; - mov.f32 %f1073, 0f3A03BB71; - mov.f32 %f1074, 0fB7B730FB; - fma.rn.f32 %f1075, %f1074, %f143, %f1073; - mov.f32 %f1076, 0fBBACA3B3; - fma.rn.f32 %f1077, %f1075, %f143, %f1076; - mov.f32 %f1078, 0f3D0A7445; - fma.rn.f32 %f1079, %f1077, %f143, %f1078; - mov.f32 %f1080, 0fBE1B3B75; - fma.rn.f32 %f1081, %f1079, %f143, %f1080; - mov.f32 %f1082, 0fBF6B385A; - fma.rn.f32 %f1083, %f1081, %f143, %f1082; - mov.f32 %f1084, 0fBFD0316E; - fma.rn.f32 %f1085, %f1083, %f143, %f1084; - mov.f32 %f1086, 0fBA031CCE; - fma.rn.f32 %f1087, %f1085, %f143, %f1086; - ex2.approx.ftz.f32 %f1088, %f1087; - sub.f32 %f1090, %f3196, %f1088; - mov.b32 %r143, %f1090; - selp.b32 %r144, %r143, 1065353216, %p76; - mov.b32 %r145, %f3244; - and.b32 %r146, %r145, -2147483648; - or.b32 %r147, %r144, %r146; - mov.b32 %f3326, %r147; - -BB6_47: - sub.f32 %f1102, %f3325, %f3326; - mul.f32 %f216, %f1102, 0f3F000000; - cvt.rn.f32.s32 %f217, %r323; - sub.f32 %f218, %f217, %f3369; - add.f32 %f219, %f218, 0f3F800000; - mul.f32 %f220, %f219, %f145; - abs.f32 %f221, %f220; - setp.ltu.f32 %p77, %f221, 0f3F800000; - @%p77 bra BB6_49; - bra.uni BB6_48; - -BB6_49: - mul.f32 %f1121, %f220, %f220; - mov.f32 %f1122, 0f3BA0C9F8; - mov.f32 %f1123, 0fBA1268FB; - fma.rn.f32 %f1124, %f1123, %f1121, %f1122; - mov.f32 %f1125, 0fBCDABFD4; - fma.rn.f32 %f1126, %f1124, %f1121, %f1125; - mov.f32 %f1127, 0f3DE70331; - fma.rn.f32 %f1128, %f1126, %f1121, %f1127; - mov.f32 %f1129, 0fBEC09330; - fma.rn.f32 %f1130, %f1128, %f1121, %f1129; - mov.f32 %f1131, 0f3F906EBA; - fma.rn.f32 %f1132, %f1130, %f1121, %f1131; - mul.f32 %f3327, %f220, %f1132; - bra.uni BB6_50; - -BB6_48: - mov.f32 %f3197, 0f3F800000; - mov.f32 %f1103, 0f3A03BB71; - mov.f32 %f1104, 0fB7B730FB; - fma.rn.f32 %f1105, %f1104, %f221, %f1103; - mov.f32 %f1106, 0fBBACA3B3; - fma.rn.f32 %f1107, %f1105, %f221, %f1106; - mov.f32 %f1108, 0f3D0A7445; - fma.rn.f32 %f1109, %f1107, %f221, %f1108; - mov.f32 %f1110, 0fBE1B3B75; - fma.rn.f32 %f1111, %f1109, %f221, %f1110; - mov.f32 %f1112, 0fBF6B385A; - fma.rn.f32 %f1113, %f1111, %f221, %f1112; - mov.f32 %f1114, 0fBFD0316E; - fma.rn.f32 %f1115, %f1113, %f221, %f1114; - mov.f32 %f1116, 0fBA031CCE; - fma.rn.f32 %f1117, %f1115, %f221, %f1116; - ex2.approx.ftz.f32 %f1118, %f1117; - sub.f32 %f1120, %f3197, %f1118; - mov.b32 %r148, %f1120; - setp.ltu.f32 %p78, %f221, 0f407AD445; - selp.b32 %r149, %r148, 1065353216, %p78; - mov.b32 %r150, %f220; - and.b32 %r151, %r150, -2147483648; - or.b32 %r152, %r149, %r151; - mov.b32 %f3327, %r152; - -BB6_50: - cvt.rn.f32.s32 %f3248, %r323; - sub.f32 %f3247, %f3248, %f3369; - mul.f32 %f225, %f3247, %f145; - abs.f32 %f226, %f225; - setp.ltu.f32 %p79, %f226, 0f3F800000; - @%p79 bra BB6_52; - bra.uni BB6_51; - -BB6_52: - mul.f32 %f1151, %f225, %f225; - mov.f32 %f1152, 0f3BA0C9F8; - mov.f32 %f1153, 0fBA1268FB; - fma.rn.f32 %f1154, %f1153, %f1151, %f1152; - mov.f32 %f1155, 0fBCDABFD4; - fma.rn.f32 %f1156, %f1154, %f1151, %f1155; - mov.f32 %f1157, 0f3DE70331; - fma.rn.f32 %f1158, %f1156, %f1151, %f1157; - mov.f32 %f1159, 0fBEC09330; - fma.rn.f32 %f1160, %f1158, %f1151, %f1159; - mov.f32 %f1161, 0f3F906EBA; - fma.rn.f32 %f1162, %f1160, %f1151, %f1161; - mul.f32 %f3328, %f225, %f1162; - bra.uni BB6_53; - -BB6_51: - mov.f32 %f3198, 0f3F800000; - mov.f32 %f1133, 0f3A03BB71; - mov.f32 %f1134, 0fB7B730FB; - fma.rn.f32 %f1135, %f1134, %f226, %f1133; - mov.f32 %f1136, 0fBBACA3B3; - fma.rn.f32 %f1137, %f1135, %f226, %f1136; - mov.f32 %f1138, 0f3D0A7445; - fma.rn.f32 %f1139, %f1137, %f226, %f1138; - mov.f32 %f1140, 0fBE1B3B75; - fma.rn.f32 %f1141, %f1139, %f226, %f1140; - mov.f32 %f1142, 0fBF6B385A; - fma.rn.f32 %f1143, %f1141, %f226, %f1142; - mov.f32 %f1144, 0fBFD0316E; - fma.rn.f32 %f1145, %f1143, %f226, %f1144; - mov.f32 %f1146, 0fBA031CCE; - fma.rn.f32 %f1147, %f1145, %f226, %f1146; - ex2.approx.ftz.f32 %f1148, %f1147; - sub.f32 %f1150, %f3198, %f1148; - mov.b32 %r153, %f1150; - setp.ltu.f32 %p80, %f226, 0f407AD445; - selp.b32 %r154, %r153, 1065353216, %p80; - mov.b32 %r155, %f225; - and.b32 %r156, %r155, -2147483648; - or.b32 %r157, %r154, %r156; - mov.b32 %f3328, %r157; - -BB6_53: - sub.f32 %f1165, %f3327, %f3328; - mul.f32 %f230, %f1165, 0f3F000000; - // inline asm - rcp.approx.ftz.f32 %f1163,%f151; - // inline asm - mul.f32 %f1166, %f1163, %f152; - mul.f32 %f1167, %f1166, %f1166; - mov.f32 %f1168, 0f3C4CAF63; - mov.f32 %f1169, 0f3B18F0FE; - fma.rn.f32 %f1170, %f1169, %f1167, %f1168; - mov.f32 %f1171, 0f3DAAAABD; - fma.rn.f32 %f1172, %f1170, %f1167, %f1171; - mul.rn.f32 %f1173, %f1172, %f1167; - mul.rn.f32 %f1174, %f1173, %f1166; - sub.f32 %f1175, %f150, %f1166; - neg.f32 %f1176, %f1166; - add.f32 %f1177, %f1175, %f1175; - fma.rn.f32 %f1178, %f1176, %f150, %f1177; - mul.rn.f32 %f1179, %f1163, %f1178; - add.f32 %f1180, %f1174, %f1166; - sub.f32 %f1181, %f1166, %f1180; - add.f32 %f1182, %f1174, %f1181; - add.f32 %f1183, %f1179, %f1182; - add.f32 %f1184, %f1180, %f1183; - sub.f32 %f1185, %f1180, %f1184; - add.f32 %f1186, %f1183, %f1185; - add.f32 %f1187, %f153, %f1184; - sub.f32 %f1188, %f153, %f1187; - add.f32 %f1189, %f1184, %f1188; + .reg .pred %p<1292>; + .reg .f32 %f<3376>; + .reg .b32 %r<1378>; + .reg .f64 %fd<1220>; + .reg .b64 %rd<60>; + + + ld.param.u64 %rd4, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_0]; + ld.param.u64 %rd5, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_1]; + ld.param.u64 %rd6, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_2]; + ld.param.u64 %rd7, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_3]; + ld.param.f32 %f547, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_4]; + ld.param.f32 %f548, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_5]; + ld.param.f32 %f549, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_6]; + ld.param.f32 %f550, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_7]; + ld.param.f32 %f551, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_8]; + ld.param.f32 %f552, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_9]; + ld.param.f32 %f553, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_10]; + ld.param.f32 %f554, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_11]; + ld.param.u32 %r182, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_12]; + ld.param.u32 %r183, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_13]; + ld.param.u32 %r184, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_14]; + ld.param.u32 %r185, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_18]; + cvta.to.global.u64 %rd1, %rd4; + mov.u32 %r186, %ntid.x; + mov.u32 %r187, %ctaid.x; + mov.u32 %r188, %tid.x; + mad.lo.s32 %r1, %r187, %r186, %r188; + setp.ge.s32 %p79, %r1, %r185; + @%p79 bra $L__BB6_885; + + cvta.to.global.u64 %rd11, %rd7; + mul.lo.s32 %r189, %r182, %r182; + mul.lo.s32 %r2, %r189, %r1; + mul.wide.s32 %rd12, %r1, 4; + add.s64 %rd2, %rd11, %rd12; + setp.lt.s32 %p80, %r182, 1; + mov.f32 %f557, 0f00000000; + mov.f32 %f3202, %f557; + mov.f32 %f3203, %f557; + mov.f32 %f3204, %f557; + @%p80 bra $L__BB6_11; + + add.s32 %r3, %r182, -1; + and.b32 %r4, %r182, 3; + sub.s32 %r5, %r182, %r4; + shl.b32 %r6, %r182, 2; + mov.u32 %r190, 0; + setp.lt.u32 %p81, %r3, 3; + setp.eq.s32 %p83, %r4, 0; + setp.eq.s32 %p84, %r4, 1; + setp.eq.s32 %p85, %r4, 2; + cvt.s64.s32 %rd15, %r6; + mov.u32 %r1365, %r190; + +$L__BB6_3: + cvt.rn.f32.s32 %f4, %r1365; + mov.u32 %r1368, %r190; + @%p81 bra $L__BB6_6; + + mov.u32 %r1368, %r190; + mov.u32 %r1367, %r5; + +$L__BB6_5: + mad.lo.s32 %r193, %r1368, %r182, %r1365; + add.s32 %r194, %r193, %r2; + mul.wide.s32 %rd13, %r194, 4; + add.s64 %rd14, %rd1, %rd13; + ld.global.f32 %f562, [%rd14]; + fma.rn.f32 %f563, %f562, %f4, %f3202; + cvt.rn.f32.s32 %f564, %r1368; + fma.rn.f32 %f565, %f562, %f564, %f3203; + add.f32 %f566, %f3204, %f562; + add.s64 %rd16, %rd14, %rd15; + ld.global.f32 %f567, [%rd16]; + fma.rn.f32 %f568, %f567, %f4, %f563; + add.s32 %r195, %r1368, 1; + cvt.rn.f32.s32 %f569, %r195; + fma.rn.f32 %f570, %f567, %f569, %f565; + add.f32 %f571, %f566, %f567; + add.s64 %rd17, %rd16, %rd15; + ld.global.f32 %f572, [%rd17]; + fma.rn.f32 %f573, %f572, %f4, %f568; + add.s32 %r196, %r1368, 2; + cvt.rn.f32.s32 %f574, %r196; + fma.rn.f32 %f575, %f572, %f574, %f570; + add.f32 %f576, %f571, %f572; + add.s64 %rd18, %rd17, %rd15; + ld.global.f32 %f577, [%rd18]; + fma.rn.f32 %f3202, %f577, %f4, %f573; + add.s32 %r197, %r1368, 3; + cvt.rn.f32.s32 %f578, %r197; + fma.rn.f32 %f3203, %f577, %f578, %f575; + add.f32 %f3204, %f576, %f577; + add.s32 %r1368, %r1368, 4; + add.s32 %r1367, %r1367, -4; + setp.ne.s32 %p82, %r1367, 0; + @%p82 bra $L__BB6_5; + +$L__BB6_6: + @%p83 bra $L__BB6_10; + + mad.lo.s32 %r13, %r1368, %r182, %r1365; + add.s32 %r198, %r13, %r2; + mul.wide.s32 %rd19, %r198, 4; + add.s64 %rd20, %rd1, %rd19; + ld.global.f32 %f579, [%rd20]; + fma.rn.f32 %f3202, %f579, %f4, %f3202; + cvt.rn.f32.s32 %f580, %r1368; + fma.rn.f32 %f3203, %f579, %f580, %f3203; + add.f32 %f3204, %f3204, %f579; + @%p84 bra $L__BB6_10; + + add.s32 %r14, %r13, %r182; + add.s32 %r199, %r14, %r2; + mul.wide.s32 %rd21, %r199, 4; + add.s64 %rd22, %rd1, %rd21; + ld.global.f32 %f581, [%rd22]; + fma.rn.f32 %f3202, %f581, %f4, %f3202; + add.s32 %r200, %r1368, 1; + cvt.rn.f32.s32 %f582, %r200; + fma.rn.f32 %f3203, %f581, %f582, %f3203; + add.f32 %f3204, %f3204, %f581; + @%p85 bra $L__BB6_10; + + add.s32 %r201, %r1368, 2; + add.s32 %r202, %r14, %r182; + add.s32 %r203, %r202, %r2; + mul.wide.s32 %rd23, %r203, 4; + add.s64 %rd24, %rd1, %rd23; + ld.global.f32 %f583, [%rd24]; + fma.rn.f32 %f3202, %f583, %f4, %f3202; + cvt.rn.f32.s32 %f584, %r201; + fma.rn.f32 %f3203, %f583, %f584, %f3203; + add.f32 %f3204, %f3204, %f583; + +$L__BB6_10: + add.s32 %r1365, %r1365, 1; + setp.lt.s32 %p86, %r1365, %r182; + @%p86 bra $L__BB6_3; + +$L__BB6_11: + div.rn.f32 %f3295, %f3202, %f3204; + div.rn.f32 %f3294, %f3203, %f3204; + mov.f32 %f3292, 0f51BA43B7; + mov.f32 %f3211, %f557; + @%p80 bra $L__BB6_51; + + mov.f32 %f589, 0f3F000000; + div.rn.f32 %f590, %f589, %f547; + div.rn.f32 %f591, %f590, %f547; + cvt.f64.f32 %fd1, %f591; + mov.f64 %fd553, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r16}, %fd553; + } + and.b32 %r17, %r16, 2146435072; + and.b32 %r18, %r16, 2147483647; + setp.gt.s32 %p88, %r16, -1; + selp.b32 %r19, 2146435072, 0, %p88; + mov.u32 %r204, 0; + or.b32 %r20, %r19, -2147483648; + setp.eq.s32 %p90, %r17, 1062207488; + setp.lt.s32 %p91, %r16, 0; + setp.ne.s32 %p96, %r18, 1071644672; + setp.eq.s32 %p123, %r18, 2146435072; + mov.u32 %r1369, %r204; + mov.f32 %f3211, %f557; + +$L__BB6_13: + mov.u32 %r1370, %r204; + +$L__BB6_14: + mov.f32 %f3214, 0f00000000; + mov.f32 %f3215, %f3214; + mov.u32 %r1371, %r204; + +$L__BB6_15: + sub.s32 %r24, %r1371, %r1369; + cvt.rn.f32.s32 %f594, %r24; + cvt.f64.f32 %fd2, %f594; + { + .reg .b32 %temp; + mov.b64 {%temp, %r25}, %fd2; + } + abs.f64 %fd554, %fd2; + { // callseq 112, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd554; + .param .b64 param1; + st.param.f64 [param1+0], %fd553; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd3, [retval0+0]; + } // callseq 112 + setp.lt.s32 %p89, %r25, 0; + and.pred %p1, %p89, %p90; + selp.b32 %r208, %r25, 0, %p90; + or.b32 %r209, %r208, 2146435072; + selp.b32 %r26, %r209, %r208, %p91; + add.f64 %fd4, %fd2, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r210}, %fd4; + } + and.b32 %r27, %r210, 2146435072; + setp.ne.s32 %p92, %r27, 2146435072; + setp.gtu.f64 %p93, %fd554, 0d7FF0000000000000; + setp.gt.f64 %p94, %fd554, 0d3FF0000000000000; + selp.b32 %r211, 2146435072, 0, %p94; + xor.b32 %r212, %r211, 2146435072; + selp.b32 %r213, %r212, %r211, %p91; + setp.eq.s32 %p95, %r24, -1; + selp.b32 %r28, 1072693248, %r213, %p95; + and.b32 %r29, %r25, 2147483647; + and.pred %p97, %p96, %p1; + selp.b32 %r30, %r20, %r19, %p97; + mul.lo.s32 %r31, %r1371, %r182; + or.pred %p2, %p92, %p93; + mov.u32 %r1372, %r204; + +$L__BB6_16: + not.pred %p98, %p1; + mov.f64 %fd1071, %fd3; + @%p98 bra $L__BB6_18; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r214}, %fd3; + } + xor.b32 %r215, %r214, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r216, %temp}, %fd3; + } + mov.b64 %fd1071, {%r216, %r215}; + +$L__BB6_18: + setp.eq.s32 %p99, %r24, 0; + @%p99 bra $L__BB6_22; + + setp.gt.s32 %p100, %r25, -1; + @%p100 bra $L__BB6_23; + + cvt.rzi.f64.f64 %fd557, %fd553; + setp.eq.f64 %p101, %fd557, 0d4000000000000000; + @%p101 bra $L__BB6_23; + + mov.f64 %fd1071, 0dFFF8000000000000; + bra.uni $L__BB6_23; + +$L__BB6_22: + mov.u32 %r217, 0; + mov.b64 %fd1071, {%r217, %r26}; + +$L__BB6_23: + selp.f64 %fd1072, %fd1071, %fd4, %p92; + @%p2 bra $L__BB6_28; + + { + .reg .b32 %temp; + mov.b64 {%r218, %temp}, %fd553; + } + setp.eq.s32 %p104, %r218, 0; + and.pred %p105, %p123, %p104; + @%p105 bra $L__BB6_27; + bra.uni $L__BB6_25; + +$L__BB6_27: + mov.u32 %r221, 0; + mov.b64 %fd1072, {%r221, %r28}; + bra.uni $L__BB6_28; + +$L__BB6_25: + setp.ne.s32 %p106, %r29, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r219, %temp}, %fd2; + } + setp.ne.s32 %p107, %r219, 0; + or.pred %p108, %p106, %p107; + mov.f64 %fd1072, %fd1071; + @%p108 bra $L__BB6_28; + + mov.u32 %r220, 0; + mov.b64 %fd1072, {%r220, %r30}; + +$L__BB6_28: + setp.eq.s32 %p109, %r24, 1; + selp.f64 %fd560, 0d3FF0000000000000, %fd1072, %p109; + mov.f64 %fd561, 0d3FF0000000000000; + mul.f64 %fd13, %fd560, %fd1; + neg.f64 %fd562, %fd13; + mov.f64 %fd563, 0d4338000000000000; + mov.f64 %fd564, 0d3FF71547652B82FE; + fma.rn.f64 %fd565, %fd562, %fd564, %fd563; + { + .reg .b32 %temp; + mov.b64 {%r33, %temp}, %fd565; + } + mov.f64 %fd566, 0dC338000000000000; + add.rn.f64 %fd567, %fd565, %fd566; + mov.f64 %fd568, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd569, %fd567, %fd568, %fd562; + mov.f64 %fd570, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd571, %fd567, %fd570, %fd569; + mov.f64 %fd572, 0d3E928AF3FCA213EA; + mov.f64 %fd573, 0d3E5ADE1569CE2BDF; + fma.rn.f64 %fd574, %fd573, %fd571, %fd572; + mov.f64 %fd575, 0d3EC71DEE62401315; + fma.rn.f64 %fd576, %fd574, %fd571, %fd575; + mov.f64 %fd577, 0d3EFA01997C89EB71; + fma.rn.f64 %fd578, %fd576, %fd571, %fd577; + mov.f64 %fd579, 0d3F2A01A014761F65; + fma.rn.f64 %fd580, %fd578, %fd571, %fd579; + mov.f64 %fd581, 0d3F56C16C1852B7AF; + fma.rn.f64 %fd582, %fd580, %fd571, %fd581; + mov.f64 %fd583, 0d3F81111111122322; + fma.rn.f64 %fd584, %fd582, %fd571, %fd583; + mov.f64 %fd585, 0d3FA55555555502A1; + fma.rn.f64 %fd586, %fd584, %fd571, %fd585; + mov.f64 %fd587, 0d3FC5555555555511; + fma.rn.f64 %fd588, %fd586, %fd571, %fd587; + mov.f64 %fd589, 0d3FE000000000000B; + fma.rn.f64 %fd590, %fd588, %fd571, %fd589; + fma.rn.f64 %fd591, %fd590, %fd571, %fd561; + fma.rn.f64 %fd592, %fd591, %fd571, %fd561; + { + .reg .b32 %temp; + mov.b64 {%r34, %temp}, %fd592; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r35}, %fd592; + } + shl.b32 %r222, %r33, 20; + add.s32 %r223, %r35, %r222; + mov.b64 %fd1073, {%r34, %r223}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r224}, %fd562; + } + mov.b32 %f595, %r224; + abs.f32 %f42, %f595; + setp.lt.f32 %p110, %f42, 0f4086232B; + @%p110 bra $L__BB6_31; + + setp.gt.f64 %p111, %fd13, 0d8000000000000000; + mov.f64 %fd593, 0d7FF0000000000000; + sub.f64 %fd594, %fd593, %fd13; + selp.f64 %fd1073, 0d0000000000000000, %fd594, %p111; + setp.geu.f32 %p112, %f42, 0f40874800; + @%p112 bra $L__BB6_31; + + shr.u32 %r225, %r33, 31; + add.s32 %r226, %r33, %r225; + shr.s32 %r227, %r226, 1; + shl.b32 %r228, %r227, 20; + add.s32 %r229, %r35, %r228; + mov.b64 %fd595, {%r34, %r229}; + sub.s32 %r230, %r33, %r227; + shl.b32 %r231, %r230, 20; + add.s32 %r232, %r231, 1072693248; + mov.u32 %r233, 0; + mov.b64 %fd596, {%r233, %r232}; + mul.f64 %fd1073, %fd595, %fd596; + +$L__BB6_31: + sub.s32 %r36, %r1370, %r1372; + cvt.rn.f32.s32 %f596, %r36; + cvt.f64.f32 %fd18, %f596; + { + .reg .b32 %temp; + mov.b64 {%temp, %r37}, %fd18; + } + abs.f64 %fd19, %fd18; + { // callseq 113, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd19; + .param .b64 param1; + st.param.f64 [param1+0], %fd553; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1075, [retval0+0]; + } // callseq 113 + setp.lt.s32 %p113, %r37, 0; + and.pred %p3, %p113, %p90; + not.pred %p115, %p3; + @%p115 bra $L__BB6_33; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r234}, %fd1075; + } + xor.b32 %r235, %r234, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r236, %temp}, %fd1075; + } + mov.b64 %fd1075, {%r236, %r235}; + +$L__BB6_33: + setp.eq.s32 %p116, %r36, 0; + @%p116 bra $L__BB6_37; + + setp.gt.s32 %p117, %r37, -1; + @%p117 bra $L__BB6_38; + + cvt.rzi.f64.f64 %fd599, %fd553; + setp.eq.f64 %p118, %fd599, 0d4000000000000000; + @%p118 bra $L__BB6_38; + + mov.f64 %fd1075, 0dFFF8000000000000; + bra.uni $L__BB6_38; + +$L__BB6_37: + mov.u32 %r237, 0; + selp.b32 %r238, %r37, 0, %p90; + or.b32 %r239, %r238, 2146435072; + selp.b32 %r240, %r239, %r238, %p91; + mov.b64 %fd1075, {%r237, %r240}; + +$L__BB6_38: + add.f64 %fd25, %fd18, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r241}, %fd25; + } + and.b32 %r242, %r241, 2146435072; + setp.ne.s32 %p121, %r242, 2146435072; + mov.f64 %fd1076, %fd1075; + @%p121 bra $L__BB6_44; + + setp.gtu.f64 %p122, %fd19, 0d7FF0000000000000; + mov.f64 %fd1076, %fd25; + @%p122 bra $L__BB6_44; + + { + .reg .b32 %temp; + mov.b64 {%r243, %temp}, %fd553; + } + setp.eq.s32 %p124, %r243, 0; + and.pred %p125, %p123, %p124; + @%p125 bra $L__BB6_43; + bra.uni $L__BB6_41; + +$L__BB6_43: + mov.u32 %r248, 0; + setp.gt.f64 %p132, %fd19, 0d3FF0000000000000; + selp.b32 %r249, 2146435072, 0, %p132; + xor.b32 %r250, %r249, 2146435072; + selp.b32 %r251, %r250, %r249, %p91; + setp.eq.s32 %p133, %r36, -1; + selp.b32 %r252, 1072693248, %r251, %p133; + mov.b64 %fd1076, {%r248, %r252}; + bra.uni $L__BB6_44; + +$L__BB6_41: + { + .reg .b32 %temp; + mov.b64 {%r244, %temp}, %fd18; + } + and.b32 %r245, %r37, 2147483647; + setp.ne.s32 %p126, %r245, 2146435072; + setp.ne.s32 %p127, %r244, 0; + or.pred %p128, %p126, %p127; + mov.f64 %fd1076, %fd1075; + @%p128 bra $L__BB6_44; + + and.pred %p130, %p96, %p3; + selp.b32 %r246, %r20, %r19, %p130; + mov.u32 %r247, 0; + mov.b64 %fd1076, {%r247, %r246}; + +$L__BB6_44: + setp.eq.s32 %p134, %r36, 1; + selp.f64 %fd602, 0d3FF0000000000000, %fd1076, %p134; + mul.f64 %fd29, %fd602, %fd1; + neg.f64 %fd604, %fd29; + fma.rn.f64 %fd607, %fd604, %fd564, %fd563; + { + .reg .b32 %temp; + mov.b64 {%r38, %temp}, %fd607; + } + add.rn.f64 %fd609, %fd607, %fd566; + fma.rn.f64 %fd611, %fd609, %fd568, %fd604; + fma.rn.f64 %fd613, %fd609, %fd570, %fd611; + fma.rn.f64 %fd616, %fd573, %fd613, %fd572; + fma.rn.f64 %fd618, %fd616, %fd613, %fd575; + fma.rn.f64 %fd620, %fd618, %fd613, %fd577; + fma.rn.f64 %fd622, %fd620, %fd613, %fd579; + fma.rn.f64 %fd624, %fd622, %fd613, %fd581; + fma.rn.f64 %fd626, %fd624, %fd613, %fd583; + fma.rn.f64 %fd628, %fd626, %fd613, %fd585; + fma.rn.f64 %fd630, %fd628, %fd613, %fd587; + fma.rn.f64 %fd632, %fd630, %fd613, %fd589; + fma.rn.f64 %fd633, %fd632, %fd613, %fd561; + fma.rn.f64 %fd634, %fd633, %fd613, %fd561; + { + .reg .b32 %temp; + mov.b64 {%r39, %temp}, %fd634; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r40}, %fd634; + } + shl.b32 %r253, %r38, 20; + add.s32 %r254, %r40, %r253; + mov.b64 %fd1077, {%r39, %r254}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r255}, %fd604; + } + mov.b32 %f597, %r255; + abs.f32 %f43, %f597; + setp.lt.f32 %p135, %f43, 0f4086232B; + @%p135 bra $L__BB6_47; + + setp.gt.f64 %p136, %fd29, 0d8000000000000000; + mov.f64 %fd635, 0d7FF0000000000000; + sub.f64 %fd636, %fd635, %fd29; + selp.f64 %fd1077, 0d0000000000000000, %fd636, %p136; + setp.geu.f32 %p137, %f43, 0f40874800; + @%p137 bra $L__BB6_47; + + shr.u32 %r256, %r38, 31; + add.s32 %r257, %r38, %r256; + shr.s32 %r258, %r257, 1; + shl.b32 %r259, %r258, 20; + add.s32 %r260, %r40, %r259; + mov.b64 %fd637, {%r39, %r260}; + sub.s32 %r261, %r38, %r258; + shl.b32 %r262, %r261, 20; + add.s32 %r263, %r262, 1072693248; + mov.u32 %r264, 0; + mov.b64 %fd638, {%r264, %r263}; + mul.f64 %fd1077, %fd637, %fd638; + +$L__BB6_47: + add.s32 %r265, %r1372, %r31; + add.s32 %r266, %r265, %r2; + mul.wide.s32 %rd25, %r266, 4; + add.s64 %rd26, %rd1, %rd25; + ld.global.f32 %f598, [%rd26]; + cvt.f64.f32 %fd639, %f598; + mul.f64 %fd640, %fd1073, %fd1077; + cvt.f64.f32 %fd641, %f3215; + fma.rn.f64 %fd642, %fd640, %fd639, %fd641; + cvt.rn.f32.f64 %f3215, %fd642; + cvt.f64.f32 %fd643, %f3214; + add.f64 %fd644, %fd640, %fd643; + cvt.rn.f32.f64 %f3214, %fd644; + add.s32 %r1372, %r1372, 1; + setp.lt.s32 %p138, %r1372, %r182; + @%p138 bra $L__BB6_16; + + add.s32 %r1371, %r1371, 1; + setp.lt.s32 %p139, %r1371, %r182; + @%p139 bra $L__BB6_15; + + div.rn.f32 %f599, %f3215, %f3214; + max.f32 %f3211, %f3211, %f599; + min.f32 %f3292, %f3292, %f599; + add.s32 %r1370, %r1370, 1; + setp.lt.s32 %p140, %r1370, %r182; + @%p140 bra $L__BB6_14; + + add.s32 %r1369, %r1369, 1; + setp.lt.s32 %p141, %r1369, %r182; + @%p141 bra $L__BB6_13; + +$L__BB6_51: + sub.f32 %f601, %f3211, %f3292; + add.f32 %f602, %f601, %f601; + mul.f32 %f603, %f602, 0f40490FD8; + mul.f32 %f604, %f603, %f547; + mul.f32 %f605, %f604, %f554; + mul.f32 %f606, %f605, 0f3FB504F3; + max.f32 %f3293, %f557, %f606; + ld.global.f32 %f3291, [%rd2]; + setp.lt.s32 %p142, %r184, 1; + @%p142 bra $L__BB6_623; + + mul.f32 %f52, %f547, 0f3F000000; + mul.f32 %f53, %f554, 0f3F000000; + mul.f32 %f609, %f548, 0f40400000; + cvt.f64.f32 %fd34, %f609; + mul.f32 %f54, %f553, %f553; + mul.f32 %f55, %f54, %f553; + mul.f32 %f610, %f550, 0f40800000; + cvt.f64.f32 %fd35, %f610; + cvt.f64.f32 %fd645, %f553; + add.f64 %fd36, %fd645, 0d4010000000000000; + mul.f32 %f611, %f549, 0f40400000; + cvt.f64.f32 %fd37, %f611; + mul.f32 %f612, %f551, 0f40800000; + cvt.f64.f32 %fd38, %f612; + mul.f32 %f56, %f547, 0fBE800000; + mul.f32 %f57, %f554, 0fBE800000; + mov.f32 %f613, 0f40000000; + div.rn.f32 %f58, %f613, %f54; + mul.f32 %f59, %f548, 0f40C00000; + mul.f32 %f614, %f550, 0f41400000; + cvt.f64.f32 %fd39, %f614; + mul.f32 %f60, %f549, 0f40C00000; + mul.f32 %f615, %f551, 0f41400000; + cvt.f64.f32 %fd40, %f615; + cvt.rn.f32.s32 %f61, %r183; + mov.u32 %r1373, 0; + cvta.to.global.u64 %rd27, %rd5; + setp.eq.f32 %p193, %f553, 0fBF800000; + cvta.to.global.u64 %rd33, %rd6; + +$L__BB6_53: + mov.f32 %f3234, 0f00000000; + mov.f32 %f3235, %f3234; + mov.f32 %f3236, %f3234; + mov.f32 %f3237, %f3234; + mov.f32 %f3238, %f3234; + mov.f32 %f3239, %f3234; + mov.f32 %f3240, %f3234; + mov.f32 %f3241, %f3234; + mov.f32 %f3242, %f3234; + mov.f32 %f3243, %f3234; + @%p80 bra $L__BB6_622; + + sub.f32 %f68, %f3291, %f552; + div.rn.f32 %f69, %f68, %f553; + cvt.f64.f32 %fd41, %f69; + add.f32 %f70, %f3291, %f552; + div.rn.f32 %f71, %f70, %f553; + cvt.f64.f32 %fd42, %f71; + div.rn.f32 %f72, %f3293, 0fC0206C98; + div.rn.f32 %f636, %f3293, 0f40206C98; + cvt.f64.f32 %fd43, %f636; + add.f32 %f637, %f68, %f68; + div.rn.f32 %f638, %f637, %f54; + cvt.f64.f32 %fd44, %f638; + cvt.f64.f32 %fd45, %f68; + add.f64 %fd46, %fd45, 0d4000000000000000; + add.f64 %fd47, %fd45, 0d4008000000000000; + add.f32 %f639, %f70, %f70; + div.rn.f32 %f640, %f639, %f54; + cvt.f64.f32 %fd48, %f640; + cvt.f64.f32 %fd49, %f70; + add.f64 %fd50, %fd49, 0d4000000000000000; + add.f64 %fd51, %fd49, 0d4008000000000000; + mul.f32 %f641, %f59, %f68; + div.rn.f32 %f642, %f641, %f55; + add.f32 %f643, %f58, %f642; + cvt.f64.f32 %fd52, %f643; + mul.f32 %f644, %f60, %f70; + div.rn.f32 %f645, %f644, %f55; + add.f32 %f646, %f58, %f645; + cvt.f64.f32 %fd53, %f646; + shl.b32 %r273, %r1, 1; + mul.wide.s32 %rd28, %r273, 4; + add.s64 %rd29, %rd27, %rd28; + ld.global.f32 %f73, [%rd29+4]; + ld.global.f32 %f74, [%rd29]; + mov.u32 %r1374, 0; + +$L__BB6_55: + cvt.f64.f32 %fd1052, %f69; + cvt.f64.f32 %fd1051, %f71; + mov.u32 %r1375, 0; + add.f32 %f3069, %f3291, %f552; + sub.f32 %f3068, %f3291, %f552; + mov.f64 %fd646, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r47}, %fd646; + } + and.b32 %r48, %r47, 2146435072; + setp.eq.s32 %p144, %r48, 1062207488; + abs.f64 %fd647, %fd1052; + { // callseq 114, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd647; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd54, [retval0+0]; + } // callseq 114 + { + .reg .b32 %temp; + mov.b64 {%temp, %r49}, %fd1052; + } + setp.lt.s32 %p145, %r49, 0; + and.pred %p4, %p145, %p144; + setp.lt.s32 %p146, %r47, 0; + add.f64 %fd648, %fd1052, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r275}, %fd648; + } + and.b32 %r50, %r275, 2146435072; + setp.ne.s32 %p147, %r50, 2146435072; + setp.gtu.f64 %p148, %fd647, 0d7FF0000000000000; + mov.f64 %fd649, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r51}, %fd649; + } + and.b32 %r52, %r51, 2146435072; + setp.eq.s32 %p149, %r52, 1073741824; + { // callseq 115, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd647; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd55, [retval0+0]; + } // callseq 115 + and.pred %p5, %p145, %p149; + and.b32 %r53, %r47, 2147483647; + setp.gt.f64 %p150, %fd647, 0d3FF0000000000000; + selp.b32 %r276, 2146435072, 0, %p150; + xor.b32 %r277, %r276, 2146435072; + selp.b32 %r278, %r277, %r276, %p146; + setp.eq.f32 %p151, %f69, 0fBF800000; + selp.b32 %r54, 1072693248, %r278, %p151; + setp.lt.s32 %p152, %r51, 0; + add.f64 %fd650, %fd1052, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r279}, %fd650; + } + and.b32 %r55, %r279, 2146435072; + setp.ne.s32 %p153, %r55, 2146435072; + setp.gt.s32 %p154, %r47, -1; + selp.b32 %r56, 2146435072, 0, %p154; + setp.ne.s32 %p155, %r53, 1071644672; + or.b32 %r57, %r56, -2147483648; + mov.f64 %fd651, 0d4010000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r280}, %fd651; + } + and.b32 %r281, %r280, 2146435072; + setp.eq.s32 %p156, %r281, 1072693248; + { // callseq 116, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd647; + .param .b64 param1; + st.param.f64 [param1+0], %fd651; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd56, [retval0+0]; + } // callseq 116 + and.pred %p6, %p145, %p156; + and.b32 %r58, %r51, 2147483647; + selp.b32 %r282, %r277, %r276, %p152; + selp.b32 %r59, 1072693248, %r282, %p151; + selp.b32 %r283, %r49, 0, %p156; + setp.lt.s32 %p157, %r280, 0; + or.b32 %r284, %r283, 2146435072; + selp.b32 %r60, %r284, %r283, %p157; + add.f64 %fd652, %fd1052, 0d4010000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r285}, %fd652; + } + and.b32 %r61, %r285, 2146435072; + setp.ne.s32 %p158, %r61, 2146435072; + setp.gt.s32 %p159, %r51, -1; + selp.b32 %r62, 2146435072, 0, %p159; + or.b32 %r63, %r62, -2147483648; + abs.f64 %fd653, %fd1051; + { // callseq 117, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd653; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd57, [retval0+0]; + } // callseq 117 + { + .reg .b32 %temp; + mov.b64 {%temp, %r64}, %fd1051; + } + setp.lt.s32 %p160, %r64, 0; + and.pred %p7, %p160, %p144; + and.b32 %r65, %r280, 2147483647; + selp.b32 %r286, %r277, %r276, %p157; + selp.b32 %r66, 1072693248, %r286, %p151; + add.f64 %fd654, %fd1051, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r287}, %fd654; + } + and.b32 %r67, %r287, 2146435072; + setp.ne.s32 %p161, %r67, 2146435072; + setp.gt.s32 %p162, %r280, -1; + selp.b32 %r288, 2146435072, 0, %p162; + setp.ne.s32 %p163, %r65, 1071644672; + and.pred %p164, %p163, %p6; + or.b32 %r289, %r288, -2147483648; + selp.b32 %r68, %r289, %r288, %p164; + setp.gtu.f64 %p165, %fd653, 0d7FF0000000000000; + { // callseq 118, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd653; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd58, [retval0+0]; + } // callseq 118 + and.pred %p8, %p160, %p149; + setp.gt.f64 %p166, %fd653, 0d3FF0000000000000; + selp.b32 %r290, 2146435072, 0, %p166; + xor.b32 %r291, %r290, 2146435072; + selp.b32 %r292, %r291, %r290, %p146; + setp.eq.f32 %p167, %f71, 0fBF800000; + selp.b32 %r69, 1072693248, %r292, %p167; + add.f64 %fd655, %fd1051, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r293}, %fd655; + } + and.b32 %r70, %r293, 2146435072; + setp.ne.s32 %p168, %r70, 2146435072; + { // callseq 119, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd653; + .param .b64 param1; + st.param.f64 [param1+0], %fd651; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd59, [retval0+0]; + } // callseq 119 + and.pred %p9, %p160, %p156; + selp.b32 %r294, %r291, %r290, %p152; + selp.b32 %r71, 1072693248, %r294, %p167; + selp.b32 %r295, %r64, 0, %p156; + or.b32 %r296, %r295, 2146435072; + selp.b32 %r72, %r296, %r295, %p157; + add.f64 %fd656, %fd1051, 0d4010000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r297}, %fd656; + } + and.b32 %r73, %r297, 2146435072; + setp.ne.s32 %p169, %r73, 2146435072; + cvt.rn.f32.s32 %f85, %r1374; + sub.f32 %f86, %f85, %f3295; + add.f32 %f87, %f86, 0f3F000000; + add.f32 %f88, %f86, 0fBF000000; + selp.b32 %r298, %r291, %r290, %p157; + selp.b32 %r74, 1072693248, %r298, %p167; + and.pred %p170, %p163, %p9; + selp.b32 %r75, %r289, %r288, %p170; + cvt.f64.f32 %fd60, %f87; + { + .reg .b32 %temp; + mov.b64 {%temp, %r76}, %fd60; + } + abs.f64 %fd657, %fd60; + { // callseq 120, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd657; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd61, [retval0+0]; + } // callseq 120 + setp.lt.s32 %p171, %r76, 0; + and.pred %p10, %p171, %p149; + add.f64 %fd62, %fd60, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r299}, %fd62; + } + and.b32 %r77, %r299, 2146435072; + setp.ne.s32 %p172, %r77, 2146435072; + mov.f64 %fd658, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r78}, %fd658; + } + setp.gtu.f64 %p173, %fd657, 0d7FF0000000000000; + cvt.f64.f32 %fd63, %f88; + { + .reg .b32 %temp; + mov.b64 {%temp, %r79}, %fd63; + } + abs.f64 %fd659, %fd63; + { // callseq 121, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd659; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd64, [retval0+0]; + } // callseq 121 + setp.lt.s32 %p174, %r79, 0; + and.pred %p11, %p174, %p149; + setp.gt.f64 %p175, %fd657, 0d3FF0000000000000; + selp.b32 %r300, 2146435072, 0, %p175; + xor.b32 %r301, %r300, 2146435072; + selp.b32 %r302, %r301, %r300, %p152; + setp.eq.f32 %p176, %f87, 0fBF800000; + selp.b32 %r80, 1072693248, %r302, %p176; + add.f64 %fd65, %fd63, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r303}, %fd65; + } + and.b32 %r81, %r303, 2146435072; + setp.ne.s32 %p177, %r81, 2146435072; + setp.gtu.f64 %p178, %fd659, 0d7FF0000000000000; + setp.gt.f64 %p179, %fd659, 0d3FF0000000000000; + selp.b32 %r304, 2146435072, 0, %p179; + xor.b32 %r305, %r304, 2146435072; + selp.b32 %r306, %r305, %r304, %p152; + setp.eq.f32 %p180, %f88, 0fBF800000; + selp.b32 %r82, 1072693248, %r306, %p180; + abs.f64 %fd660, %fd45; + { // callseq 122, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd660; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd66, [retval0+0]; + } // callseq 122 + { + .reg .b32 %temp; + mov.b64 {%temp, %r83}, %fd45; + } + setp.lt.s32 %p181, %r83, 0; + and.pred %p12, %p181, %p144; + selp.b32 %r307, %r83, 0, %p144; + or.b32 %r308, %r307, 2146435072; + selp.b32 %r84, %r308, %r307, %p146; + { + .reg .b32 %temp; + mov.b64 {%temp, %r309}, %fd46; + } + and.b32 %r85, %r309, 2146435072; + setp.ne.s32 %p182, %r85, 2146435072; + setp.gtu.f64 %p183, %fd660, 0d7FF0000000000000; + { // callseq 123, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd660; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd67, [retval0+0]; + } // callseq 123 + and.pred %p13, %p181, %p149; + setp.gt.f64 %p184, %fd660, 0d3FF0000000000000; + selp.b32 %r310, 2146435072, 0, %p184; + xor.b32 %r311, %r310, 2146435072; + selp.b32 %r312, %r311, %r310, %p146; + setp.eq.f32 %p185, %f3068, 0fBF800000; + selp.b32 %r86, 1072693248, %r312, %p185; + { + .reg .b32 %temp; + mov.b64 {%temp, %r313}, %fd47; + } + and.b32 %r87, %r313, 2146435072; + setp.ne.s32 %p186, %r87, 2146435072; + and.pred %p187, %p155, %p12; + selp.b32 %r88, %r57, %r56, %p187; + { + .reg .b32 %temp; + mov.b64 {%temp, %r89}, %fd645; + } + abs.f64 %fd662, %fd645; + { // callseq 124, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd662; + .param .b64 param1; + st.param.f64 [param1+0], %fd651; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd68, [retval0+0]; + } // callseq 124 + setp.lt.s32 %p188, %r89, 0; + and.pred %p14, %p188, %p156; + selp.b32 %r314, %r311, %r310, %p152; + selp.b32 %r90, 1072693248, %r314, %p185; + selp.b32 %r315, %r89, 0, %p156; + or.b32 %r316, %r315, 2146435072; + selp.b32 %r91, %r316, %r315, %p157; + { + .reg .b32 %temp; + mov.b64 {%temp, %r317}, %fd36; + } + and.b32 %r92, %r317, 2146435072; + setp.ne.s32 %p189, %r92, 2146435072; + setp.gtu.f64 %p190, %fd662, 0d7FF0000000000000; + abs.f64 %fd663, %fd49; + { // callseq 125, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd663; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd69, [retval0+0]; + } // callseq 125 + { + .reg .b32 %temp; + mov.b64 {%temp, %r93}, %fd49; + } + setp.lt.s32 %p191, %r93, 0; + and.pred %p15, %p191, %p144; + setp.gt.f64 %p192, %fd662, 0d3FF0000000000000; + selp.b32 %r318, 2146435072, 0, %p192; + xor.b32 %r319, %r318, 2146435072; + selp.b32 %r320, %r319, %r318, %p157; + selp.b32 %r94, 1072693248, %r320, %p193; + selp.b32 %r321, %r93, 0, %p144; + or.b32 %r322, %r321, 2146435072; + selp.b32 %r95, %r322, %r321, %p146; + { + .reg .b32 %temp; + mov.b64 {%temp, %r323}, %fd50; + } + and.b32 %r96, %r323, 2146435072; + setp.ne.s32 %p194, %r96, 2146435072; + and.pred %p195, %p163, %p14; + selp.b32 %r97, %r289, %r288, %p195; + setp.gtu.f64 %p196, %fd663, 0d7FF0000000000000; + { // callseq 126, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd663; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd70, [retval0+0]; + } // callseq 126 + and.pred %p16, %p191, %p149; + setp.gt.f64 %p197, %fd663, 0d3FF0000000000000; + selp.b32 %r324, 2146435072, 0, %p197; + xor.b32 %r325, %r324, 2146435072; + selp.b32 %r326, %r325, %r324, %p146; + setp.eq.f32 %p198, %f3069, 0fBF800000; + selp.b32 %r98, 1072693248, %r326, %p198; + { + .reg .b32 %temp; + mov.b64 {%temp, %r327}, %fd51; + } + and.b32 %r99, %r327, 2146435072; + setp.ne.s32 %p199, %r99, 2146435072; + and.pred %p200, %p155, %p15; + selp.b32 %r100, %r57, %r56, %p200; + selp.b32 %r328, %r325, %r324, %p152; + selp.b32 %r101, 1072693248, %r328, %p198; + or.pred %p17, %p147, %p148; + or.pred %p18, %p153, %p148; + or.pred %p19, %p158, %p148; + or.pred %p20, %p161, %p165; + or.pred %p21, %p168, %p165; + or.pred %p22, %p169, %p165; + or.pred %p23, %p172, %p173; + or.pred %p24, %p177, %p178; + or.pred %p25, %p182, %p183; + or.pred %p26, %p186, %p183; + or.pred %p27, %p189, %p190; + or.pred %p28, %p194, %p196; + or.pred %p29, %p199, %p196; + shr.s32 %r329, %r47, 31; + and.b32 %r102, %r329, 2146435072; + +$L__BB6_56: + not.pred %p201, %p4; + mov.f64 %fd1079, %fd54; + @%p201 bra $L__BB6_58; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r330}, %fd54; + } + xor.b32 %r331, %r330, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r332, %temp}, %fd54; + } + mov.b64 %fd1079, {%r332, %r331}; + +$L__BB6_58: + setp.eq.f32 %p202, %f69, 0f00000000; + @%p202 bra $L__BB6_62; + bra.uni $L__BB6_59; + +$L__BB6_62: + mov.u32 %r333, 0; + selp.b32 %r335, %r49, 0, %p144; + or.b32 %r336, %r335, 2146435072; + selp.b32 %r337, %r336, %r335, %p146; + mov.b64 %fd1079, {%r333, %r337}; + bra.uni $L__BB6_63; + +$L__BB6_59: + setp.gt.s32 %p203, %r49, -1; + @%p203 bra $L__BB6_63; + + cvt.rzi.f64.f64 %fd665, %fd646; + setp.eq.f64 %p204, %fd665, 0d4000000000000000; + @%p204 bra $L__BB6_63; + + mov.f64 %fd1079, 0dFFF8000000000000; + +$L__BB6_63: + cvt.f64.f32 %fd1054, %f69; + add.f64 %fd1053, %fd1052, 0d4000000000000000; + selp.f64 %fd1080, %fd1079, %fd1053, %p147; + @%p17 bra $L__BB6_68; + + setp.eq.s32 %p208, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r338, %temp}, %fd646; + } + setp.eq.s32 %p209, %r338, 0; + and.pred %p210, %p208, %p209; + @%p210 bra $L__BB6_67; + bra.uni $L__BB6_65; + +$L__BB6_67: + mov.u32 %r345, 0; + mov.b64 %fd1080, {%r345, %r54}; + bra.uni $L__BB6_68; + +$L__BB6_65: + and.b32 %r339, %r49, 2147483647; + setp.ne.s32 %p211, %r339, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r340, %temp}, %fd41; + } + setp.ne.s32 %p212, %r340, 0; + or.pred %p213, %p211, %p212; + mov.f64 %fd1080, %fd1079; + @%p213 bra $L__BB6_68; + + and.pred %p215, %p155, %p4; + selp.b32 %r343, %r57, %r56, %p215; + mov.u32 %r344, 0; + mov.b64 %fd1080, {%r344, %r343}; + +$L__BB6_68: + not.pred %p216, %p5; + mov.f64 %fd1082, %fd55; + @%p216 bra $L__BB6_70; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r346}, %fd55; + } + xor.b32 %r347, %r346, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r348, %temp}, %fd55; + } + mov.b64 %fd1082, {%r348, %r347}; + +$L__BB6_70: + @%p202 bra $L__BB6_74; + bra.uni $L__BB6_71; + +$L__BB6_74: + mov.u32 %r349, 0; + selp.b32 %r351, %r49, 0, %p149; + or.b32 %r352, %r351, 2146435072; + selp.b32 %r353, %r352, %r351, %p152; + mov.b64 %fd1082, {%r349, %r353}; + bra.uni $L__BB6_75; + +$L__BB6_71: + setp.gt.s32 %p218, %r49, -1; + @%p218 bra $L__BB6_75; + + cvt.rzi.f64.f64 %fd670, %fd649; + setp.eq.f64 %p219, %fd670, 0d4008000000000000; + @%p219 bra $L__BB6_75; + + mov.f64 %fd1082, 0dFFF8000000000000; + +$L__BB6_75: + cvt.f64.f32 %fd1056, %f69; + add.f64 %fd1055, %fd1052, 0d4008000000000000; + selp.f64 %fd1083, %fd1082, %fd1055, %p153; + @%p18 bra $L__BB6_80; + + setp.eq.s32 %p223, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r354, %temp}, %fd649; + } + setp.eq.s32 %p224, %r354, 0; + and.pred %p225, %p223, %p224; + @%p225 bra $L__BB6_79; + bra.uni $L__BB6_77; + +$L__BB6_79: + mov.u32 %r361, 0; + mov.b64 %fd1083, {%r361, %r59}; + bra.uni $L__BB6_80; + +$L__BB6_77: + and.b32 %r355, %r49, 2147483647; + setp.ne.s32 %p226, %r355, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r356, %temp}, %fd41; + } + setp.ne.s32 %p227, %r356, 0; + or.pred %p228, %p226, %p227; + mov.f64 %fd1083, %fd1082; + @%p228 bra $L__BB6_80; + + setp.ne.s32 %p229, %r58, 1071644672; + and.pred %p230, %p229, %p5; + selp.b32 %r359, %r63, %r62, %p230; + mov.u32 %r360, 0; + mov.b64 %fd1083, {%r360, %r359}; + +$L__BB6_80: + setp.eq.f32 %p231, %f69, 0f3F800000; + selp.f64 %fd674, 0d3FF0000000000000, %fd1083, %p231; + cvt.f64.f32 %fd675, %f548; + add.f64 %fd676, %fd1080, 0d3FF0000000000000; + selp.f64 %fd677, 0d4000000000000000, %fd676, %p231; + fma.rn.f64 %fd87, %fd674, %fd675, %fd677; + not.pred %p232, %p6; + mov.f64 %fd1085, %fd56; + @%p232 bra $L__BB6_82; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r362}, %fd56; + } + xor.b32 %r363, %r362, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r364, %temp}, %fd56; + } + mov.b64 %fd1085, {%r364, %r363}; + +$L__BB6_82: + @%p202 bra $L__BB6_86; + bra.uni $L__BB6_83; + +$L__BB6_86: + mov.u32 %r365, 0; + mov.b64 %fd1085, {%r365, %r60}; + bra.uni $L__BB6_87; + +$L__BB6_83: + setp.gt.s32 %p234, %r49, -1; + @%p234 bra $L__BB6_87; + + cvt.rzi.f64.f64 %fd679, %fd651; + setp.eq.f64 %p235, %fd679, 0d4010000000000000; + @%p235 bra $L__BB6_87; + + mov.f64 %fd1085, 0dFFF8000000000000; + +$L__BB6_87: + cvt.f64.f32 %fd1058, %f69; + add.f64 %fd1057, %fd1052, 0d4010000000000000; + selp.f64 %fd1086, %fd1085, %fd1057, %p158; + @%p19 bra $L__BB6_92; + + setp.eq.s32 %p237, %r65, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r366, %temp}, %fd651; + } + setp.eq.s32 %p238, %r366, 0; + and.pred %p239, %p237, %p238; + @%p239 bra $L__BB6_91; + bra.uni $L__BB6_89; + +$L__BB6_91: + mov.u32 %r370, 0; + mov.b64 %fd1086, {%r370, %r66}; + bra.uni $L__BB6_92; + +$L__BB6_89: + and.b32 %r367, %r49, 2147483647; + setp.ne.s32 %p240, %r367, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r368, %temp}, %fd41; + } + setp.ne.s32 %p241, %r368, 0; + or.pred %p242, %p240, %p241; + mov.f64 %fd1086, %fd1085; + @%p242 bra $L__BB6_92; + + mov.u32 %r369, 0; + mov.b64 %fd1086, {%r369, %r68}; + +$L__BB6_92: + selp.f64 %fd683, 0d3FF0000000000000, %fd1086, %p231; + cvt.f64.f32 %fd684, %f550; + fma.rn.f64 %fd685, %fd683, %fd684, %fd87; + cvt.rn.f32.f64 %f102, %fd685; + not.pred %p244, %p7; + mov.f64 %fd1088, %fd57; + @%p244 bra $L__BB6_94; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r371}, %fd57; + } + xor.b32 %r372, %r371, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r373, %temp}, %fd57; + } + mov.b64 %fd1088, {%r373, %r372}; + +$L__BB6_94: + setp.eq.f32 %p245, %f71, 0f00000000; + @%p245 bra $L__BB6_98; + bra.uni $L__BB6_95; + +$L__BB6_98: + mov.u32 %r374, 0; + selp.b32 %r376, %r64, 0, %p144; + or.b32 %r377, %r376, 2146435072; + selp.b32 %r378, %r377, %r376, %p146; + mov.b64 %fd1088, {%r374, %r378}; + bra.uni $L__BB6_99; + +$L__BB6_95: + setp.gt.s32 %p246, %r64, -1; + @%p246 bra $L__BB6_99; + + cvt.rzi.f64.f64 %fd687, %fd646; + setp.eq.f64 %p247, %fd687, 0d4000000000000000; + @%p247 bra $L__BB6_99; + + mov.f64 %fd1088, 0dFFF8000000000000; + +$L__BB6_99: + cvt.f64.f32 %fd1060, %f71; + add.f64 %fd1059, %fd1051, 0d4000000000000000; + selp.f64 %fd1089, %fd1088, %fd1059, %p161; + @%p20 bra $L__BB6_104; + + setp.eq.s32 %p251, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r379, %temp}, %fd646; + } + setp.eq.s32 %p252, %r379, 0; + and.pred %p253, %p251, %p252; + @%p253 bra $L__BB6_103; + bra.uni $L__BB6_101; + +$L__BB6_103: + mov.u32 %r386, 0; + mov.b64 %fd1089, {%r386, %r69}; + bra.uni $L__BB6_104; + +$L__BB6_101: + and.b32 %r380, %r64, 2147483647; + setp.ne.s32 %p254, %r380, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r381, %temp}, %fd42; + } + setp.ne.s32 %p255, %r381, 0; + or.pred %p256, %p254, %p255; + mov.f64 %fd1089, %fd1088; + @%p256 bra $L__BB6_104; + + and.pred %p258, %p155, %p7; + selp.b32 %r384, %r57, %r56, %p258; + mov.u32 %r385, 0; + mov.b64 %fd1089, {%r385, %r384}; + +$L__BB6_104: + not.pred %p259, %p8; + mov.f64 %fd1091, %fd58; + @%p259 bra $L__BB6_106; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r387}, %fd58; + } + xor.b32 %r388, %r387, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r389, %temp}, %fd58; + } + mov.b64 %fd1091, {%r389, %r388}; + +$L__BB6_106: + @%p245 bra $L__BB6_110; + bra.uni $L__BB6_107; + +$L__BB6_110: + mov.u32 %r390, 0; + selp.b32 %r392, %r64, 0, %p149; + or.b32 %r393, %r392, 2146435072; + selp.b32 %r394, %r393, %r392, %p152; + mov.b64 %fd1091, {%r390, %r394}; + bra.uni $L__BB6_111; + +$L__BB6_107: + setp.gt.s32 %p261, %r64, -1; + @%p261 bra $L__BB6_111; + + cvt.rzi.f64.f64 %fd692, %fd649; + setp.eq.f64 %p262, %fd692, 0d4008000000000000; + @%p262 bra $L__BB6_111; + + mov.f64 %fd1091, 0dFFF8000000000000; + +$L__BB6_111: + cvt.f64.f32 %fd1062, %f71; + add.f64 %fd1061, %fd1051, 0d4008000000000000; + selp.f64 %fd1092, %fd1091, %fd1061, %p168; + @%p21 bra $L__BB6_116; + + setp.eq.s32 %p266, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r395, %temp}, %fd649; + } + setp.eq.s32 %p267, %r395, 0; + and.pred %p268, %p266, %p267; + @%p268 bra $L__BB6_115; + bra.uni $L__BB6_113; + +$L__BB6_115: + mov.u32 %r402, 0; + mov.b64 %fd1092, {%r402, %r71}; + bra.uni $L__BB6_116; + +$L__BB6_113: + and.b32 %r396, %r64, 2147483647; + setp.ne.s32 %p269, %r396, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r397, %temp}, %fd42; + } + setp.ne.s32 %p270, %r397, 0; + or.pred %p271, %p269, %p270; + mov.f64 %fd1092, %fd1091; + @%p271 bra $L__BB6_116; + + setp.ne.s32 %p272, %r58, 1071644672; + and.pred %p273, %p272, %p8; + selp.b32 %r400, %r63, %r62, %p273; + mov.u32 %r401, 0; + mov.b64 %fd1092, {%r401, %r400}; + +$L__BB6_116: + setp.eq.f32 %p274, %f71, 0f3F800000; + selp.f64 %fd696, 0d3FF0000000000000, %fd1092, %p274; + cvt.f64.f32 %fd697, %f549; + add.f64 %fd698, %fd1089, 0d3FF0000000000000; + selp.f64 %fd699, 0d4000000000000000, %fd698, %p274; + fma.rn.f64 %fd112, %fd696, %fd697, %fd699; + not.pred %p275, %p9; + mov.f64 %fd1094, %fd59; + @%p275 bra $L__BB6_118; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r403}, %fd59; + } + xor.b32 %r404, %r403, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r405, %temp}, %fd59; + } + mov.b64 %fd1094, {%r405, %r404}; + +$L__BB6_118: + @%p245 bra $L__BB6_122; + bra.uni $L__BB6_119; + +$L__BB6_122: + mov.u32 %r406, 0; + mov.b64 %fd1094, {%r406, %r72}; + bra.uni $L__BB6_123; + +$L__BB6_119: + setp.gt.s32 %p277, %r64, -1; + @%p277 bra $L__BB6_123; + + cvt.rzi.f64.f64 %fd701, %fd651; + setp.eq.f64 %p278, %fd701, 0d4010000000000000; + @%p278 bra $L__BB6_123; + + mov.f64 %fd1094, 0dFFF8000000000000; + +$L__BB6_123: + cvt.f64.f32 %fd1064, %f71; + add.f64 %fd1063, %fd1051, 0d4010000000000000; + selp.f64 %fd1095, %fd1094, %fd1063, %p169; + @%p22 bra $L__BB6_128; + + setp.eq.s32 %p280, %r65, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r407, %temp}, %fd651; + } + setp.eq.s32 %p281, %r407, 0; + and.pred %p282, %p280, %p281; + @%p282 bra $L__BB6_127; + bra.uni $L__BB6_125; + +$L__BB6_127: + mov.u32 %r411, 0; + mov.b64 %fd1095, {%r411, %r74}; + bra.uni $L__BB6_128; + +$L__BB6_125: + and.b32 %r408, %r64, 2147483647; + setp.ne.s32 %p283, %r408, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r409, %temp}, %fd42; + } + setp.ne.s32 %p284, %r409, 0; + or.pred %p285, %p283, %p284; + mov.f64 %fd1095, %fd1094; + @%p285 bra $L__BB6_128; + + mov.u32 %r410, 0; + mov.b64 %fd1095, {%r410, %r75}; + +$L__BB6_128: + selp.f64 %fd705, 0d3FF0000000000000, %fd1095, %p274; + cvt.f64.f32 %fd706, %f551; + fma.rn.f64 %fd707, %fd705, %fd706, %fd112; + cvt.rn.f32.f64 %f103, %fd707; + sqrt.rn.f32 %f104, %f102; + mul.f32 %f105, %f104, %f547; + sqrt.rn.f32 %f106, %f103; + mul.f32 %f107, %f106, %f554; + mov.f32 %f658, 0f3F000000; + div.rn.f32 %f659, %f658, %f105; + div.rn.f32 %f660, %f659, %f105; + sqrt.rn.f32 %f108, %f660; + mul.f32 %f109, %f108, %f87; + abs.f32 %f661, %f109; + setp.ltu.f32 %p287, %f661, 0f3F8060FE; + setp.ge.f32 %p288, %f661, 0f3F8060FE; + mul.f32 %f662, %f109, %f109; + selp.f32 %f663, %f661, %f662, %p288; + selp.f32 %f664, 0f3789CA3C, 0f38B1E96A, %p288; + selp.f32 %f665, 0fB9F560B9, 0fBA574D20, %p288; + fma.rn.f32 %f666, %f664, %f663, %f665; + selp.f32 %f667, 0f3BAC840B, 0f3BAAD5EA, %p288; + fma.rn.f32 %f668, %f666, %f663, %f667; + selp.f32 %f669, 0fBD0C8162, 0fBCDC1BE7, %p288; + fma.rn.f32 %f670, %f668, %f663, %f669; + selp.f32 %f671, 0f3E1CF906, 0f3DE718AF, %p288; + fma.rn.f32 %f672, %f670, %f663, %f671; + selp.f32 %f673, 0f3F6A937E, 0fBEC093AC, %p288; + fma.rn.f32 %f674, %f672, %f663, %f673; + selp.f32 %f675, 0f3F20D842, 0f3E0375D3, %p288; + fma.rn.f32 %f676, %f674, %f663, %f675; + neg.f32 %f677, %f661; + selp.f32 %f678, %f677, %f109, %p288; + fma.rn.f32 %f3244, %f676, %f678, %f678; + @%p287 bra $L__BB6_130; + + mov.f32 %f3147, 0f3F800000; + ex2.approx.ftz.f32 %f679, %f3244; + sub.f32 %f681, %f3147, %f679; + mov.b32 %r412, %f681; + mov.b32 %r413, %f109; + and.b32 %r414, %r413, -2147483648; + or.b32 %r415, %r414, %r412; + mov.b32 %f3244, %r415; + +$L__BB6_130: + mul.f32 %f113, %f108, %f88; + abs.f32 %f682, %f113; + setp.ltu.f32 %p289, %f682, 0f3F8060FE; + setp.ge.f32 %p290, %f682, 0f3F8060FE; + mul.f32 %f683, %f113, %f113; + selp.f32 %f684, %f682, %f683, %p290; + selp.f32 %f685, 0f3789CA3C, 0f38B1E96A, %p290; + selp.f32 %f686, 0fB9F560B9, 0fBA574D20, %p290; + fma.rn.f32 %f687, %f685, %f684, %f686; + selp.f32 %f688, 0f3BAC840B, 0f3BAAD5EA, %p290; + fma.rn.f32 %f689, %f687, %f684, %f688; + selp.f32 %f690, 0fBD0C8162, 0fBCDC1BE7, %p290; + fma.rn.f32 %f691, %f689, %f684, %f690; + selp.f32 %f692, 0f3E1CF906, 0f3DE718AF, %p290; + fma.rn.f32 %f693, %f691, %f684, %f692; + selp.f32 %f694, 0f3F6A937E, 0fBEC093AC, %p290; + fma.rn.f32 %f695, %f693, %f684, %f694; + selp.f32 %f696, 0f3F20D842, 0f3E0375D3, %p290; + fma.rn.f32 %f697, %f695, %f684, %f696; + neg.f32 %f698, %f682; + selp.f32 %f699, %f698, %f113, %p290; + fma.rn.f32 %f3245, %f697, %f699, %f699; + @%p289 bra $L__BB6_132; + + mov.f32 %f3146, 0f3F800000; + ex2.approx.ftz.f32 %f700, %f3245; + sub.f32 %f702, %f3146, %f700; + mov.b32 %r416, %f702; + mov.b32 %r417, %f113; + and.b32 %r418, %r417, -2147483648; + or.b32 %r419, %r418, %r416; + mov.b32 %f3245, %r419; + +$L__BB6_132: + mov.f32 %f3070, 0f3F000000; + sub.f32 %f703, %f3244, %f3245; + mul.f32 %f117, %f703, 0f3F000000; + div.rn.f32 %f705, %f3070, %f107; + div.rn.f32 %f706, %f705, %f107; + cvt.rn.f32.s32 %f118, %r1375; + sub.f32 %f119, %f118, %f3294; + add.f32 %f120, %f119, 0f3F000000; + sqrt.rn.f32 %f121, %f706; + mul.f32 %f122, %f121, %f120; + abs.f32 %f707, %f122; + setp.ltu.f32 %p291, %f707, 0f3F8060FE; + setp.ge.f32 %p292, %f707, 0f3F8060FE; + mul.f32 %f708, %f122, %f122; + selp.f32 %f709, %f707, %f708, %p292; + selp.f32 %f710, 0f3789CA3C, 0f38B1E96A, %p292; + selp.f32 %f711, 0fB9F560B9, 0fBA574D20, %p292; + fma.rn.f32 %f712, %f710, %f709, %f711; + selp.f32 %f713, 0f3BAC840B, 0f3BAAD5EA, %p292; + fma.rn.f32 %f714, %f712, %f709, %f713; + selp.f32 %f715, 0fBD0C8162, 0fBCDC1BE7, %p292; + fma.rn.f32 %f716, %f714, %f709, %f715; + selp.f32 %f717, 0f3E1CF906, 0f3DE718AF, %p292; + fma.rn.f32 %f718, %f716, %f709, %f717; + selp.f32 %f719, 0f3F6A937E, 0fBEC093AC, %p292; + fma.rn.f32 %f720, %f718, %f709, %f719; + selp.f32 %f721, 0f3F20D842, 0f3E0375D3, %p292; + fma.rn.f32 %f722, %f720, %f709, %f721; + neg.f32 %f723, %f707; + selp.f32 %f724, %f723, %f122, %p292; + fma.rn.f32 %f3246, %f722, %f724, %f724; + @%p291 bra $L__BB6_134; + + mov.f32 %f3145, 0f3F800000; + ex2.approx.ftz.f32 %f725, %f3246; + sub.f32 %f727, %f3145, %f725; + mov.b32 %r420, %f727; + mov.b32 %r421, %f122; + and.b32 %r422, %r421, -2147483648; + or.b32 %r423, %r422, %r420; + mov.b32 %f3246, %r423; + +$L__BB6_134: + cvt.rn.f32.s32 %f3072, %r1375; + sub.f32 %f3071, %f3072, %f3294; + add.f32 %f126, %f3071, 0fBF000000; + mul.f32 %f127, %f121, %f126; + abs.f32 %f728, %f127; + setp.ltu.f32 %p293, %f728, 0f3F8060FE; + setp.ge.f32 %p294, %f728, 0f3F8060FE; + mul.f32 %f729, %f127, %f127; + selp.f32 %f730, %f728, %f729, %p294; + selp.f32 %f731, 0f3789CA3C, 0f38B1E96A, %p294; + selp.f32 %f732, 0fB9F560B9, 0fBA574D20, %p294; + fma.rn.f32 %f733, %f731, %f730, %f732; + selp.f32 %f734, 0f3BAC840B, 0f3BAAD5EA, %p294; + fma.rn.f32 %f735, %f733, %f730, %f734; + selp.f32 %f736, 0fBD0C8162, 0fBCDC1BE7, %p294; + fma.rn.f32 %f737, %f735, %f730, %f736; + selp.f32 %f738, 0f3E1CF906, 0f3DE718AF, %p294; + fma.rn.f32 %f739, %f737, %f730, %f738; + selp.f32 %f740, 0f3F6A937E, 0fBEC093AC, %p294; + fma.rn.f32 %f741, %f739, %f730, %f740; + selp.f32 %f742, 0f3F20D842, 0f3E0375D3, %p294; + fma.rn.f32 %f743, %f741, %f730, %f742; + neg.f32 %f744, %f728; + selp.f32 %f745, %f744, %f127, %p294; + fma.rn.f32 %f3247, %f743, %f745, %f745; + @%p293 bra $L__BB6_136; + + mov.f32 %f3144, 0f3F800000; + ex2.approx.ftz.f32 %f746, %f3247; + sub.f32 %f748, %f3144, %f746; + mov.b32 %r424, %f748; + mov.b32 %r425, %f127; + and.b32 %r426, %r425, -2147483648; + or.b32 %r427, %r426, %r424; + mov.b32 %f3247, %r427; + +$L__BB6_136: + cvt.rn.f32.s32 %f3075, %r1374; + add.f32 %f3074, %f3075, 0f3F000000; + sub.f32 %f3073, %f3074, %f3295; + sub.f32 %f750, %f3246, %f3247; + mul.f32 %f131, %f750, 0f3F000000; + div.rn.f32 %f132, %f3073, %f105; + abs.f32 %f133, %f132; + setp.lt.f32 %p295, %f133, 0f00800000; + mul.f32 %f751, %f133, 0f4B800000; + selp.f32 %f752, %f751, %f133, %p295; + selp.f32 %f753, 0fC3170000, 0fC2FE0000, %p295; + mov.b32 %r428, %f752; + and.b32 %r429, %r428, 8388607; + or.b32 %r430, %r429, 1065353216; + mov.b32 %f754, %r430; + shr.u32 %r431, %r428, 23; + cvt.rn.f32.u32 %f755, %r431; + add.f32 %f756, %f753, %f755; + setp.gt.f32 %p296, %f754, 0f3FB504F3; + mul.f32 %f757, %f754, 0f3F000000; + add.f32 %f758, %f756, 0f3F800000; + selp.f32 %f759, %f758, %f756, %p296; + selp.f32 %f760, %f757, %f754, %p296; + add.f32 %f761, %f760, 0fBF800000; + add.f32 %f762, %f760, 0f3F800000; + rcp.approx.ftz.f32 %f763, %f762; + add.f32 %f764, %f761, %f761; + mul.f32 %f766, %f764, %f763; + mul.f32 %f767, %f766, %f766; + mov.f32 %f768, 0f3C4CAF63; + mov.f32 %f769, 0f3B18F0FE; + fma.rn.f32 %f770, %f769, %f767, %f768; + mov.f32 %f771, 0f3DAAAABD; + fma.rn.f32 %f772, %f770, %f767, %f771; + mul.rn.f32 %f773, %f772, %f767; + mul.rn.f32 %f774, %f773, %f766; + sub.f32 %f775, %f761, %f766; + add.f32 %f776, %f775, %f775; + neg.f32 %f777, %f766; + fma.rn.f32 %f778, %f777, %f761, %f776; + mul.rn.f32 %f779, %f763, %f778; + add.f32 %f780, %f774, %f766; + sub.f32 %f781, %f766, %f780; + add.f32 %f782, %f774, %f781; + add.f32 %f783, %f779, %f782; + add.f32 %f784, %f780, %f783; + sub.f32 %f785, %f780, %f784; + add.f32 %f786, %f783, %f785; + mov.f32 %f787, 0f3F317200; + mul.rn.f32 %f788, %f759, %f787; + mov.f32 %f789, 0f35BFBE8E; + mul.rn.f32 %f790, %f759, %f789; + add.f32 %f791, %f788, %f784; + sub.f32 %f792, %f788, %f791; + add.f32 %f793, %f784, %f792; + add.f32 %f794, %f786, %f793; + add.f32 %f795, %f790, %f794; + add.f32 %f796, %f791, %f795; + sub.f32 %f797, %f791, %f796; + add.f32 %f798, %f795, %f797; + mul.rn.f32 %f799, %f613, %f796; + neg.f32 %f800, %f799; + fma.rn.f32 %f801, %f613, %f796, %f800; + fma.rn.f32 %f802, %f613, %f798, %f801; + mov.f32 %f3279, 0f00000000; + fma.rn.f32 %f804, %f3279, %f796, %f802; + add.rn.f32 %f805, %f799, %f804; + neg.f32 %f806, %f805; + add.rn.f32 %f807, %f799, %f806; + add.rn.f32 %f808, %f807, %f804; + mov.b32 %r432, %f805; + setp.eq.s32 %p297, %r432, 1118925336; + add.s32 %r433, %r432, -1; + mov.b32 %f809, %r433; + add.f32 %f810, %f808, 0f37000000; + selp.f32 %f134, %f810, %f808, %p297; + selp.f32 %f811, %f809, %f805, %p297; + mov.f32 %f812, 0f3FB8AA3B; + mul.rn.f32 %f813, %f811, %f812; + cvt.rzi.f32.f32 %f814, %f813; + abs.f32 %f815, %f814; + setp.gt.f32 %p298, %f815, 0f42FC0000; + mov.b32 %r434, %f814; + and.b32 %r435, %r434, -2147483648; + or.b32 %r436, %r435, 1123811328; + mov.b32 %f816, %r436; + selp.f32 %f817, %f816, %f814, %p298; + mov.f32 %f818, 0fBF317218; + fma.rn.f32 %f819, %f817, %f818, %f811; + mov.f32 %f820, 0f3102E308; + fma.rn.f32 %f821, %f817, %f820, %f819; + mul.f32 %f822, %f821, 0f3FB8AA3B; + add.f32 %f823, %f817, 0f4B40007F; + mov.b32 %r437, %f823; + shl.b32 %r438, %r437, 23; + mov.b32 %f824, %r438; + ex2.approx.ftz.f32 %f825, %f822; + mul.f32 %f135, %f825, %f824; + setp.eq.f32 %p299, %f135, 0f7F800000; + mov.f32 %f3248, 0f7F800000; + @%p299 bra $L__BB6_138; + + fma.rn.f32 %f3248, %f135, %f134, %f135; + +$L__BB6_138: + mov.f32 %f3080, 0f3F800000; + cvt.rzi.f32.f32 %f3079, %f3080; + add.f32 %f3078, %f3079, %f3079; + sub.f32 %f3077, %f613, %f3078; + abs.f32 %f3076, %f3077; + setp.lt.f32 %p300, %f132, 0f00000000; + setp.eq.f32 %p301, %f3076, 0f3F800000; + and.pred %p30, %p300, %p301; + setp.eq.f32 %p302, %f132, 0f00000000; + @%p302 bra $L__BB6_142; + bra.uni $L__BB6_139; + +$L__BB6_142: + add.f32 %f830, %f132, %f132; + selp.f32 %f3250, %f830, 0f00000000, %p301; + bra.uni $L__BB6_143; + +$L__BB6_139: + mov.b32 %r439, %f3248; + xor.b32 %r440, %r439, -2147483648; + mov.b32 %f826, %r440; + selp.f32 %f3250, %f826, %f3248, %p30; + setp.geu.f32 %p303, %f132, 0f00000000; + @%p303 bra $L__BB6_143; + + cvt.rzi.f32.f32 %f828, %f613; + setp.eq.f32 %p304, %f828, 0f40000000; + @%p304 bra $L__BB6_143; + + mov.f32 %f3250, 0f7FFFFFFF; + +$L__BB6_143: + add.f32 %f831, %f133, 0f40000000; + mov.b32 %r441, %f831; + setp.lt.s32 %p306, %r441, 2139095040; + @%p306 bra $L__BB6_148; + + setp.gtu.f32 %p307, %f133, 0f7F800000; + @%p307 bra $L__BB6_147; + bra.uni $L__BB6_145; + +$L__BB6_147: + add.f32 %f3250, %f132, 0f40000000; + bra.uni $L__BB6_148; + +$L__BB6_145: + setp.neu.f32 %p308, %f133, 0f7F800000; + @%p308 bra $L__BB6_148; + + selp.f32 %f3250, 0fFF800000, 0f7F800000, %p30; + +$L__BB6_148: + mov.f32 %f3088, 0f3102E308; + mov.f32 %f3087, 0fBF317218; + mov.f32 %f3086, 0f35BFBE8E; + mov.f32 %f3085, 0f3F317200; + mov.f32 %f3084, 0f3DAAAABD; + mov.f32 %f3083, 0f3C4CAF63; + mov.f32 %f3082, 0f3B18F0FE; + mov.f32 %f3081, 0f3F000000; + mul.f32 %f833, %f3250, 0fBF000000; + setp.eq.f32 %p309, %f132, 0f3F800000; + selp.f32 %f834, 0fBF000000, %f833, %p309; + mov.f32 %f836, 0f3BBB989D; + fma.rn.f32 %f837, %f834, %f836, %f3081; + mov.f32 %f839, 0f437C0000; + cvt.sat.f32.f32 %f840, %f837; + mov.f32 %f841, 0f4B400001; + fma.rm.f32 %f842, %f840, %f839, %f841; + add.f32 %f843, %f842, 0fCB40007F; + neg.f32 %f844, %f843; + fma.rn.f32 %f845, %f834, %f812, %f844; + mov.f32 %f846, 0f32A57060; + fma.rn.f32 %f847, %f834, %f846, %f845; + mov.b32 %r442, %f842; + shl.b32 %r443, %r442, 23; + mov.b32 %f848, %r443; + ex2.approx.ftz.f32 %f849, %f847; + mul.f32 %f144, %f849, %f848; + div.rn.f32 %f145, %f88, %f105; + abs.f32 %f146, %f145; + setp.lt.f32 %p310, %f146, 0f00800000; + mul.f32 %f850, %f146, 0f4B800000; + selp.f32 %f851, %f850, %f146, %p310; + selp.f32 %f852, 0fC3170000, 0fC2FE0000, %p310; + mov.b32 %r444, %f851; + and.b32 %r445, %r444, 8388607; + or.b32 %r446, %r445, 1065353216; + mov.b32 %f853, %r446; + shr.u32 %r447, %r444, 23; + cvt.rn.f32.u32 %f854, %r447; + add.f32 %f855, %f852, %f854; + setp.gt.f32 %p311, %f853, 0f3FB504F3; + mul.f32 %f856, %f853, 0f3F000000; + add.f32 %f857, %f855, 0f3F800000; + selp.f32 %f858, %f857, %f855, %p311; + selp.f32 %f859, %f856, %f853, %p311; + add.f32 %f860, %f859, 0fBF800000; + add.f32 %f861, %f859, 0f3F800000; + rcp.approx.ftz.f32 %f862, %f861; + add.f32 %f863, %f860, %f860; + mul.f32 %f865, %f863, %f862; + mul.f32 %f866, %f865, %f865; + fma.rn.f32 %f869, %f3082, %f866, %f3083; + fma.rn.f32 %f871, %f869, %f866, %f3084; + mul.rn.f32 %f872, %f871, %f866; + mul.rn.f32 %f873, %f872, %f865; + sub.f32 %f874, %f860, %f865; + add.f32 %f875, %f874, %f874; + neg.f32 %f876, %f865; + fma.rn.f32 %f877, %f876, %f860, %f875; + mul.rn.f32 %f878, %f862, %f877; + add.f32 %f879, %f873, %f865; + sub.f32 %f880, %f865, %f879; + add.f32 %f881, %f873, %f880; + add.f32 %f882, %f878, %f881; + add.f32 %f883, %f879, %f882; + sub.f32 %f884, %f879, %f883; + add.f32 %f885, %f882, %f884; + mul.rn.f32 %f887, %f858, %f3085; + mul.rn.f32 %f889, %f858, %f3086; + add.f32 %f890, %f887, %f883; + sub.f32 %f891, %f887, %f890; + add.f32 %f892, %f883, %f891; + add.f32 %f893, %f885, %f892; + add.f32 %f894, %f889, %f893; + add.f32 %f895, %f890, %f894; + sub.f32 %f896, %f890, %f895; + add.f32 %f897, %f894, %f896; + mul.rn.f32 %f898, %f613, %f895; + neg.f32 %f899, %f898; + fma.rn.f32 %f900, %f613, %f895, %f899; + fma.rn.f32 %f901, %f613, %f897, %f900; + fma.rn.f32 %f903, %f3279, %f895, %f901; + add.rn.f32 %f904, %f898, %f903; + neg.f32 %f905, %f904; + add.rn.f32 %f906, %f898, %f905; + add.rn.f32 %f907, %f906, %f903; + mov.b32 %r448, %f904; + setp.eq.s32 %p312, %r448, 1118925336; + add.s32 %r449, %r448, -1; + mov.b32 %f908, %r449; + add.f32 %f909, %f907, 0f37000000; + selp.f32 %f147, %f909, %f907, %p312; + selp.f32 %f910, %f908, %f904, %p312; + mul.rn.f32 %f911, %f910, %f812; + cvt.rzi.f32.f32 %f912, %f911; + abs.f32 %f913, %f912; + setp.gt.f32 %p313, %f913, 0f42FC0000; + mov.b32 %r450, %f912; + and.b32 %r451, %r450, -2147483648; + or.b32 %r452, %r451, 1123811328; + mov.b32 %f914, %r452; + selp.f32 %f915, %f914, %f912, %p313; + fma.rn.f32 %f917, %f915, %f3087, %f910; + fma.rn.f32 %f919, %f915, %f3088, %f917; + mul.f32 %f920, %f919, 0f3FB8AA3B; + add.f32 %f921, %f915, 0f4B40007F; + mov.b32 %r453, %f921; + shl.b32 %r454, %r453, 23; + mov.b32 %f922, %r454; + ex2.approx.ftz.f32 %f923, %f920; + mul.f32 %f148, %f923, %f922; + setp.eq.f32 %p314, %f148, 0f7F800000; + mov.f32 %f3251, 0f7F800000; + @%p314 bra $L__BB6_150; + + fma.rn.f32 %f3251, %f148, %f147, %f148; + +$L__BB6_150: + setp.lt.f32 %p315, %f145, 0f00000000; + and.pred %p31, %p315, %p301; + setp.eq.f32 %p317, %f145, 0f00000000; + @%p317 bra $L__BB6_154; + bra.uni $L__BB6_151; + +$L__BB6_154: + add.f32 %f928, %f145, %f145; + selp.f32 %f3253, %f928, 0f00000000, %p301; + bra.uni $L__BB6_155; + +$L__BB6_151: + mov.b32 %r455, %f3251; + xor.b32 %r456, %r455, -2147483648; + mov.b32 %f924, %r456; + selp.f32 %f3253, %f924, %f3251, %p31; + setp.geu.f32 %p318, %f145, 0f00000000; + @%p318 bra $L__BB6_155; + + cvt.rzi.f32.f32 %f926, %f613; + setp.eq.f32 %p319, %f926, 0f40000000; + @%p319 bra $L__BB6_155; + + mov.f32 %f3253, 0f7FFFFFFF; + +$L__BB6_155: + add.f32 %f929, %f146, 0f40000000; + mov.b32 %r457, %f929; + setp.lt.s32 %p321, %r457, 2139095040; + @%p321 bra $L__BB6_160; + + setp.gtu.f32 %p322, %f146, 0f7F800000; + @%p322 bra $L__BB6_159; + bra.uni $L__BB6_157; + +$L__BB6_159: + add.f32 %f3253, %f145, 0f40000000; + bra.uni $L__BB6_160; + +$L__BB6_157: + setp.neu.f32 %p323, %f146, 0f7F800000; + @%p323 bra $L__BB6_160; + + selp.f32 %f3253, 0fFF800000, 0f7F800000, %p31; + +$L__BB6_160: + mov.f32 %f3093, 0f32A57060; + mov.f32 %f3092, 0f4B400001; + mov.f32 %f3091, 0f437C0000; + mov.f32 %f3090, 0f3BBB989D; + mov.f32 %f3089, 0f3F000000; + mul.f32 %f930, %f3253, 0fBF000000; + setp.eq.f32 %p324, %f145, 0f3F800000; + selp.f32 %f931, 0fBF000000, %f930, %p324; + fma.rn.f32 %f934, %f931, %f3090, %f3089; + cvt.sat.f32.f32 %f937, %f934; + fma.rm.f32 %f939, %f937, %f3091, %f3092; + add.f32 %f940, %f939, 0fCB40007F; + neg.f32 %f941, %f940; + fma.rn.f32 %f942, %f931, %f812, %f941; + fma.rn.f32 %f944, %f931, %f3093, %f942; + mov.b32 %r458, %f939; + shl.b32 %r459, %r458, 23; + mov.b32 %f945, %r459; + ex2.approx.ftz.f32 %f946, %f944; + mul.f32 %f157, %f946, %f945; + sub.f32 %f947, %f144, %f157; + div.rn.f32 %f158, %f72, %f105; + mul.f32 %f948, %f158, %f947; + mul.f32 %f159, %f131, %f948; + cvt.f64.f32 %fd121, %f105; + { + .reg .b32 %temp; + mov.b64 {%temp, %r104}, %fd121; + } + abs.f64 %fd122, %fd121; + { // callseq 127, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd122; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1097, [retval0+0]; + } // callseq 127 + setp.lt.s32 %p325, %r104, 0; + and.pred %p32, %p325, %p149; + not.pred %p327, %p32; + @%p327 bra $L__BB6_162; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r460}, %fd1097; + } + xor.b32 %r461, %r460, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r462, %temp}, %fd1097; + } + mov.b64 %fd1097, {%r462, %r461}; + +$L__BB6_162: + setp.eq.f32 %p328, %f105, 0f00000000; + @%p328 bra $L__BB6_166; + bra.uni $L__BB6_163; + +$L__BB6_166: + mov.u32 %r463, 0; + selp.b32 %r464, %r104, 0, %p149; + or.b32 %r465, %r464, 2146435072; + selp.b32 %r466, %r465, %r464, %p152; + mov.b64 %fd1097, {%r463, %r466}; + bra.uni $L__BB6_167; + +$L__BB6_163: + setp.gt.s32 %p329, %r104, -1; + @%p329 bra $L__BB6_167; + + cvt.rzi.f64.f64 %fd710, %fd649; + setp.eq.f64 %p330, %fd710, 0d4008000000000000; + @%p330 bra $L__BB6_167; + + mov.f64 %fd1097, 0dFFF8000000000000; + +$L__BB6_167: + add.f64 %fd128, %fd121, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r467}, %fd128; + } + and.b32 %r468, %r467, 2146435072; + setp.ne.s32 %p333, %r468, 2146435072; + mov.f64 %fd1098, %fd1097; + @%p333 bra $L__BB6_173; + + setp.gtu.f64 %p334, %fd122, 0d7FF0000000000000; + mov.f64 %fd1098, %fd128; + @%p334 bra $L__BB6_173; + + setp.eq.s32 %p335, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r469, %temp}, %fd649; + } + setp.eq.s32 %p336, %r469, 0; + and.pred %p337, %p335, %p336; + @%p337 bra $L__BB6_172; + bra.uni $L__BB6_170; + +$L__BB6_172: + mov.u32 %r474, 0; + setp.gt.f64 %p344, %fd122, 0d3FF0000000000000; + selp.b32 %r475, 2146435072, 0, %p344; + xor.b32 %r476, %r475, 2146435072; + selp.b32 %r477, %r476, %r475, %p152; + setp.eq.f32 %p345, %f105, 0fBF800000; + selp.b32 %r478, 1072693248, %r477, %p345; + mov.b64 %fd1098, {%r474, %r478}; + bra.uni $L__BB6_173; + +$L__BB6_170: + { + .reg .b32 %temp; + mov.b64 {%r470, %temp}, %fd121; + } + and.b32 %r471, %r104, 2147483647; + setp.ne.s32 %p338, %r471, 2146435072; + setp.ne.s32 %p339, %r470, 0; + or.pred %p340, %p338, %p339; + mov.f64 %fd1098, %fd1097; + @%p340 bra $L__BB6_173; + + setp.ne.s32 %p341, %r58, 1071644672; + and.pred %p342, %p341, %p32; + selp.b32 %r472, %r63, %r62, %p342; + mov.u32 %r473, 0; + mov.b64 %fd1098, {%r473, %r472}; + +$L__BB6_173: + cvt.rn.f32.s32 %f3104, %r1375; + mov.f32 %f3103, 0f3102E308; + mov.f32 %f3102, 0fBF317218; + mov.f32 %f3101, 0f35BFBE8E; + mov.f32 %f3100, 0f3F317200; + mov.f32 %f3099, 0f3DAAAABD; + mov.f32 %f3098, 0f3C4CAF63; + mov.f32 %f3097, 0f3B18F0FE; + cvt.rn.f32.s32 %f3096, %r1374; + add.f32 %f3095, %f3096, 0f3F000000; + sub.f32 %f3094, %f3095, %f3295; + setp.eq.f32 %p346, %f105, 0f3F800000; + selp.f64 %fd713, 0d3FF0000000000000, %fd1098, %p346; + cvt.f64.f32 %fd714, %f72; + div.rn.f64 %fd715, %fd714, %fd713; + mul.f32 %f950, %f88, %f157; + mul.f32 %f951, %f3094, %f144; + sub.f32 %f952, %f951, %f950; + cvt.f64.f32 %fd716, %f952; + mul.f64 %fd717, %fd715, %fd716; + cvt.f64.f32 %fd718, %f131; + mul.f64 %fd719, %fd717, %fd718; + cvt.rn.f32.f64 %f160, %fd719; + add.f32 %f953, %f3104, 0f3F000000; + sub.f32 %f161, %f953, %f3294; + div.rn.f32 %f162, %f161, %f107; + abs.f32 %f163, %f162; + setp.lt.f32 %p347, %f163, 0f00800000; + mul.f32 %f954, %f163, 0f4B800000; + selp.f32 %f955, %f954, %f163, %p347; + selp.f32 %f956, 0fC3170000, 0fC2FE0000, %p347; + mov.b32 %r479, %f955; + and.b32 %r480, %r479, 8388607; + or.b32 %r481, %r480, 1065353216; + mov.b32 %f957, %r481; + shr.u32 %r482, %r479, 23; + cvt.rn.f32.u32 %f958, %r482; + add.f32 %f959, %f956, %f958; + setp.gt.f32 %p348, %f957, 0f3FB504F3; + mul.f32 %f960, %f957, 0f3F000000; + add.f32 %f961, %f959, 0f3F800000; + selp.f32 %f962, %f961, %f959, %p348; + selp.f32 %f963, %f960, %f957, %p348; + add.f32 %f964, %f963, 0fBF800000; + add.f32 %f965, %f963, 0f3F800000; + rcp.approx.ftz.f32 %f966, %f965; + add.f32 %f967, %f964, %f964; + mul.f32 %f969, %f967, %f966; + mul.f32 %f970, %f969, %f969; + fma.rn.f32 %f973, %f3097, %f970, %f3098; + fma.rn.f32 %f975, %f973, %f970, %f3099; + mul.rn.f32 %f976, %f975, %f970; + mul.rn.f32 %f977, %f976, %f969; + sub.f32 %f978, %f964, %f969; + add.f32 %f979, %f978, %f978; + neg.f32 %f980, %f969; + fma.rn.f32 %f981, %f980, %f964, %f979; + mul.rn.f32 %f982, %f966, %f981; + add.f32 %f983, %f977, %f969; + sub.f32 %f984, %f969, %f983; + add.f32 %f985, %f977, %f984; + add.f32 %f986, %f982, %f985; + add.f32 %f987, %f983, %f986; + sub.f32 %f988, %f983, %f987; + add.f32 %f989, %f986, %f988; + mul.rn.f32 %f991, %f962, %f3100; + mul.rn.f32 %f993, %f962, %f3101; + add.f32 %f994, %f991, %f987; + sub.f32 %f995, %f991, %f994; + add.f32 %f996, %f987, %f995; + add.f32 %f997, %f989, %f996; + add.f32 %f998, %f993, %f997; + add.f32 %f999, %f994, %f998; + sub.f32 %f1000, %f994, %f999; + add.f32 %f1001, %f998, %f1000; + mul.rn.f32 %f1002, %f613, %f999; + neg.f32 %f1003, %f1002; + fma.rn.f32 %f1004, %f613, %f999, %f1003; + fma.rn.f32 %f1005, %f613, %f1001, %f1004; + fma.rn.f32 %f1007, %f3279, %f999, %f1005; + add.rn.f32 %f1008, %f1002, %f1007; + neg.f32 %f1009, %f1008; + add.rn.f32 %f1010, %f1002, %f1009; + add.rn.f32 %f1011, %f1010, %f1007; + mov.b32 %r483, %f1008; + setp.eq.s32 %p349, %r483, 1118925336; + add.s32 %r484, %r483, -1; + mov.b32 %f1012, %r484; + add.f32 %f1013, %f1011, 0f37000000; + selp.f32 %f164, %f1013, %f1011, %p349; + selp.f32 %f1014, %f1012, %f1008, %p349; + mul.rn.f32 %f1016, %f1014, %f812; + cvt.rzi.f32.f32 %f1017, %f1016; + abs.f32 %f1018, %f1017; + setp.gt.f32 %p350, %f1018, 0f42FC0000; + mov.b32 %r485, %f1017; + and.b32 %r486, %r485, -2147483648; + or.b32 %r487, %r486, 1123811328; + mov.b32 %f1019, %r487; + selp.f32 %f1020, %f1019, %f1017, %p350; + fma.rn.f32 %f1022, %f1020, %f3102, %f1014; + fma.rn.f32 %f1024, %f1020, %f3103, %f1022; + mul.f32 %f1025, %f1024, 0f3FB8AA3B; + add.f32 %f1026, %f1020, 0f4B40007F; + mov.b32 %r488, %f1026; + shl.b32 %r489, %r488, 23; + mov.b32 %f1027, %r489; + ex2.approx.ftz.f32 %f1028, %f1025; + mul.f32 %f165, %f1028, %f1027; + setp.eq.f32 %p351, %f165, 0f7F800000; + mov.f32 %f3254, 0f7F800000; + @%p351 bra $L__BB6_175; + + fma.rn.f32 %f3254, %f165, %f164, %f165; + +$L__BB6_175: + setp.lt.f32 %p352, %f162, 0f00000000; + and.pred %p33, %p352, %p301; + setp.eq.f32 %p354, %f162, 0f00000000; + @%p354 bra $L__BB6_179; + bra.uni $L__BB6_176; + +$L__BB6_179: + add.f32 %f1033, %f162, %f162; + selp.f32 %f3256, %f1033, 0f00000000, %p301; + bra.uni $L__BB6_180; + +$L__BB6_176: + mov.b32 %r490, %f3254; + xor.b32 %r491, %r490, -2147483648; + mov.b32 %f1029, %r491; + selp.f32 %f3256, %f1029, %f3254, %p33; + setp.geu.f32 %p355, %f162, 0f00000000; + @%p355 bra $L__BB6_180; + + cvt.rzi.f32.f32 %f1031, %f613; + setp.eq.f32 %p356, %f1031, 0f40000000; + @%p356 bra $L__BB6_180; + + mov.f32 %f3256, 0f7FFFFFFF; + +$L__BB6_180: + abs.f32 %f3152, %f162; + add.f32 %f1034, %f3152, 0f40000000; + mov.b32 %r492, %f1034; + setp.lt.s32 %p358, %r492, 2139095040; + @%p358 bra $L__BB6_185; + + abs.f32 %f3156, %f162; + setp.gtu.f32 %p359, %f3156, 0f7F800000; + @%p359 bra $L__BB6_184; + bra.uni $L__BB6_182; + +$L__BB6_184: + add.f32 %f3256, %f162, 0f40000000; + bra.uni $L__BB6_185; + +$L__BB6_182: + abs.f32 %f3157, %f162; + setp.neu.f32 %p360, %f3157, 0f7F800000; + @%p360 bra $L__BB6_185; + + selp.f32 %f3256, 0fFF800000, 0f7F800000, %p33; + +$L__BB6_185: + mov.f32 %f3116, 0f32A57060; + mov.f32 %f3115, 0f4B400001; + mov.f32 %f3114, 0f437C0000; + mov.f32 %f3113, 0f3BBB989D; + mov.f32 %f3112, 0f3102E308; + mov.f32 %f3111, 0fBF317218; + mov.f32 %f3110, 0f35BFBE8E; + mov.f32 %f3109, 0f3F317200; + mov.f32 %f3108, 0f3DAAAABD; + mov.f32 %f3107, 0f3C4CAF63; + mov.f32 %f3106, 0f3B18F0FE; + mov.f32 %f3105, 0f3F000000; + mul.f32 %f1036, %f3256, 0fBF000000; + setp.eq.f32 %p361, %f162, 0f3F800000; + selp.f32 %f1037, 0fBF000000, %f1036, %p361; + fma.rn.f32 %f1040, %f1037, %f3113, %f3105; + cvt.sat.f32.f32 %f1043, %f1040; + fma.rm.f32 %f1045, %f1043, %f3114, %f3115; + add.f32 %f1046, %f1045, 0fCB40007F; + neg.f32 %f1047, %f1046; + fma.rn.f32 %f1048, %f1037, %f812, %f1047; + fma.rn.f32 %f1050, %f1037, %f3116, %f1048; + mov.b32 %r493, %f1045; + shl.b32 %r494, %r493, 23; + mov.b32 %f1051, %r494; + ex2.approx.ftz.f32 %f1052, %f1050; + mul.f32 %f174, %f1052, %f1051; + div.rn.f32 %f175, %f126, %f107; + abs.f32 %f176, %f175; + setp.lt.f32 %p362, %f176, 0f00800000; + mul.f32 %f1053, %f176, 0f4B800000; + selp.f32 %f1054, %f1053, %f176, %p362; + selp.f32 %f1055, 0fC3170000, 0fC2FE0000, %p362; + mov.b32 %r495, %f1054; + and.b32 %r496, %r495, 8388607; + or.b32 %r497, %r496, 1065353216; + mov.b32 %f1056, %r497; + shr.u32 %r498, %r495, 23; + cvt.rn.f32.u32 %f1057, %r498; + add.f32 %f1058, %f1055, %f1057; + setp.gt.f32 %p363, %f1056, 0f3FB504F3; + mul.f32 %f1059, %f1056, 0f3F000000; + add.f32 %f1060, %f1058, 0f3F800000; + selp.f32 %f1061, %f1060, %f1058, %p363; + selp.f32 %f1062, %f1059, %f1056, %p363; + add.f32 %f1063, %f1062, 0fBF800000; + add.f32 %f1064, %f1062, 0f3F800000; + rcp.approx.ftz.f32 %f1065, %f1064; + add.f32 %f1066, %f1063, %f1063; + mul.f32 %f1068, %f1066, %f1065; + mul.f32 %f1069, %f1068, %f1068; + fma.rn.f32 %f1072, %f3106, %f1069, %f3107; + fma.rn.f32 %f1074, %f1072, %f1069, %f3108; + mul.rn.f32 %f1075, %f1074, %f1069; + mul.rn.f32 %f1076, %f1075, %f1068; + sub.f32 %f1077, %f1063, %f1068; + add.f32 %f1078, %f1077, %f1077; + neg.f32 %f1079, %f1068; + fma.rn.f32 %f1080, %f1079, %f1063, %f1078; + mul.rn.f32 %f1081, %f1065, %f1080; + add.f32 %f1082, %f1076, %f1068; + sub.f32 %f1083, %f1068, %f1082; + add.f32 %f1084, %f1076, %f1083; + add.f32 %f1085, %f1081, %f1084; + add.f32 %f1086, %f1082, %f1085; + sub.f32 %f1087, %f1082, %f1086; + add.f32 %f1088, %f1085, %f1087; + mul.rn.f32 %f1090, %f1061, %f3109; + mul.rn.f32 %f1092, %f1061, %f3110; + add.f32 %f1093, %f1090, %f1086; + sub.f32 %f1094, %f1090, %f1093; + add.f32 %f1095, %f1086, %f1094; + add.f32 %f1096, %f1088, %f1095; + add.f32 %f1097, %f1092, %f1096; + add.f32 %f1098, %f1093, %f1097; + sub.f32 %f1099, %f1093, %f1098; + add.f32 %f1100, %f1097, %f1099; + mul.rn.f32 %f1101, %f613, %f1098; + neg.f32 %f1102, %f1101; + fma.rn.f32 %f1103, %f613, %f1098, %f1102; + fma.rn.f32 %f1104, %f613, %f1100, %f1103; + fma.rn.f32 %f1106, %f3279, %f1098, %f1104; + add.rn.f32 %f1107, %f1101, %f1106; + neg.f32 %f1108, %f1107; + add.rn.f32 %f1109, %f1101, %f1108; + add.rn.f32 %f1110, %f1109, %f1106; + mov.b32 %r499, %f1107; + setp.eq.s32 %p364, %r499, 1118925336; + add.s32 %r500, %r499, -1; + mov.b32 %f1111, %r500; + add.f32 %f1112, %f1110, 0f37000000; + selp.f32 %f177, %f1112, %f1110, %p364; + selp.f32 %f1113, %f1111, %f1107, %p364; + mul.rn.f32 %f1114, %f1113, %f812; + cvt.rzi.f32.f32 %f1115, %f1114; + abs.f32 %f1116, %f1115; + setp.gt.f32 %p365, %f1116, 0f42FC0000; + mov.b32 %r501, %f1115; + and.b32 %r502, %r501, -2147483648; + or.b32 %r503, %r502, 1123811328; + mov.b32 %f1117, %r503; + selp.f32 %f1118, %f1117, %f1115, %p365; + fma.rn.f32 %f1120, %f1118, %f3111, %f1113; + fma.rn.f32 %f1122, %f1118, %f3112, %f1120; + mul.f32 %f1123, %f1122, 0f3FB8AA3B; + add.f32 %f1124, %f1118, 0f4B40007F; + mov.b32 %r504, %f1124; + shl.b32 %r505, %r504, 23; + mov.b32 %f1125, %r505; + ex2.approx.ftz.f32 %f1126, %f1123; + mul.f32 %f178, %f1126, %f1125; + setp.eq.f32 %p366, %f178, 0f7F800000; + mov.f32 %f3257, 0f7F800000; + @%p366 bra $L__BB6_187; + + fma.rn.f32 %f3257, %f178, %f177, %f178; + +$L__BB6_187: + setp.lt.f32 %p367, %f175, 0f00000000; + and.pred %p34, %p367, %p301; + setp.eq.f32 %p369, %f175, 0f00000000; + @%p369 bra $L__BB6_191; + bra.uni $L__BB6_188; + +$L__BB6_191: + add.f32 %f1131, %f175, %f175; + selp.f32 %f3259, %f1131, 0f00000000, %p301; + bra.uni $L__BB6_192; + +$L__BB6_188: + mov.b32 %r506, %f3257; + xor.b32 %r507, %r506, -2147483648; + mov.b32 %f1127, %r507; + selp.f32 %f3259, %f1127, %f3257, %p34; + setp.geu.f32 %p370, %f175, 0f00000000; + @%p370 bra $L__BB6_192; + + cvt.rzi.f32.f32 %f1129, %f613; + setp.eq.f32 %p371, %f1129, 0f40000000; + @%p371 bra $L__BB6_192; + + mov.f32 %f3259, 0f7FFFFFFF; + +$L__BB6_192: + abs.f32 %f3158, %f175; + add.f32 %f1132, %f3158, 0f40000000; + mov.b32 %r508, %f1132; + setp.lt.s32 %p373, %r508, 2139095040; + @%p373 bra $L__BB6_197; + + abs.f32 %f3159, %f175; + setp.gtu.f32 %p374, %f3159, 0f7F800000; + @%p374 bra $L__BB6_196; + bra.uni $L__BB6_194; + +$L__BB6_196: + add.f32 %f3259, %f175, 0f40000000; + bra.uni $L__BB6_197; + +$L__BB6_194: + abs.f32 %f3160, %f175; + setp.neu.f32 %p375, %f3160, 0f7F800000; + @%p375 bra $L__BB6_197; + + selp.f32 %f3259, 0fFF800000, 0f7F800000, %p34; + +$L__BB6_197: + mov.f32 %f3121, 0f32A57060; + mov.f32 %f3120, 0f4B400001; + mov.f32 %f3119, 0f437C0000; + mov.f32 %f3118, 0f3BBB989D; + mov.f32 %f3117, 0f3F000000; + mul.f32 %f1133, %f3259, 0fBF000000; + setp.eq.f32 %p376, %f175, 0f3F800000; + selp.f32 %f1134, 0fBF000000, %f1133, %p376; + fma.rn.f32 %f1137, %f1134, %f3118, %f3117; + cvt.sat.f32.f32 %f1140, %f1137; + fma.rm.f32 %f1142, %f1140, %f3119, %f3120; + add.f32 %f1143, %f1142, 0fCB40007F; + neg.f32 %f1144, %f1143; + fma.rn.f32 %f1145, %f1134, %f812, %f1144; + fma.rn.f32 %f1147, %f1134, %f3121, %f1145; + mov.b32 %r509, %f1142; + shl.b32 %r510, %r509, 23; + mov.b32 %f1148, %r510; + ex2.approx.ftz.f32 %f1149, %f1147; + mul.f32 %f187, %f1149, %f1148; + sub.f32 %f1150, %f174, %f187; + div.rn.f32 %f188, %f72, %f107; + mul.f32 %f1151, %f188, %f1150; + mul.f32 %f189, %f117, %f1151; + cvt.f64.f32 %fd132, %f107; + { + .reg .b32 %temp; + mov.b64 {%temp, %r105}, %fd132; + } + abs.f64 %fd133, %fd132; + { // callseq 128, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd133; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1100, [retval0+0]; + } // callseq 128 + setp.lt.s32 %p377, %r105, 0; + and.pred %p35, %p377, %p149; + not.pred %p379, %p35; + @%p379 bra $L__BB6_199; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r511}, %fd1100; + } + xor.b32 %r512, %r511, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r513, %temp}, %fd1100; + } + mov.b64 %fd1100, {%r513, %r512}; + +$L__BB6_199: + setp.eq.f32 %p380, %f107, 0f00000000; + @%p380 bra $L__BB6_203; + bra.uni $L__BB6_200; + +$L__BB6_203: + mov.u32 %r514, 0; + selp.b32 %r515, %r105, 0, %p149; + or.b32 %r516, %r515, 2146435072; + selp.b32 %r517, %r516, %r515, %p152; + mov.b64 %fd1100, {%r514, %r517}; + bra.uni $L__BB6_204; + +$L__BB6_200: + setp.gt.s32 %p381, %r105, -1; + @%p381 bra $L__BB6_204; + + cvt.rzi.f64.f64 %fd722, %fd649; + setp.eq.f64 %p382, %fd722, 0d4008000000000000; + @%p382 bra $L__BB6_204; + + mov.f64 %fd1100, 0dFFF8000000000000; + +$L__BB6_204: + add.f64 %fd139, %fd132, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r518}, %fd139; + } + and.b32 %r519, %r518, 2146435072; + setp.ne.s32 %p385, %r519, 2146435072; + mov.f64 %fd1101, %fd1100; + @%p385 bra $L__BB6_210; + + setp.gtu.f64 %p386, %fd133, 0d7FF0000000000000; + mov.f64 %fd1101, %fd139; + @%p386 bra $L__BB6_210; + + setp.eq.s32 %p387, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r520, %temp}, %fd649; + } + setp.eq.s32 %p388, %r520, 0; + and.pred %p389, %p387, %p388; + @%p389 bra $L__BB6_209; + bra.uni $L__BB6_207; + +$L__BB6_209: + mov.u32 %r525, 0; + setp.gt.f64 %p396, %fd133, 0d3FF0000000000000; + selp.b32 %r526, 2146435072, 0, %p396; + xor.b32 %r527, %r526, 2146435072; + selp.b32 %r528, %r527, %r526, %p152; + setp.eq.f32 %p397, %f107, 0fBF800000; + selp.b32 %r529, 1072693248, %r528, %p397; + mov.b64 %fd1101, {%r525, %r529}; + bra.uni $L__BB6_210; + +$L__BB6_207: + { + .reg .b32 %temp; + mov.b64 {%r521, %temp}, %fd132; + } + and.b32 %r522, %r105, 2147483647; + setp.ne.s32 %p390, %r522, 2146435072; + setp.ne.s32 %p391, %r521, 0; + or.pred %p392, %p390, %p391; + mov.f64 %fd1101, %fd1100; + @%p392 bra $L__BB6_210; + + setp.ne.s32 %p393, %r58, 1071644672; + and.pred %p394, %p393, %p35; + selp.b32 %r523, %r63, %r62, %p394; + mov.u32 %r524, 0; + mov.b64 %fd1101, {%r524, %r523}; + +$L__BB6_210: + cvt.rn.f32.s32 %f3155, %r1375; + add.f32 %f3154, %f3155, 0f3F000000; + sub.f32 %f3153, %f3154, %f3294; + cvt.f64.f32 %fd1069, %f72; + cvt.rn.f32.s32 %f3129, %r1374; + mov.f32 %f3128, 0f3102E308; + mov.f32 %f3127, 0fBF317218; + mov.f32 %f3126, 0f35BFBE8E; + mov.f32 %f3125, 0f3F317200; + mov.f32 %f3124, 0f3DAAAABD; + mov.f32 %f3123, 0f3C4CAF63; + mov.f32 %f3122, 0f3B18F0FE; + setp.eq.f32 %p398, %f107, 0f3F800000; + selp.f64 %fd725, 0d3FF0000000000000, %fd1101, %p398; + div.rn.f64 %fd727, %fd1069, %fd725; + mul.f32 %f1153, %f126, %f187; + mul.f32 %f1154, %f3153, %f174; + sub.f32 %f1155, %f1154, %f1153; + cvt.f64.f32 %fd728, %f1155; + mul.f64 %fd729, %fd727, %fd728; + cvt.f64.f32 %fd730, %f117; + mul.f64 %fd731, %fd729, %fd730; + cvt.rn.f32.f64 %f190, %fd731; + add.f32 %f1156, %f3129, 0f3F800000; + sub.f32 %f1157, %f1156, %f3295; + div.rn.f32 %f191, %f1157, %f105; + abs.f32 %f192, %f191; + setp.lt.f32 %p399, %f192, 0f00800000; + mul.f32 %f1158, %f192, 0f4B800000; + selp.f32 %f1159, %f1158, %f192, %p399; + selp.f32 %f1160, 0fC3170000, 0fC2FE0000, %p399; + mov.b32 %r530, %f1159; + and.b32 %r531, %r530, 8388607; + or.b32 %r532, %r531, 1065353216; + mov.b32 %f1161, %r532; + shr.u32 %r533, %r530, 23; + cvt.rn.f32.u32 %f1162, %r533; + add.f32 %f1163, %f1160, %f1162; + setp.gt.f32 %p400, %f1161, 0f3FB504F3; + mul.f32 %f1164, %f1161, 0f3F000000; + add.f32 %f1165, %f1163, 0f3F800000; + selp.f32 %f1166, %f1165, %f1163, %p400; + selp.f32 %f1167, %f1164, %f1161, %p400; + add.f32 %f1168, %f1167, 0fBF800000; + add.f32 %f1169, %f1167, 0f3F800000; + rcp.approx.ftz.f32 %f1170, %f1169; + add.f32 %f1171, %f1168, %f1168; + mul.f32 %f1173, %f1171, %f1170; + mul.f32 %f1174, %f1173, %f1173; + fma.rn.f32 %f1177, %f3122, %f1174, %f3123; + fma.rn.f32 %f1179, %f1177, %f1174, %f3124; + mul.rn.f32 %f1180, %f1179, %f1174; + mul.rn.f32 %f1181, %f1180, %f1173; + sub.f32 %f1182, %f1168, %f1173; + add.f32 %f1183, %f1182, %f1182; + neg.f32 %f1184, %f1173; + fma.rn.f32 %f1185, %f1184, %f1168, %f1183; + mul.rn.f32 %f1186, %f1170, %f1185; + add.f32 %f1187, %f1181, %f1173; + sub.f32 %f1188, %f1173, %f1187; + add.f32 %f1189, %f1181, %f1188; add.f32 %f1190, %f1186, %f1189; - add.f32 %f1191, %f154, %f1190; - add.f32 %f1192, %f1187, %f1191; - sub.f32 %f1193, %f1187, %f1192; - add.f32 %f1194, %f1191, %f1193; - mul.rn.f32 %f1196, %f900, %f1192; - neg.f32 %f1197, %f1196; - fma.rn.f32 %f1198, %f900, %f1192, %f1197; - fma.rn.f32 %f1199, %f900, %f1194, %f1198; - mov.f32 %f3354, 0f00000000; - fma.rn.f32 %f1201, %f3354, %f1192, %f1199; - add.rn.f32 %f1202, %f1196, %f1201; - neg.f32 %f1203, %f1202; - add.rn.f32 %f1204, %f1196, %f1203; - add.rn.f32 %f1205, %f1204, %f1201; - mov.b32 %r158, %f1202; - setp.eq.s32 %p81, %r158, 1118925336; - add.s32 %r159, %r158, -1; - mov.b32 %f1206, %r159; - add.f32 %f1207, %f1205, 0f37000000; - selp.f32 %f1208, %f1206, %f1202, %p81; - selp.f32 %f231, %f1207, %f1205, %p81; - mul.f32 %f1209, %f1208, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1210, %f1209; - mov.f32 %f1211, 0fBF317200; - fma.rn.f32 %f1212, %f1210, %f1211, %f1208; - mov.f32 %f1213, 0fB5BFBE8E; - fma.rn.f32 %f1214, %f1210, %f1213, %f1212; - mul.f32 %f1215, %f1214, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1216, %f1215; - add.f32 %f1217, %f1210, 0f00000000; - ex2.approx.f32 %f1218, %f1217; - mul.f32 %f1219, %f1216, %f1218; - setp.lt.f32 %p82, %f1208, 0fC2D20000; - selp.f32 %f1220, 0f00000000, %f1219, %p82; - setp.gt.f32 %p83, %f1208, 0f42D20000; - selp.f32 %f3329, 0f7F800000, %f1220, %p83; - setp.eq.f32 %p84, %f3329, 0f7F800000; - @%p84 bra BB6_55; - - fma.rn.f32 %f3329, %f3329, %f231, %f3329; - -BB6_55: - setp.geu.f32 %p379, %f147, 0f00000000; - mov.b32 %r160, %f3329; - xor.b32 %r161, %r160, -2147483648; - mov.b32 %f1221, %r161; - selp.f32 %f235, %f1221, %f3329, %p3; - setp.eq.f32 %p85, %f147, 0f00000000; - selp.f32 %f3330, %f155, %f235, %p85; - @%p379 bra BB6_57; - - cvt.rzi.f32.f32 %f1223, %f900; - setp.neu.f32 %p86, %f1223, 0f40000000; - selp.f32 %f3330, 0f7FFFFFFF, %f235, %p86; - -BB6_57: - abs.f32 %f3205, %f147; - add.f32 %f3204, %f3205, 0f40000000; - mov.b32 %r305, %f3204; - mov.f32 %f3203, 0f3DAAAABD; - mov.f32 %f3202, 0f3C4CAF63; - mov.f32 %f3201, 0f3B18F0FE; - mov.f32 %f3200, 0fB5BFBE8E; - mov.f32 %f3199, 0fBF317200; - add.f32 %f1226, %f147, 0f40000000; - setp.gtu.f32 %p87, %f3205, 0f7F800000; - selp.f32 %f1227, %f1226, %f3330, %p87; - selp.f32 %f1228, 0fFF800000, 0f7F800000, %p3; - setp.neu.f32 %p88, %f3205, 0f7F800000; - selp.f32 %f1229, %f1227, %f1228, %p88; - setp.gt.s32 %p89, %r305, 2139095039; - selp.f32 %f1230, %f1229, %f3330, %p89; - mul.f32 %f1231, %f1230, 0fBF000000; - setp.eq.f32 %p90, %f147, 0f3F800000; - selp.f32 %f1232, 0fBF000000, %f1231, %p90; - mul.f32 %f1233, %f1232, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1234, %f1233; - fma.rn.f32 %f1236, %f1234, %f3199, %f1232; - fma.rn.f32 %f1238, %f1234, %f3200, %f1236; - mul.f32 %f1239, %f1238, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1240, %f1239; - add.f32 %f1241, %f1234, 0f00000000; - ex2.approx.f32 %f1242, %f1241; - mul.f32 %f1243, %f1240, %f1242; - setp.lt.f32 %p91, %f1232, 0fC2D20000; - selp.f32 %f1244, 0f00000000, %f1243, %p91; - setp.gt.f32 %p92, %f1232, 0f42D20000; - selp.f32 %f239, 0f7F800000, %f1244, %p92; - // inline asm - rcp.approx.ftz.f32 %f1224,%f159; - // inline asm - mul.f32 %f1245, %f1224, %f160; - mul.f32 %f1246, %f1245, %f1245; - fma.rn.f32 %f1249, %f3201, %f1246, %f3202; - fma.rn.f32 %f1251, %f1249, %f1246, %f3203; - mul.rn.f32 %f1252, %f1251, %f1246; - mul.rn.f32 %f1253, %f1252, %f1245; - sub.f32 %f1254, %f158, %f1245; - neg.f32 %f1255, %f1245; - add.f32 %f1256, %f1254, %f1254; - fma.rn.f32 %f1257, %f1255, %f158, %f1256; - mul.rn.f32 %f1258, %f1224, %f1257; - add.f32 %f1259, %f1253, %f1245; - sub.f32 %f1260, %f1245, %f1259; - add.f32 %f1261, %f1253, %f1260; - add.f32 %f1262, %f1258, %f1261; - add.f32 %f1263, %f1259, %f1262; - sub.f32 %f1264, %f1259, %f1263; - add.f32 %f1265, %f1262, %f1264; - add.f32 %f1266, %f161, %f1263; - sub.f32 %f1267, %f161, %f1266; - add.f32 %f1268, %f1263, %f1267; - add.f32 %f1269, %f1265, %f1268; - add.f32 %f1270, %f162, %f1269; - add.f32 %f1271, %f1266, %f1270; - sub.f32 %f1272, %f1266, %f1271; - add.f32 %f1273, %f1270, %f1272; - mul.rn.f32 %f1275, %f900, %f1271; - neg.f32 %f1276, %f1275; - fma.rn.f32 %f1277, %f900, %f1271, %f1276; - fma.rn.f32 %f1278, %f900, %f1273, %f1277; - fma.rn.f32 %f1280, %f3354, %f1271, %f1278; - add.rn.f32 %f1281, %f1275, %f1280; - neg.f32 %f1282, %f1281; - add.rn.f32 %f1283, %f1275, %f1282; - add.rn.f32 %f1284, %f1283, %f1280; - mov.b32 %r162, %f1281; - setp.eq.s32 %p93, %r162, 1118925336; - add.s32 %r163, %r162, -1; - mov.b32 %f1285, %r163; - add.f32 %f1286, %f1284, 0f37000000; - selp.f32 %f1287, %f1285, %f1281, %p93; - selp.f32 %f240, %f1286, %f1284, %p93; - mul.f32 %f1288, %f1287, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1289, %f1288; - fma.rn.f32 %f1290, %f1289, %f3199, %f1287; - fma.rn.f32 %f1291, %f1289, %f3200, %f1290; - mul.f32 %f1292, %f1291, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1293, %f1292; - add.f32 %f1294, %f1289, 0f00000000; - ex2.approx.f32 %f1295, %f1294; - mul.f32 %f1296, %f1293, %f1295; - setp.lt.f32 %p94, %f1287, 0fC2D20000; - selp.f32 %f1297, 0f00000000, %f1296, %p94; - setp.gt.f32 %p95, %f1287, 0f42D20000; - selp.f32 %f3331, 0f7F800000, %f1297, %p95; - setp.eq.f32 %p96, %f3331, 0f7F800000; - @%p96 bra BB6_59; - - fma.rn.f32 %f3331, %f3331, %f240, %f3331; - -BB6_59: - setp.geu.f32 %p380, %f156, 0f00000000; - mov.b32 %r164, %f3331; - xor.b32 %r165, %r164, -2147483648; - mov.b32 %f1298, %r165; - selp.f32 %f244, %f1298, %f3331, %p4; - setp.eq.f32 %p97, %f156, 0f00000000; - selp.f32 %f3332, %f163, %f244, %p97; - @%p380 bra BB6_61; - - cvt.rzi.f32.f32 %f1300, %f900; - setp.neu.f32 %p98, %f1300, 0f40000000; - selp.f32 %f3332, 0f7FFFFFFF, %f244, %p98; - -BB6_61: - abs.f32 %f3219, %f156; - cvt.rn.f32.s32 %f3218, %r323; - add.f32 %f3217, %f3219, 0f40000000; - mov.b32 %r306, %f3217; - cvt.rn.f32.s32 %f3216, %r322; - mov.f32 %f3215, 0f35BFBE8E; - mov.f32 %f3214, 0f3F317200; - add.f32 %f3213, %f3216, 0f3F800000; - sub.f32 %f3212, %f3213, %f3370; - sub.f32 %f3211, %f3216, %f3370; - mov.f32 %f3210, 0f3DAAAABD; - mov.f32 %f3209, 0f3C4CAF63; - mov.f32 %f3208, 0f3B18F0FE; - mov.f32 %f3207, 0fB5BFBE8E; - mov.f32 %f3206, 0fBF317200; - add.f32 %f1303, %f156, 0f40000000; - setp.gtu.f32 %p99, %f3219, 0f7F800000; - selp.f32 %f1304, %f1303, %f3332, %p99; - selp.f32 %f1305, 0fFF800000, 0f7F800000, %p4; - setp.neu.f32 %p100, %f3219, 0f7F800000; - selp.f32 %f1306, %f1304, %f1305, %p100; - setp.gt.s32 %p101, %r306, 2139095039; - selp.f32 %f1307, %f1306, %f3332, %p101; - mul.f32 %f1308, %f1307, 0fBF000000; - setp.eq.f32 %p102, %f156, 0f3F800000; - selp.f32 %f1309, 0fBF000000, %f1308, %p102; - mul.f32 %f1310, %f1309, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1311, %f1310; - fma.rn.f32 %f1313, %f1311, %f3206, %f1309; - fma.rn.f32 %f1315, %f1311, %f3207, %f1313; - mul.f32 %f1316, %f1315, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1317, %f1316; - add.f32 %f1318, %f1311, 0f00000000; - ex2.approx.f32 %f1319, %f1318; - mul.f32 %f1320, %f1317, %f1319; - setp.lt.f32 %p103, %f1309, 0fC2D20000; - selp.f32 %f1321, 0f00000000, %f1320, %p103; - setp.gt.f32 %p104, %f1309, 0f42D20000; - selp.f32 %f1322, 0f7F800000, %f1321, %p104; - sub.f32 %f1323, %f239, %f1322; - mul.f32 %f1324, %f164, %f1323; - mul.f32 %f248, %f230, %f1324; - mul.f32 %f1325, %f3211, %f1322; - mul.f32 %f1326, %f3212, %f239; - sub.f32 %f1327, %f1326, %f1325; - mul.f32 %f1328, %f1327, %f194; - mul.f32 %f249, %f230, %f1328; - add.f32 %f1329, %f3218, 0f3F800000; - sub.f32 %f250, %f1329, %f3369; - div.rn.f32 %f251, %f250, %f135; - abs.f32 %f252, %f251; - setp.lt.f32 %p105, %f252, 0f00800000; - mul.f32 %f1330, %f252, 0f4B800000; - selp.f32 %f1331, 0fC3170000, 0fC2FE0000, %p105; - selp.f32 %f1332, %f1330, %f252, %p105; - mov.b32 %r166, %f1332; - and.b32 %r167, %r166, 8388607; - or.b32 %r168, %r167, 1065353216; - mov.b32 %f1333, %r168; - shr.u32 %r169, %r166, 23; - cvt.rn.f32.u32 %f1334, %r169; - add.f32 %f1335, %f1331, %f1334; - setp.gt.f32 %p106, %f1333, 0f3FB504F3; - mul.f32 %f1336, %f1333, 0f3F000000; - add.f32 %f1337, %f1335, 0f3F800000; - selp.f32 %f1338, %f1336, %f1333, %p106; - selp.f32 %f1339, %f1337, %f1335, %p106; - add.f32 %f253, %f1338, 0fBF800000; - add.f32 %f1302, %f1338, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1301,%f1302; - // inline asm - add.f32 %f255, %f253, %f253; - mul.f32 %f1340, %f1301, %f255; - mul.f32 %f1341, %f1340, %f1340; - fma.rn.f32 %f1344, %f3208, %f1341, %f3209; - fma.rn.f32 %f1346, %f1344, %f1341, %f3210; - mul.rn.f32 %f1347, %f1346, %f1341; - mul.rn.f32 %f1348, %f1347, %f1340; - sub.f32 %f1349, %f253, %f1340; - neg.f32 %f1350, %f1340; - add.f32 %f1351, %f1349, %f1349; - fma.rn.f32 %f1352, %f1350, %f253, %f1351; - mul.rn.f32 %f1353, %f1301, %f1352; - add.f32 %f1354, %f1348, %f1340; - sub.f32 %f1355, %f1340, %f1354; - add.f32 %f1356, %f1348, %f1355; - add.f32 %f1357, %f1353, %f1356; - add.f32 %f1358, %f1354, %f1357; - sub.f32 %f1359, %f1354, %f1358; - add.f32 %f1360, %f1357, %f1359; - mul.rn.f32 %f256, %f1339, %f3214; - mul.rn.f32 %f257, %f1339, %f3215; - add.f32 %f1363, %f256, %f1358; - sub.f32 %f1364, %f256, %f1363; - add.f32 %f1365, %f1358, %f1364; - add.f32 %f1366, %f1360, %f1365; - add.f32 %f1367, %f257, %f1366; - add.f32 %f1368, %f1363, %f1367; - sub.f32 %f1369, %f1363, %f1368; - add.f32 %f1370, %f1367, %f1369; - mul.rn.f32 %f1372, %f900, %f1368; - neg.f32 %f1373, %f1372; - fma.rn.f32 %f1374, %f900, %f1368, %f1373; - fma.rn.f32 %f1375, %f900, %f1370, %f1374; - fma.rn.f32 %f1377, %f3354, %f1368, %f1375; - add.rn.f32 %f1378, %f1372, %f1377; - neg.f32 %f1379, %f1378; - add.rn.f32 %f1380, %f1372, %f1379; - add.rn.f32 %f1381, %f1380, %f1377; - mov.b32 %r170, %f1378; - setp.eq.s32 %p107, %r170, 1118925336; - add.s32 %r171, %r170, -1; - mov.b32 %f1382, %r171; - add.f32 %f1383, %f1381, 0f37000000; - selp.f32 %f1384, %f1382, %f1378, %p107; - selp.f32 %f258, %f1383, %f1381, %p107; - mul.f32 %f1385, %f1384, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1386, %f1385; - fma.rn.f32 %f1387, %f1386, %f3206, %f1384; - fma.rn.f32 %f1388, %f1386, %f3207, %f1387; - mul.f32 %f1389, %f1388, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1390, %f1389; - add.f32 %f1391, %f1386, 0f00000000; - ex2.approx.f32 %f1392, %f1391; - mul.f32 %f1393, %f1390, %f1392; - setp.lt.f32 %p108, %f1384, 0fC2D20000; - selp.f32 %f1394, 0f00000000, %f1393, %p108; - setp.gt.f32 %p109, %f1384, 0f42D20000; - selp.f32 %f3333, 0f7F800000, %f1394, %p109; - setp.eq.f32 %p110, %f3333, 0f7F800000; - @%p110 bra BB6_63; - - fma.rn.f32 %f3333, %f3333, %f258, %f3333; - -BB6_63: - setp.lt.f32 %p111, %f251, 0f00000000; - and.pred %p9, %p111, %p59; - mov.b32 %r172, %f3333; - xor.b32 %r173, %r172, -2147483648; - mov.b32 %f1395, %r173; - selp.f32 %f3335, %f1395, %f3333, %p9; - setp.eq.f32 %p113, %f251, 0f00000000; - @%p113 bra BB6_66; - bra.uni BB6_64; - -BB6_66: - add.f32 %f1398, %f251, %f251; - selp.f32 %f3335, %f1398, 0f00000000, %p59; - bra.uni BB6_67; - -BB6_64: - setp.geu.f32 %p114, %f251, 0f00000000; - @%p114 bra BB6_67; - - cvt.rzi.f32.f32 %f1397, %f900; - setp.neu.f32 %p115, %f1397, 0f40000000; - selp.f32 %f3335, 0f7FFFFFFF, %f3335, %p115; - -BB6_67: - abs.f32 %f3220, %f251; - add.f32 %f1399, %f3220, 0f40000000; - mov.b32 %r36, %f1399; - setp.lt.s32 %p117, %r36, 2139095040; - @%p117 bra BB6_72; - - abs.f32 %f3228, %f251; - setp.gtu.f32 %p118, %f3228, 0f7F800000; - @%p118 bra BB6_71; - bra.uni BB6_69; - -BB6_71: - add.f32 %f3335, %f251, 0f40000000; - bra.uni BB6_72; - -BB6_69: - abs.f32 %f3229, %f251; - setp.neu.f32 %p119, %f3229, 0f7F800000; - @%p119 bra BB6_72; - - selp.f32 %f3335, 0fFF800000, 0f7F800000, %p9; - -BB6_72: - cvt.rn.f32.s32 %f3241, %r323; - sub.f32 %f3240, %f3241, %f3369; - mov.f32 %f3227, 0f35BFBE8E; - mov.f32 %f3226, 0f3F317200; - mov.f32 %f3225, 0f3DAAAABD; - mov.f32 %f3224, 0f3C4CAF63; - mov.f32 %f3223, 0f3B18F0FE; - mov.f32 %f3222, 0fB5BFBE8E; - mov.f32 %f3221, 0fBF317200; - mul.f32 %f1402, %f3335, 0fBF000000; - setp.eq.f32 %p120, %f251, 0f3F800000; - selp.f32 %f1403, 0fBF000000, %f1402, %p120; - mul.f32 %f1404, %f1403, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1405, %f1404; - fma.rn.f32 %f1407, %f1405, %f3221, %f1403; - fma.rn.f32 %f1409, %f1405, %f3222, %f1407; - mul.f32 %f1410, %f1409, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1411, %f1410; - add.f32 %f1412, %f1405, 0f00000000; - ex2.approx.f32 %f1413, %f1412; - mul.f32 %f1414, %f1411, %f1413; - setp.lt.f32 %p121, %f1403, 0fC2D20000; - selp.f32 %f1415, 0f00000000, %f1414, %p121; - setp.gt.f32 %p122, %f1403, 0f42D20000; - selp.f32 %f269, 0f7F800000, %f1415, %p122; - div.rn.f32 %f270, %f3240, %f135; - abs.f32 %f271, %f270; - setp.lt.f32 %p123, %f271, 0f00800000; - mul.f32 %f1416, %f271, 0f4B800000; - selp.f32 %f1417, 0fC3170000, 0fC2FE0000, %p123; - selp.f32 %f1418, %f1416, %f271, %p123; - mov.b32 %r174, %f1418; - and.b32 %r175, %r174, 8388607; - or.b32 %r176, %r175, 1065353216; - mov.b32 %f1419, %r176; - shr.u32 %r177, %r174, 23; - cvt.rn.f32.u32 %f1420, %r177; - add.f32 %f1421, %f1417, %f1420; - setp.gt.f32 %p124, %f1419, 0f3FB504F3; - mul.f32 %f1422, %f1419, 0f3F000000; - add.f32 %f1423, %f1421, 0f3F800000; - selp.f32 %f1424, %f1422, %f1419, %p124; - selp.f32 %f1425, %f1423, %f1421, %p124; - add.f32 %f272, %f1424, 0fBF800000; - add.f32 %f1401, %f1424, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1400,%f1401; - // inline asm - add.f32 %f274, %f272, %f272; - mul.f32 %f1426, %f1400, %f274; - mul.f32 %f1427, %f1426, %f1426; - fma.rn.f32 %f1430, %f3223, %f1427, %f3224; - fma.rn.f32 %f1432, %f1430, %f1427, %f3225; - mul.rn.f32 %f1433, %f1432, %f1427; - mul.rn.f32 %f1434, %f1433, %f1426; - sub.f32 %f1435, %f272, %f1426; - neg.f32 %f1436, %f1426; - add.f32 %f1437, %f1435, %f1435; - fma.rn.f32 %f1438, %f1436, %f272, %f1437; - mul.rn.f32 %f1439, %f1400, %f1438; - add.f32 %f1440, %f1434, %f1426; - sub.f32 %f1441, %f1426, %f1440; - add.f32 %f1442, %f1434, %f1441; - add.f32 %f1443, %f1439, %f1442; - add.f32 %f1444, %f1440, %f1443; - sub.f32 %f1445, %f1440, %f1444; - add.f32 %f1446, %f1443, %f1445; - mul.rn.f32 %f275, %f1425, %f3226; - mul.rn.f32 %f276, %f1425, %f3227; - add.f32 %f1449, %f275, %f1444; - sub.f32 %f1450, %f275, %f1449; - add.f32 %f1451, %f1444, %f1450; - add.f32 %f1452, %f1446, %f1451; - add.f32 %f1453, %f276, %f1452; - add.f32 %f1454, %f1449, %f1453; - sub.f32 %f1455, %f1449, %f1454; - add.f32 %f1456, %f1453, %f1455; - mul.rn.f32 %f1458, %f900, %f1454; + add.f32 %f1191, %f1187, %f1190; + sub.f32 %f1192, %f1187, %f1191; + add.f32 %f1193, %f1190, %f1192; + mul.rn.f32 %f1195, %f1166, %f3125; + mul.rn.f32 %f1197, %f1166, %f3126; + add.f32 %f1198, %f1195, %f1191; + sub.f32 %f1199, %f1195, %f1198; + add.f32 %f1200, %f1191, %f1199; + add.f32 %f1201, %f1193, %f1200; + add.f32 %f1202, %f1197, %f1201; + add.f32 %f1203, %f1198, %f1202; + sub.f32 %f1204, %f1198, %f1203; + add.f32 %f1205, %f1202, %f1204; + mul.rn.f32 %f1206, %f613, %f1203; + neg.f32 %f1207, %f1206; + fma.rn.f32 %f1208, %f613, %f1203, %f1207; + fma.rn.f32 %f1209, %f613, %f1205, %f1208; + fma.rn.f32 %f1211, %f3279, %f1203, %f1209; + add.rn.f32 %f1212, %f1206, %f1211; + neg.f32 %f1213, %f1212; + add.rn.f32 %f1214, %f1206, %f1213; + add.rn.f32 %f1215, %f1214, %f1211; + mov.b32 %r534, %f1212; + setp.eq.s32 %p401, %r534, 1118925336; + add.s32 %r535, %r534, -1; + mov.b32 %f1216, %r535; + add.f32 %f1217, %f1215, 0f37000000; + selp.f32 %f193, %f1217, %f1215, %p401; + selp.f32 %f1218, %f1216, %f1212, %p401; + mul.rn.f32 %f1220, %f1218, %f812; + cvt.rzi.f32.f32 %f1221, %f1220; + abs.f32 %f1222, %f1221; + setp.gt.f32 %p402, %f1222, 0f42FC0000; + mov.b32 %r536, %f1221; + and.b32 %r537, %r536, -2147483648; + or.b32 %r538, %r537, 1123811328; + mov.b32 %f1223, %r538; + selp.f32 %f1224, %f1223, %f1221, %p402; + fma.rn.f32 %f1226, %f1224, %f3127, %f1218; + fma.rn.f32 %f1228, %f1224, %f3128, %f1226; + mul.f32 %f1229, %f1228, 0f3FB8AA3B; + add.f32 %f1230, %f1224, 0f4B40007F; + mov.b32 %r539, %f1230; + shl.b32 %r540, %r539, 23; + mov.b32 %f1231, %r540; + ex2.approx.ftz.f32 %f1232, %f1229; + mul.f32 %f194, %f1232, %f1231; + setp.eq.f32 %p403, %f194, 0f7F800000; + mov.f32 %f3260, 0f7F800000; + @%p403 bra $L__BB6_212; + + fma.rn.f32 %f3260, %f194, %f193, %f194; + +$L__BB6_212: + setp.lt.f32 %p404, %f191, 0f00000000; + and.pred %p36, %p404, %p301; + setp.eq.f32 %p406, %f191, 0f00000000; + @%p406 bra $L__BB6_216; + bra.uni $L__BB6_213; + +$L__BB6_216: + add.f32 %f1237, %f191, %f191; + selp.f32 %f3262, %f1237, 0f00000000, %p301; + bra.uni $L__BB6_217; + +$L__BB6_213: + mov.b32 %r541, %f3260; + xor.b32 %r542, %r541, -2147483648; + mov.b32 %f1233, %r542; + selp.f32 %f3262, %f1233, %f3260, %p36; + setp.geu.f32 %p407, %f191, 0f00000000; + @%p407 bra $L__BB6_217; + + cvt.rzi.f32.f32 %f1235, %f613; + setp.eq.f32 %p408, %f1235, 0f40000000; + @%p408 bra $L__BB6_217; + + mov.f32 %f3262, 0f7FFFFFFF; + +$L__BB6_217: + abs.f32 %f3161, %f191; + add.f32 %f1238, %f3161, 0f40000000; + mov.b32 %r543, %f1238; + setp.lt.s32 %p410, %r543, 2139095040; + @%p410 bra $L__BB6_222; + + abs.f32 %f3162, %f191; + setp.gtu.f32 %p411, %f3162, 0f7F800000; + @%p411 bra $L__BB6_221; + bra.uni $L__BB6_219; + +$L__BB6_221: + add.f32 %f3262, %f191, 0f40000000; + bra.uni $L__BB6_222; + +$L__BB6_219: + abs.f32 %f3163, %f191; + setp.neu.f32 %p412, %f3163, 0f7F800000; + @%p412 bra $L__BB6_222; + + selp.f32 %f3262, 0fFF800000, 0f7F800000, %p36; + +$L__BB6_222: + cvt.rn.f32.s32 %f3143, %r1374; + sub.f32 %f3142, %f3143, %f3295; + mov.f32 %f3141, 0f32A57060; + mov.f32 %f3140, 0f4B400001; + mov.f32 %f3139, 0f437C0000; + mov.f32 %f3138, 0f3BBB989D; + mov.f32 %f3137, 0f3102E308; + mov.f32 %f3136, 0fBF317218; + mov.f32 %f3135, 0f35BFBE8E; + mov.f32 %f3134, 0f3F317200; + mov.f32 %f3133, 0f3DAAAABD; + mov.f32 %f3132, 0f3C4CAF63; + mov.f32 %f3131, 0f3B18F0FE; + mov.f32 %f3130, 0f3F000000; + mul.f32 %f1240, %f3262, 0fBF000000; + setp.eq.f32 %p413, %f191, 0f3F800000; + selp.f32 %f1241, 0fBF000000, %f1240, %p413; + fma.rn.f32 %f1244, %f1241, %f3138, %f3130; + cvt.sat.f32.f32 %f1247, %f1244; + fma.rm.f32 %f1249, %f1247, %f3139, %f3140; + add.f32 %f1250, %f1249, 0fCB40007F; + neg.f32 %f1251, %f1250; + fma.rn.f32 %f1252, %f1241, %f812, %f1251; + fma.rn.f32 %f1254, %f1241, %f3141, %f1252; + mov.b32 %r544, %f1249; + shl.b32 %r545, %r544, 23; + mov.b32 %f1255, %r545; + ex2.approx.ftz.f32 %f1256, %f1254; + mul.f32 %f203, %f1256, %f1255; + div.rn.f32 %f204, %f3142, %f105; + abs.f32 %f205, %f204; + setp.lt.f32 %p414, %f205, 0f00800000; + mul.f32 %f1257, %f205, 0f4B800000; + selp.f32 %f1258, %f1257, %f205, %p414; + selp.f32 %f1259, 0fC3170000, 0fC2FE0000, %p414; + mov.b32 %r546, %f1258; + and.b32 %r547, %r546, 8388607; + or.b32 %r548, %r547, 1065353216; + mov.b32 %f1260, %r548; + shr.u32 %r549, %r546, 23; + cvt.rn.f32.u32 %f1261, %r549; + add.f32 %f1262, %f1259, %f1261; + setp.gt.f32 %p415, %f1260, 0f3FB504F3; + mul.f32 %f1263, %f1260, 0f3F000000; + add.f32 %f1264, %f1262, 0f3F800000; + selp.f32 %f1265, %f1264, %f1262, %p415; + selp.f32 %f1266, %f1263, %f1260, %p415; + add.f32 %f1267, %f1266, 0fBF800000; + add.f32 %f1268, %f1266, 0f3F800000; + rcp.approx.ftz.f32 %f1269, %f1268; + add.f32 %f1270, %f1267, %f1267; + mul.f32 %f1272, %f1270, %f1269; + mul.f32 %f1273, %f1272, %f1272; + fma.rn.f32 %f1276, %f3131, %f1273, %f3132; + fma.rn.f32 %f1278, %f1276, %f1273, %f3133; + mul.rn.f32 %f1279, %f1278, %f1273; + mul.rn.f32 %f1280, %f1279, %f1272; + sub.f32 %f1281, %f1267, %f1272; + add.f32 %f1282, %f1281, %f1281; + neg.f32 %f1283, %f1272; + fma.rn.f32 %f1284, %f1283, %f1267, %f1282; + mul.rn.f32 %f1285, %f1269, %f1284; + add.f32 %f1286, %f1280, %f1272; + sub.f32 %f1287, %f1272, %f1286; + add.f32 %f1288, %f1280, %f1287; + add.f32 %f1289, %f1285, %f1288; + add.f32 %f1290, %f1286, %f1289; + sub.f32 %f1291, %f1286, %f1290; + add.f32 %f1292, %f1289, %f1291; + mul.rn.f32 %f1294, %f1265, %f3134; + mul.rn.f32 %f1296, %f1265, %f3135; + add.f32 %f1297, %f1294, %f1290; + sub.f32 %f1298, %f1294, %f1297; + add.f32 %f1299, %f1290, %f1298; + add.f32 %f1300, %f1292, %f1299; + add.f32 %f1301, %f1296, %f1300; + add.f32 %f1302, %f1297, %f1301; + sub.f32 %f1303, %f1297, %f1302; + add.f32 %f1304, %f1301, %f1303; + mul.rn.f32 %f1305, %f613, %f1302; + neg.f32 %f1306, %f1305; + fma.rn.f32 %f1307, %f613, %f1302, %f1306; + fma.rn.f32 %f1308, %f613, %f1304, %f1307; + fma.rn.f32 %f1310, %f3279, %f1302, %f1308; + add.rn.f32 %f1311, %f1305, %f1310; + neg.f32 %f1312, %f1311; + add.rn.f32 %f1313, %f1305, %f1312; + add.rn.f32 %f1314, %f1313, %f1310; + mov.b32 %r550, %f1311; + setp.eq.s32 %p416, %r550, 1118925336; + add.s32 %r551, %r550, -1; + mov.b32 %f1315, %r551; + add.f32 %f1316, %f1314, 0f37000000; + selp.f32 %f206, %f1316, %f1314, %p416; + selp.f32 %f1317, %f1315, %f1311, %p416; + mul.rn.f32 %f1318, %f1317, %f812; + cvt.rzi.f32.f32 %f1319, %f1318; + abs.f32 %f1320, %f1319; + setp.gt.f32 %p417, %f1320, 0f42FC0000; + mov.b32 %r552, %f1319; + and.b32 %r553, %r552, -2147483648; + or.b32 %r554, %r553, 1123811328; + mov.b32 %f1321, %r554; + selp.f32 %f1322, %f1321, %f1319, %p417; + fma.rn.f32 %f1324, %f1322, %f3136, %f1317; + fma.rn.f32 %f1326, %f1322, %f3137, %f1324; + mul.f32 %f1327, %f1326, 0f3FB8AA3B; + add.f32 %f1328, %f1322, 0f4B40007F; + mov.b32 %r555, %f1328; + shl.b32 %r556, %r555, 23; + mov.b32 %f1329, %r556; + ex2.approx.ftz.f32 %f1330, %f1327; + mul.f32 %f207, %f1330, %f1329; + setp.eq.f32 %p418, %f207, 0f7F800000; + mov.f32 %f3263, 0f7F800000; + @%p418 bra $L__BB6_224; + + fma.rn.f32 %f3263, %f207, %f206, %f207; + +$L__BB6_224: + setp.lt.f32 %p419, %f204, 0f00000000; + and.pred %p37, %p419, %p301; + setp.eq.f32 %p421, %f204, 0f00000000; + @%p421 bra $L__BB6_228; + bra.uni $L__BB6_225; + +$L__BB6_228: + add.f32 %f1335, %f204, %f204; + selp.f32 %f3265, %f1335, 0f00000000, %p301; + bra.uni $L__BB6_229; + +$L__BB6_225: + mov.b32 %r557, %f3263; + xor.b32 %r558, %r557, -2147483648; + mov.b32 %f1331, %r558; + selp.f32 %f3265, %f1331, %f3263, %p37; + setp.geu.f32 %p422, %f204, 0f00000000; + @%p422 bra $L__BB6_229; + + cvt.rzi.f32.f32 %f1333, %f613; + setp.eq.f32 %p423, %f1333, 0f40000000; + @%p423 bra $L__BB6_229; + + mov.f32 %f3265, 0f7FFFFFFF; + +$L__BB6_229: + abs.f32 %f2997, %f204; + add.f32 %f1336, %f2997, 0f40000000; + mov.b32 %r559, %f1336; + setp.lt.s32 %p425, %r559, 2139095040; + @%p425 bra $L__BB6_234; + + abs.f32 %f3150, %f204; + setp.gtu.f32 %p426, %f3150, 0f7F800000; + @%p426 bra $L__BB6_233; + bra.uni $L__BB6_231; + +$L__BB6_233: + add.f32 %f3265, %f204, 0f40000000; + bra.uni $L__BB6_234; + +$L__BB6_231: + abs.f32 %f3151, %f204; + setp.neu.f32 %p427, %f3151, 0f7F800000; + @%p427 bra $L__BB6_234; + + selp.f32 %f3265, 0fFF800000, 0f7F800000, %p37; + +$L__BB6_234: + cvt.f64.f32 %fd1044, %f105; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1352}, %fd1044; + } + setp.lt.s32 %p1283, %r1352, 0; + mov.f64 %fd1043, 0d4014000000000000; + cvt.rn.f32.s32 %f3004, %r1374; + sub.f32 %f3003, %f3004, %f3295; + mov.f32 %f3002, 0f32A57060; + mov.f32 %f3001, 0f4B400001; + mov.f32 %f3000, 0f437C0000; + mov.f32 %f2999, 0f3BBB989D; + mov.f32 %f2998, 0f3F000000; + and.b32 %r560, %r78, 2146435072; + setp.eq.s32 %p428, %r560, 1074790400; + mul.f32 %f1337, %f3265, 0fBF000000; + setp.eq.f32 %p429, %f204, 0f3F800000; + selp.f32 %f1338, 0fBF000000, %f1337, %p429; + fma.rn.f32 %f1341, %f1338, %f2999, %f2998; + cvt.sat.f32.f32 %f1344, %f1341; + fma.rm.f32 %f1346, %f1344, %f3000, %f3001; + add.f32 %f1347, %f1346, 0fCB40007F; + neg.f32 %f1348, %f1347; + fma.rn.f32 %f1349, %f1338, %f812, %f1348; + fma.rn.f32 %f1351, %f1338, %f3002, %f1349; + mov.b32 %r561, %f1346; + shl.b32 %r562, %r561, 23; + mov.b32 %f1352, %r562; + ex2.approx.ftz.f32 %f1353, %f1351; + mul.f32 %f216, %f1353, %f1352; + add.f32 %f1354, %f3003, 0f3F800000; + mul.f32 %f1355, %f1354, %f203; + mul.f32 %f1356, %f3003, %f216; + sub.f32 %f1357, %f1355, %f1356; + div.rn.f32 %f1358, %f158, %f105; + mul.f32 %f1359, %f1358, %f1357; + mul.f32 %f217, %f131, %f1359; + { // callseq 129, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd122; + .param .b64 param1; + st.param.f64 [param1+0], %fd1043; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1103, [retval0+0]; + } // callseq 129 + and.pred %p38, %p1283, %p428; + not.pred %p431, %p38; + @%p431 bra $L__BB6_236; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r563}, %fd1103; + } + xor.b32 %r564, %r563, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r565, %temp}, %fd1103; + } + mov.b64 %fd1103, {%r565, %r564}; + +$L__BB6_236: + setp.eq.f32 %p1284, %f105, 0f00000000; + @%p1284 bra $L__BB6_240; + bra.uni $L__BB6_237; + +$L__BB6_240: + setp.lt.s32 %p435, %r78, 0; + mov.u32 %r566, 0; + selp.b32 %r568, %r104, 0, %p428; + or.b32 %r569, %r568, 2146435072; + selp.b32 %r570, %r569, %r568, %p435; + mov.b64 %fd1103, {%r566, %r570}; + bra.uni $L__BB6_241; + +$L__BB6_237: + setp.gt.s32 %p433, %r104, -1; + @%p433 bra $L__BB6_241; + + mov.f64 %fd1068, 0d4014000000000000; + cvt.rzi.f64.f64 %fd734, %fd1068; + setp.eq.f64 %p434, %fd734, 0d4014000000000000; + @%p434 bra $L__BB6_241; + + mov.f64 %fd1103, 0dFFF8000000000000; + +$L__BB6_241: + add.f64 %fd148, %fd121, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r571}, %fd148; + } + and.b32 %r572, %r571, 2146435072; + setp.ne.s32 %p437, %r572, 2146435072; + mov.f64 %fd1104, %fd1103; + @%p437 bra $L__BB6_247; + + setp.gtu.f64 %p438, %fd122, 0d7FF0000000000000; + mov.f64 %fd1104, %fd148; + @%p438 bra $L__BB6_247; + + mov.f64 %fd1067, 0d4014000000000000; + and.b32 %r573, %r78, 2147483647; + setp.eq.s32 %p439, %r573, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r574, %temp}, %fd1067; + } + setp.eq.s32 %p440, %r574, 0; + and.pred %p441, %p439, %p440; + @%p441 bra $L__BB6_246; + bra.uni $L__BB6_244; + +$L__BB6_246: + setp.lt.s32 %p448, %r78, 0; + mov.u32 %r582, 0; + setp.gt.f64 %p449, %fd122, 0d3FF0000000000000; + selp.b32 %r583, 2146435072, 0, %p449; + xor.b32 %r584, %r583, 2146435072; + selp.b32 %r585, %r584, %r583, %p448; + setp.eq.f32 %p450, %f105, 0fBF800000; + selp.b32 %r586, 1072693248, %r585, %p450; + mov.b64 %fd1104, {%r582, %r586}; + bra.uni $L__BB6_247; + +$L__BB6_244: + { + .reg .b32 %temp; + mov.b64 {%r575, %temp}, %fd121; + } + and.b32 %r576, %r104, 2147483647; + setp.ne.s32 %p442, %r576, 2146435072; + setp.ne.s32 %p443, %r575, 0; + or.pred %p444, %p442, %p443; + mov.f64 %fd1104, %fd1103; + @%p444 bra $L__BB6_247; + + setp.ne.s32 %p445, %r573, 1071644672; + and.pred %p446, %p445, %p38; + setp.gt.s32 %p447, %r78, -1; + selp.b32 %r578, 2146435072, 0, %p447; + mov.u32 %r579, 0; + or.b32 %r580, %r578, -2147483648; + selp.b32 %r581, %r580, %r578, %p446; + mov.b64 %fd1104, {%r579, %r581}; + +$L__BB6_247: + not.pred %p451, %p10; + mov.f64 %fd1106, %fd61; + @%p451 bra $L__BB6_249; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r587}, %fd61; + } + xor.b32 %r588, %r587, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r589, %temp}, %fd61; + } + mov.b64 %fd1106, {%r589, %r588}; + +$L__BB6_249: + setp.eq.f32 %p1285, %f105, 0f3F800000; + setp.eq.f32 %p452, %f87, 0f00000000; + selp.f64 %fd154, 0d3FF0000000000000, %fd1104, %p1285; + @%p452 bra $L__BB6_253; + bra.uni $L__BB6_250; + +$L__BB6_253: + mov.u32 %r590, 0; + selp.b32 %r592, %r76, 0, %p149; + or.b32 %r593, %r592, 2146435072; + selp.b32 %r594, %r593, %r592, %p152; + mov.b64 %fd1106, {%r590, %r594}; + bra.uni $L__BB6_254; + +$L__BB6_250: + setp.gt.s32 %p454, %r76, -1; + @%p454 bra $L__BB6_254; + + cvt.rzi.f64.f64 %fd738, %fd649; + setp.eq.f64 %p455, %fd738, 0d4008000000000000; + @%p455 bra $L__BB6_254; + + mov.f64 %fd1106, 0dFFF8000000000000; + +$L__BB6_254: + selp.f64 %fd1107, %fd1106, %fd62, %p172; + @%p23 bra $L__BB6_259; + + setp.eq.s32 %p459, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r595, %temp}, %fd649; + } + setp.eq.s32 %p460, %r595, 0; + and.pred %p461, %p459, %p460; + @%p461 bra $L__BB6_258; + bra.uni $L__BB6_256; + +$L__BB6_258: + mov.u32 %r602, 0; + mov.b64 %fd1107, {%r602, %r80}; + bra.uni $L__BB6_259; + +$L__BB6_256: + cvt.rn.f32.s32 %f3007, %r1374; + sub.f32 %f3006, %f3007, %f3295; + add.f32 %f3005, %f3006, 0f3F000000; + cvt.f64.f32 %fd1045, %f3005; + and.b32 %r596, %r76, 2147483647; + setp.ne.s32 %p462, %r596, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r597, %temp}, %fd1045; + } + setp.ne.s32 %p463, %r597, 0; + or.pred %p464, %p462, %p463; + mov.f64 %fd1107, %fd1106; + @%p464 bra $L__BB6_259; + + setp.ne.s32 %p465, %r58, 1071644672; + and.pred %p466, %p465, %p10; + selp.b32 %r600, %r63, %r62, %p466; + mov.u32 %r601, 0; + mov.b64 %fd1107, {%r601, %r600}; + +$L__BB6_259: + setp.eq.f32 %p467, %f87, 0f3F800000; + selp.f64 %fd741, 0d3FF0000000000000, %fd1107, %p467; + cvt.f64.f32 %fd742, %f203; + mul.f64 %fd161, %fd741, %fd742; + not.pred %p468, %p11; + mov.f64 %fd1109, %fd64; + @%p468 bra $L__BB6_261; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r603}, %fd64; + } + xor.b32 %r604, %r603, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r605, %temp}, %fd64; + } + mov.b64 %fd1109, {%r605, %r604}; + +$L__BB6_261: + setp.eq.f32 %p469, %f88, 0f00000000; + @%p469 bra $L__BB6_265; + bra.uni $L__BB6_262; + +$L__BB6_265: + mov.u32 %r606, 0; + selp.b32 %r608, %r79, 0, %p149; + or.b32 %r609, %r608, 2146435072; + selp.b32 %r610, %r609, %r608, %p152; + mov.b64 %fd1109, {%r606, %r610}; + bra.uni $L__BB6_266; + +$L__BB6_262: + setp.gt.s32 %p470, %r79, -1; + @%p470 bra $L__BB6_266; + + cvt.rzi.f64.f64 %fd744, %fd649; + setp.eq.f64 %p471, %fd744, 0d4008000000000000; + @%p471 bra $L__BB6_266; + + mov.f64 %fd1109, 0dFFF8000000000000; + +$L__BB6_266: + selp.f64 %fd1110, %fd1109, %fd65, %p177; + @%p24 bra $L__BB6_271; + + setp.eq.s32 %p475, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r611, %temp}, %fd649; + } + setp.eq.s32 %p476, %r611, 0; + and.pred %p477, %p475, %p476; + @%p477 bra $L__BB6_270; + bra.uni $L__BB6_268; + +$L__BB6_270: + mov.u32 %r618, 0; + mov.b64 %fd1110, {%r618, %r82}; + bra.uni $L__BB6_271; + +$L__BB6_268: + cvt.rn.f32.s32 %f3010, %r1374; + sub.f32 %f3009, %f3010, %f3295; + add.f32 %f3008, %f3009, 0fBF000000; + cvt.f64.f32 %fd1046, %f3008; + and.b32 %r612, %r79, 2147483647; + setp.ne.s32 %p478, %r612, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r613, %temp}, %fd1046; + } + setp.ne.s32 %p479, %r613, 0; + or.pred %p480, %p478, %p479; + mov.f64 %fd1110, %fd1109; + @%p480 bra $L__BB6_271; + + setp.ne.s32 %p481, %r58, 1071644672; + and.pred %p482, %p481, %p11; + selp.b32 %r616, %r63, %r62, %p482; + mov.u32 %r617, 0; + mov.b64 %fd1110, {%r617, %r616}; + +$L__BB6_271: + cvt.f64.f32 %fd1047, %f131; + cvt.rn.f32.s32 %f3018, %r1375; + mov.f32 %f3017, 0f3102E308; + mov.f32 %f3016, 0fBF317218; + mov.f32 %f3015, 0f35BFBE8E; + mov.f32 %f3014, 0f3F317200; + mov.f32 %f3013, 0f3DAAAABD; + mov.f32 %f3012, 0f3C4CAF63; + mov.f32 %f3011, 0f3B18F0FE; + setp.eq.f32 %p483, %f88, 0f3F800000; + selp.f64 %fd747, 0d3FF0000000000000, %fd1110, %p483; + cvt.f64.f32 %fd748, %f216; + mul.f64 %fd749, %fd747, %fd748; + sub.f64 %fd750, %fd161, %fd749; + div.rn.f64 %fd751, %fd43, %fd154; + mul.f64 %fd752, %fd751, %fd750; + mul.f64 %fd754, %fd752, %fd1047; + mov.f32 %f1361, 0fC0000000; + div.rn.f32 %f1362, %f1361, %f105; + mul.f32 %f1363, %f1362, %f217; + cvt.f64.f32 %fd755, %f1363; + sub.f64 %fd756, %fd755, %fd754; + cvt.rn.f32.f64 %f218, %fd756; + add.f32 %f1364, %f3018, 0f3F800000; + sub.f32 %f1365, %f1364, %f3294; + div.rn.f32 %f219, %f1365, %f107; + abs.f32 %f220, %f219; + setp.lt.f32 %p484, %f220, 0f00800000; + mul.f32 %f1366, %f220, 0f4B800000; + selp.f32 %f1367, %f1366, %f220, %p484; + selp.f32 %f1368, 0fC3170000, 0fC2FE0000, %p484; + mov.b32 %r619, %f1367; + and.b32 %r620, %r619, 8388607; + or.b32 %r621, %r620, 1065353216; + mov.b32 %f1369, %r621; + shr.u32 %r622, %r619, 23; + cvt.rn.f32.u32 %f1370, %r622; + add.f32 %f1371, %f1368, %f1370; + setp.gt.f32 %p485, %f1369, 0f3FB504F3; + mul.f32 %f1372, %f1369, 0f3F000000; + add.f32 %f1373, %f1371, 0f3F800000; + selp.f32 %f1374, %f1373, %f1371, %p485; + selp.f32 %f1375, %f1372, %f1369, %p485; + add.f32 %f1376, %f1375, 0fBF800000; + add.f32 %f1377, %f1375, 0f3F800000; + rcp.approx.ftz.f32 %f1378, %f1377; + add.f32 %f1379, %f1376, %f1376; + mul.f32 %f1381, %f1379, %f1378; + mul.f32 %f1382, %f1381, %f1381; + fma.rn.f32 %f1385, %f3011, %f1382, %f3012; + fma.rn.f32 %f1387, %f1385, %f1382, %f3013; + mul.rn.f32 %f1388, %f1387, %f1382; + mul.rn.f32 %f1389, %f1388, %f1381; + sub.f32 %f1390, %f1376, %f1381; + add.f32 %f1391, %f1390, %f1390; + neg.f32 %f1392, %f1381; + fma.rn.f32 %f1393, %f1392, %f1376, %f1391; + mul.rn.f32 %f1394, %f1378, %f1393; + add.f32 %f1395, %f1389, %f1381; + sub.f32 %f1396, %f1381, %f1395; + add.f32 %f1397, %f1389, %f1396; + add.f32 %f1398, %f1394, %f1397; + add.f32 %f1399, %f1395, %f1398; + sub.f32 %f1400, %f1395, %f1399; + add.f32 %f1401, %f1398, %f1400; + mul.rn.f32 %f1403, %f1374, %f3014; + mul.rn.f32 %f1405, %f1374, %f3015; + add.f32 %f1406, %f1403, %f1399; + sub.f32 %f1407, %f1403, %f1406; + add.f32 %f1408, %f1399, %f1407; + add.f32 %f1409, %f1401, %f1408; + add.f32 %f1410, %f1405, %f1409; + add.f32 %f1411, %f1406, %f1410; + sub.f32 %f1412, %f1406, %f1411; + add.f32 %f1413, %f1410, %f1412; + mul.rn.f32 %f1414, %f613, %f1411; + neg.f32 %f1415, %f1414; + fma.rn.f32 %f1416, %f613, %f1411, %f1415; + fma.rn.f32 %f1417, %f613, %f1413, %f1416; + fma.rn.f32 %f1419, %f3279, %f1411, %f1417; + add.rn.f32 %f1420, %f1414, %f1419; + neg.f32 %f1421, %f1420; + add.rn.f32 %f1422, %f1414, %f1421; + add.rn.f32 %f1423, %f1422, %f1419; + mov.b32 %r623, %f1420; + setp.eq.s32 %p486, %r623, 1118925336; + add.s32 %r624, %r623, -1; + mov.b32 %f1424, %r624; + add.f32 %f1425, %f1423, 0f37000000; + selp.f32 %f221, %f1425, %f1423, %p486; + selp.f32 %f1426, %f1424, %f1420, %p486; + mul.rn.f32 %f1428, %f1426, %f812; + cvt.rzi.f32.f32 %f1429, %f1428; + abs.f32 %f1430, %f1429; + setp.gt.f32 %p487, %f1430, 0f42FC0000; + mov.b32 %r625, %f1429; + and.b32 %r626, %r625, -2147483648; + or.b32 %r627, %r626, 1123811328; + mov.b32 %f1431, %r627; + selp.f32 %f1432, %f1431, %f1429, %p487; + fma.rn.f32 %f1434, %f1432, %f3016, %f1426; + fma.rn.f32 %f1436, %f1432, %f3017, %f1434; + mul.f32 %f1437, %f1436, 0f3FB8AA3B; + add.f32 %f1438, %f1432, 0f4B40007F; + mov.b32 %r628, %f1438; + shl.b32 %r629, %r628, 23; + mov.b32 %f1439, %r629; + ex2.approx.ftz.f32 %f1440, %f1437; + mul.f32 %f222, %f1440, %f1439; + setp.eq.f32 %p488, %f222, 0f7F800000; + mov.f32 %f3266, 0f7F800000; + @%p488 bra $L__BB6_273; + + fma.rn.f32 %f3266, %f222, %f221, %f222; + +$L__BB6_273: + setp.lt.f32 %p489, %f219, 0f00000000; + and.pred %p39, %p489, %p301; + setp.eq.f32 %p491, %f219, 0f00000000; + @%p491 bra $L__BB6_277; + bra.uni $L__BB6_274; + +$L__BB6_277: + add.f32 %f1445, %f219, %f219; + selp.f32 %f3268, %f1445, 0f00000000, %p301; + bra.uni $L__BB6_278; + +$L__BB6_274: + mov.b32 %r630, %f3266; + xor.b32 %r631, %r630, -2147483648; + mov.b32 %f1441, %r631; + selp.f32 %f3268, %f1441, %f3266, %p39; + setp.geu.f32 %p492, %f219, 0f00000000; + @%p492 bra $L__BB6_278; + + cvt.rzi.f32.f32 %f1443, %f613; + setp.eq.f32 %p493, %f1443, 0f40000000; + @%p493 bra $L__BB6_278; + + mov.f32 %f3268, 0f7FFFFFFF; + +$L__BB6_278: + abs.f32 %f3164, %f219; + add.f32 %f1446, %f3164, 0f40000000; + mov.b32 %r632, %f1446; + setp.lt.s32 %p495, %r632, 2139095040; + @%p495 bra $L__BB6_283; + + abs.f32 %f3166, %f219; + setp.gtu.f32 %p496, %f3166, 0f7F800000; + @%p496 bra $L__BB6_282; + bra.uni $L__BB6_280; + +$L__BB6_282: + add.f32 %f3268, %f219, 0f40000000; + bra.uni $L__BB6_283; + +$L__BB6_280: + abs.f32 %f3167, %f219; + setp.neu.f32 %p497, %f3167, 0f7F800000; + @%p497 bra $L__BB6_283; + + selp.f32 %f3268, 0fFF800000, 0f7F800000, %p39; + +$L__BB6_283: + mov.f32 %f3032, 0f32A57060; + mov.f32 %f3031, 0f4B400001; + mov.f32 %f3030, 0f437C0000; + mov.f32 %f3029, 0f3BBB989D; + mov.f32 %f3028, 0f3102E308; + mov.f32 %f3027, 0fBF317218; + mov.f32 %f3026, 0f35BFBE8E; + mov.f32 %f3025, 0f3F317200; + mov.f32 %f3024, 0f3DAAAABD; + mov.f32 %f3023, 0f3C4CAF63; + mov.f32 %f3022, 0f3B18F0FE; + cvt.rn.f32.s32 %f3021, %r1375; + sub.f32 %f3020, %f3021, %f3294; + mov.f32 %f3019, 0f3F000000; + mul.f32 %f1448, %f3268, 0fBF000000; + setp.eq.f32 %p498, %f219, 0f3F800000; + selp.f32 %f1449, 0fBF000000, %f1448, %p498; + fma.rn.f32 %f1452, %f1449, %f3029, %f3019; + cvt.sat.f32.f32 %f1455, %f1452; + fma.rm.f32 %f1457, %f1455, %f3030, %f3031; + add.f32 %f1458, %f1457, 0fCB40007F; neg.f32 %f1459, %f1458; - fma.rn.f32 %f1460, %f900, %f1454, %f1459; - fma.rn.f32 %f1461, %f900, %f1456, %f1460; - fma.rn.f32 %f1463, %f3354, %f1454, %f1461; - add.rn.f32 %f1464, %f1458, %f1463; - neg.f32 %f1465, %f1464; - add.rn.f32 %f1466, %f1458, %f1465; - add.rn.f32 %f1467, %f1466, %f1463; - mov.b32 %r178, %f1464; - setp.eq.s32 %p125, %r178, 1118925336; - add.s32 %r179, %r178, -1; - mov.b32 %f1468, %r179; - add.f32 %f1469, %f1467, 0f37000000; - selp.f32 %f1470, %f1468, %f1464, %p125; - selp.f32 %f277, %f1469, %f1467, %p125; - mul.f32 %f1471, %f1470, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1472, %f1471; - fma.rn.f32 %f1473, %f1472, %f3221, %f1470; - fma.rn.f32 %f1474, %f1472, %f3222, %f1473; - mul.f32 %f1475, %f1474, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1476, %f1475; - add.f32 %f1477, %f1472, 0f00000000; - ex2.approx.f32 %f1478, %f1477; - mul.f32 %f1479, %f1476, %f1478; - setp.lt.f32 %p126, %f1470, 0fC2D20000; - selp.f32 %f1480, 0f00000000, %f1479, %p126; - setp.gt.f32 %p127, %f1470, 0f42D20000; - selp.f32 %f3336, 0f7F800000, %f1480, %p127; - setp.eq.f32 %p128, %f3336, 0f7F800000; - @%p128 bra BB6_74; - - fma.rn.f32 %f3336, %f3336, %f277, %f3336; - -BB6_74: - setp.lt.f32 %p129, %f270, 0f00000000; - and.pred %p10, %p129, %p59; - mov.b32 %r180, %f3336; - xor.b32 %r181, %r180, -2147483648; - mov.b32 %f1481, %r181; - selp.f32 %f3338, %f1481, %f3336, %p10; - setp.eq.f32 %p131, %f270, 0f00000000; - @%p131 bra BB6_77; - bra.uni BB6_75; - -BB6_77: - add.f32 %f1484, %f270, %f270; - selp.f32 %f3338, %f1484, 0f00000000, %p59; - bra.uni BB6_78; - -BB6_75: - setp.geu.f32 %p132, %f270, 0f00000000; - @%p132 bra BB6_78; - - cvt.rzi.f32.f32 %f1483, %f900; - setp.neu.f32 %p133, %f1483, 0f40000000; - selp.f32 %f3338, 0f7FFFFFFF, %f3338, %p133; - -BB6_78: - abs.f32 %f3125, %f270; - add.f32 %f1485, %f3125, 0f40000000; - mov.b32 %r37, %f1485; - setp.lt.s32 %p135, %r37, 2139095040; - @%p135 bra BB6_83; - - abs.f32 %f3238, %f270; - setp.gtu.f32 %p136, %f3238, 0f7F800000; - @%p136 bra BB6_82; - bra.uni BB6_80; - -BB6_82: - add.f32 %f3338, %f270, 0f40000000; - bra.uni BB6_83; - -BB6_80: - abs.f32 %f3239, %f270; - setp.neu.f32 %p137, %f3239, 0f7F800000; - @%p137 bra BB6_83; - - selp.f32 %f3338, 0fFF800000, 0f7F800000, %p10; - -BB6_83: - cvt.rn.f32.s32 %f3243, %r323; - sub.f32 %f3242, %f3243, %f3369; - cvt.rn.f32.s32 %f3133, %r323; - add.f32 %f3132, %f3133, 0f3F800000; - sub.f32 %f3131, %f3132, %f3369; - mov.f32 %f3130, 0f3DAAAABD; - mov.f32 %f3129, 0f3C4CAF63; - mov.f32 %f3128, 0f3B18F0FE; - mov.f32 %f3127, 0fB5BFBE8E; - mov.f32 %f3126, 0fBF317200; - mul.f32 %f1488, %f3338, 0fBF000000; - setp.eq.f32 %p138, %f270, 0f3F800000; - selp.f32 %f1489, 0fBF000000, %f1488, %p138; - mul.f32 %f1490, %f1489, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1491, %f1490; - fma.rn.f32 %f1493, %f1491, %f3126, %f1489; - fma.rn.f32 %f1495, %f1491, %f3127, %f1493; - mul.f32 %f1496, %f1495, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1497, %f1496; - add.f32 %f1498, %f1491, 0f00000000; - ex2.approx.f32 %f1499, %f1498; - mul.f32 %f1500, %f1497, %f1499; - setp.lt.f32 %p139, %f1489, 0fC2D20000; - selp.f32 %f1501, 0f00000000, %f1500, %p139; - setp.gt.f32 %p140, %f1489, 0f42D20000; - selp.f32 %f1502, 0f7F800000, %f1501, %p140; - sub.f32 %f1503, %f269, %f1502; - mul.f32 %f1504, %f165, %f1503; - mul.f32 %f288, %f216, %f1504; - mul.f32 %f1505, %f3242, %f1502; - mul.f32 %f1506, %f3131, %f269; - sub.f32 %f1507, %f1506, %f1505; - mul.f32 %f1508, %f1507, %f195; - mul.f32 %f289, %f216, %f1508; - // inline asm - rcp.approx.ftz.f32 %f1486,%f151; - // inline asm - mul.f32 %f1509, %f1486, %f152; - mul.f32 %f1510, %f1509, %f1509; - fma.rn.f32 %f1513, %f3128, %f1510, %f3129; - fma.rn.f32 %f1515, %f1513, %f1510, %f3130; - mul.rn.f32 %f1516, %f1515, %f1510; - mul.rn.f32 %f1517, %f1516, %f1509; - sub.f32 %f1518, %f150, %f1509; - neg.f32 %f1519, %f1509; - add.f32 %f1520, %f1518, %f1518; - fma.rn.f32 %f1521, %f1519, %f150, %f1520; - mul.rn.f32 %f1522, %f1486, %f1521; - add.f32 %f1523, %f1517, %f1509; - sub.f32 %f1524, %f1509, %f1523; - add.f32 %f1525, %f1517, %f1524; - add.f32 %f1526, %f1522, %f1525; - add.f32 %f1527, %f1523, %f1526; - sub.f32 %f1528, %f1523, %f1527; - add.f32 %f1529, %f1526, %f1528; - add.f32 %f1530, %f153, %f1527; - sub.f32 %f1531, %f153, %f1530; - add.f32 %f1532, %f1527, %f1531; - add.f32 %f1533, %f1529, %f1532; - add.f32 %f1534, %f154, %f1533; - add.f32 %f1535, %f1530, %f1534; - sub.f32 %f1536, %f1530, %f1535; - add.f32 %f1537, %f1534, %f1536; - mul.rn.f32 %f1539, %f900, %f1535; - neg.f32 %f1540, %f1539; - fma.rn.f32 %f1541, %f900, %f1535, %f1540; - fma.rn.f32 %f1542, %f900, %f1537, %f1541; - fma.rn.f32 %f1544, %f3354, %f1535, %f1542; - add.rn.f32 %f1545, %f1539, %f1544; - neg.f32 %f1546, %f1545; - add.rn.f32 %f1547, %f1539, %f1546; - add.rn.f32 %f1548, %f1547, %f1544; - mov.b32 %r182, %f1545; - setp.eq.s32 %p141, %r182, 1118925336; - add.s32 %r183, %r182, -1; - mov.b32 %f1549, %r183; - add.f32 %f1550, %f1548, 0f37000000; - selp.f32 %f1551, %f1549, %f1545, %p141; - selp.f32 %f290, %f1550, %f1548, %p141; - mul.f32 %f1552, %f1551, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1553, %f1552; - fma.rn.f32 %f1554, %f1553, %f3126, %f1551; - fma.rn.f32 %f1555, %f1553, %f3127, %f1554; - mul.f32 %f1556, %f1555, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1557, %f1556; - add.f32 %f1558, %f1553, 0f00000000; - ex2.approx.f32 %f1559, %f1558; - mul.f32 %f1560, %f1557, %f1559; - setp.lt.f32 %p142, %f1551, 0fC2D20000; - selp.f32 %f1561, 0f00000000, %f1560, %p142; - setp.gt.f32 %p143, %f1551, 0f42D20000; - selp.f32 %f3339, 0f7F800000, %f1561, %p143; - setp.eq.f32 %p144, %f3339, 0f7F800000; - @%p144 bra BB6_85; - - fma.rn.f32 %f3339, %f3339, %f290, %f3339; - -BB6_85: - setp.eq.f32 %p363, %f147, 0f00000000; - setp.geu.f32 %p362, %f147, 0f00000000; - mov.b32 %r184, %f3339; - xor.b32 %r185, %r184, -2147483648; - mov.b32 %f1562, %r185; - selp.f32 %f294, %f1562, %f3339, %p3; - selp.f32 %f3340, %f155, %f294, %p363; - @%p362 bra BB6_87; - - cvt.rzi.f32.f32 %f1564, %f900; - setp.neu.f32 %p146, %f1564, 0f40000000; - selp.f32 %f3340, 0f7FFFFFFF, %f294, %p146; - -BB6_87: - abs.f32 %f3142, %f147; - setp.eq.f32 %p367, %f147, 0f3F800000; - add.f32 %f3141, %f3142, 0f40000000; - mov.b32 %r300, %f3141; - setp.gt.s32 %p366, %r300, 2139095039; - setp.neu.f32 %p365, %f3142, 0f7F800000; - selp.f32 %f3140, 0fFF800000, 0f7F800000, %p3; - setp.gtu.f32 %p364, %f3142, 0f7F800000; - add.f32 %f3139, %f147, 0f40000000; - mov.f32 %f3138, 0f3DAAAABD; - mov.f32 %f3137, 0f3C4CAF63; - mov.f32 %f3136, 0f3B18F0FE; - mov.f32 %f3135, 0fB5BFBE8E; - mov.f32 %f3134, 0fBF317200; - selp.f32 %f1568, %f3139, %f3340, %p364; - selp.f32 %f1570, %f1568, %f3140, %p365; - selp.f32 %f1571, %f1570, %f3340, %p366; - mul.f32 %f1572, %f1571, 0fBF000000; - selp.f32 %f1573, 0fBF000000, %f1572, %p367; - mul.f32 %f1574, %f1573, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1575, %f1574; - fma.rn.f32 %f1577, %f1575, %f3134, %f1573; - fma.rn.f32 %f1579, %f1575, %f3135, %f1577; - mul.f32 %f1580, %f1579, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1581, %f1580; - add.f32 %f1582, %f1575, 0f00000000; - ex2.approx.f32 %f1583, %f1582; - mul.f32 %f1584, %f1581, %f1583; - setp.lt.f32 %p151, %f1573, 0fC2D20000; - selp.f32 %f1585, 0f00000000, %f1584, %p151; - setp.gt.f32 %p152, %f1573, 0f42D20000; - selp.f32 %f298, 0f7F800000, %f1585, %p152; - // inline asm - rcp.approx.ftz.f32 %f1565,%f159; - // inline asm - mul.f32 %f1586, %f1565, %f160; - mul.f32 %f1587, %f1586, %f1586; - fma.rn.f32 %f1590, %f3136, %f1587, %f3137; - fma.rn.f32 %f1592, %f1590, %f1587, %f3138; - mul.rn.f32 %f1593, %f1592, %f1587; - mul.rn.f32 %f1594, %f1593, %f1586; - sub.f32 %f1595, %f158, %f1586; - neg.f32 %f1596, %f1586; - add.f32 %f1597, %f1595, %f1595; - fma.rn.f32 %f1598, %f1596, %f158, %f1597; - mul.rn.f32 %f1599, %f1565, %f1598; - add.f32 %f1600, %f1594, %f1586; - sub.f32 %f1601, %f1586, %f1600; - add.f32 %f1602, %f1594, %f1601; - add.f32 %f1603, %f1599, %f1602; + fma.rn.f32 %f1460, %f1449, %f812, %f1459; + fma.rn.f32 %f1462, %f1449, %f3032, %f1460; + mov.b32 %r633, %f1457; + shl.b32 %r634, %r633, 23; + mov.b32 %f1463, %r634; + ex2.approx.ftz.f32 %f1464, %f1462; + mul.f32 %f231, %f1464, %f1463; + div.rn.f32 %f232, %f3020, %f107; + abs.f32 %f233, %f232; + setp.lt.f32 %p499, %f233, 0f00800000; + mul.f32 %f1465, %f233, 0f4B800000; + selp.f32 %f1466, %f1465, %f233, %p499; + selp.f32 %f1467, 0fC3170000, 0fC2FE0000, %p499; + mov.b32 %r635, %f1466; + and.b32 %r636, %r635, 8388607; + or.b32 %r637, %r636, 1065353216; + mov.b32 %f1468, %r637; + shr.u32 %r638, %r635, 23; + cvt.rn.f32.u32 %f1469, %r638; + add.f32 %f1470, %f1467, %f1469; + setp.gt.f32 %p500, %f1468, 0f3FB504F3; + mul.f32 %f1471, %f1468, 0f3F000000; + add.f32 %f1472, %f1470, 0f3F800000; + selp.f32 %f1473, %f1472, %f1470, %p500; + selp.f32 %f1474, %f1471, %f1468, %p500; + add.f32 %f1475, %f1474, 0fBF800000; + add.f32 %f1476, %f1474, 0f3F800000; + rcp.approx.ftz.f32 %f1477, %f1476; + add.f32 %f1478, %f1475, %f1475; + mul.f32 %f1480, %f1478, %f1477; + mul.f32 %f1481, %f1480, %f1480; + fma.rn.f32 %f1484, %f3022, %f1481, %f3023; + fma.rn.f32 %f1486, %f1484, %f1481, %f3024; + mul.rn.f32 %f1487, %f1486, %f1481; + mul.rn.f32 %f1488, %f1487, %f1480; + sub.f32 %f1489, %f1475, %f1480; + add.f32 %f1490, %f1489, %f1489; + neg.f32 %f1491, %f1480; + fma.rn.f32 %f1492, %f1491, %f1475, %f1490; + mul.rn.f32 %f1493, %f1477, %f1492; + add.f32 %f1494, %f1488, %f1480; + sub.f32 %f1495, %f1480, %f1494; + add.f32 %f1496, %f1488, %f1495; + add.f32 %f1497, %f1493, %f1496; + add.f32 %f1498, %f1494, %f1497; + sub.f32 %f1499, %f1494, %f1498; + add.f32 %f1500, %f1497, %f1499; + mul.rn.f32 %f1502, %f1473, %f3025; + mul.rn.f32 %f1504, %f1473, %f3026; + add.f32 %f1505, %f1502, %f1498; + sub.f32 %f1506, %f1502, %f1505; + add.f32 %f1507, %f1498, %f1506; + add.f32 %f1508, %f1500, %f1507; + add.f32 %f1509, %f1504, %f1508; + add.f32 %f1510, %f1505, %f1509; + sub.f32 %f1511, %f1505, %f1510; + add.f32 %f1512, %f1509, %f1511; + mul.rn.f32 %f1513, %f613, %f1510; + neg.f32 %f1514, %f1513; + fma.rn.f32 %f1515, %f613, %f1510, %f1514; + fma.rn.f32 %f1516, %f613, %f1512, %f1515; + fma.rn.f32 %f1518, %f3279, %f1510, %f1516; + add.rn.f32 %f1519, %f1513, %f1518; + neg.f32 %f1520, %f1519; + add.rn.f32 %f1521, %f1513, %f1520; + add.rn.f32 %f1522, %f1521, %f1518; + mov.b32 %r639, %f1519; + setp.eq.s32 %p501, %r639, 1118925336; + add.s32 %r640, %r639, -1; + mov.b32 %f1523, %r640; + add.f32 %f1524, %f1522, 0f37000000; + selp.f32 %f234, %f1524, %f1522, %p501; + selp.f32 %f1525, %f1523, %f1519, %p501; + mul.rn.f32 %f1526, %f1525, %f812; + cvt.rzi.f32.f32 %f1527, %f1526; + abs.f32 %f1528, %f1527; + setp.gt.f32 %p502, %f1528, 0f42FC0000; + mov.b32 %r641, %f1527; + and.b32 %r642, %r641, -2147483648; + or.b32 %r643, %r642, 1123811328; + mov.b32 %f1529, %r643; + selp.f32 %f1530, %f1529, %f1527, %p502; + fma.rn.f32 %f1532, %f1530, %f3027, %f1525; + fma.rn.f32 %f1534, %f1530, %f3028, %f1532; + mul.f32 %f1535, %f1534, 0f3FB8AA3B; + add.f32 %f1536, %f1530, 0f4B40007F; + mov.b32 %r644, %f1536; + shl.b32 %r645, %r644, 23; + mov.b32 %f1537, %r645; + ex2.approx.ftz.f32 %f1538, %f1535; + mul.f32 %f235, %f1538, %f1537; + setp.eq.f32 %p503, %f235, 0f7F800000; + mov.f32 %f3269, 0f7F800000; + @%p503 bra $L__BB6_285; + + fma.rn.f32 %f3269, %f235, %f234, %f235; + +$L__BB6_285: + setp.lt.f32 %p504, %f232, 0f00000000; + and.pred %p40, %p504, %p301; + setp.eq.f32 %p506, %f232, 0f00000000; + @%p506 bra $L__BB6_289; + bra.uni $L__BB6_286; + +$L__BB6_289: + add.f32 %f1543, %f232, %f232; + selp.f32 %f3271, %f1543, 0f00000000, %p301; + bra.uni $L__BB6_290; + +$L__BB6_286: + mov.b32 %r646, %f3269; + xor.b32 %r647, %r646, -2147483648; + mov.b32 %f1539, %r647; + selp.f32 %f3271, %f1539, %f3269, %p40; + setp.geu.f32 %p507, %f232, 0f00000000; + @%p507 bra $L__BB6_290; + + cvt.rzi.f32.f32 %f1541, %f613; + setp.eq.f32 %p508, %f1541, 0f40000000; + @%p508 bra $L__BB6_290; + + mov.f32 %f3271, 0f7FFFFFFF; + +$L__BB6_290: + abs.f32 %f3168, %f232; + add.f32 %f1544, %f3168, 0f40000000; + mov.b32 %r648, %f1544; + setp.lt.s32 %p510, %r648, 2139095040; + @%p510 bra $L__BB6_295; + + abs.f32 %f3169, %f232; + setp.gtu.f32 %p511, %f3169, 0f7F800000; + @%p511 bra $L__BB6_294; + bra.uni $L__BB6_292; + +$L__BB6_294: + add.f32 %f3271, %f232, 0f40000000; + bra.uni $L__BB6_295; + +$L__BB6_292: + abs.f32 %f3170, %f232; + setp.neu.f32 %p512, %f3170, 0f7F800000; + @%p512 bra $L__BB6_295; + + selp.f32 %f3271, 0fFF800000, 0f7F800000, %p40; + +$L__BB6_295: + cvt.f64.f32 %fd1049, %f107; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1353}, %fd1049; + } + setp.lt.s32 %p1286, %r1353, 0; + mov.f64 %fd1048, 0d4014000000000000; + mov.f32 %f3039, 0f32A57060; + mov.f32 %f3038, 0f4B400001; + mov.f32 %f3037, 0f437C0000; + mov.f32 %f3036, 0f3BBB989D; + cvt.rn.f32.s32 %f3035, %r1375; + sub.f32 %f3034, %f3035, %f3294; + mov.f32 %f3033, 0f3F000000; + mul.f32 %f1545, %f3271, 0fBF000000; + setp.eq.f32 %p514, %f232, 0f3F800000; + selp.f32 %f1546, 0fBF000000, %f1545, %p514; + fma.rn.f32 %f1549, %f1546, %f3036, %f3033; + cvt.sat.f32.f32 %f1552, %f1549; + fma.rm.f32 %f1554, %f1552, %f3037, %f3038; + add.f32 %f1555, %f1554, 0fCB40007F; + neg.f32 %f1556, %f1555; + fma.rn.f32 %f1557, %f1546, %f812, %f1556; + fma.rn.f32 %f1559, %f1546, %f3039, %f1557; + mov.b32 %r650, %f1554; + shl.b32 %r651, %r650, 23; + mov.b32 %f1560, %r651; + ex2.approx.ftz.f32 %f1561, %f1559; + mul.f32 %f244, %f1561, %f1560; + add.f32 %f1562, %f3034, 0f3F800000; + mul.f32 %f1563, %f1562, %f231; + mul.f32 %f1564, %f3034, %f244; + sub.f32 %f1565, %f1563, %f1564; + div.rn.f32 %f1566, %f188, %f107; + mul.f32 %f1567, %f1566, %f1565; + mul.f32 %f245, %f117, %f1567; + { // callseq 130, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd133; + .param .b64 param1; + st.param.f64 [param1+0], %fd1048; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1112, [retval0+0]; + } // callseq 130 + and.pred %p41, %p1286, %p428; + not.pred %p516, %p41; + @%p516 bra $L__BB6_297; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r652}, %fd1112; + } + xor.b32 %r653, %r652, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r654, %temp}, %fd1112; + } + mov.b64 %fd1112, {%r654, %r653}; + +$L__BB6_297: + setp.eq.f32 %p1287, %f107, 0f00000000; + @%p1287 bra $L__BB6_301; + bra.uni $L__BB6_298; + +$L__BB6_301: + setp.lt.s32 %p520, %r78, 0; + mov.u32 %r655, 0; + selp.b32 %r657, %r105, 0, %p428; + or.b32 %r658, %r657, 2146435072; + selp.b32 %r659, %r658, %r657, %p520; + mov.b64 %fd1112, {%r655, %r659}; + bra.uni $L__BB6_302; + +$L__BB6_298: + setp.gt.s32 %p518, %r105, -1; + @%p518 bra $L__BB6_302; + + mov.f64 %fd1066, 0d4014000000000000; + cvt.rzi.f64.f64 %fd759, %fd1066; + setp.eq.f64 %p519, %fd759, 0d4014000000000000; + @%p519 bra $L__BB6_302; + + mov.f64 %fd1112, 0dFFF8000000000000; + +$L__BB6_302: + add.f64 %fd175, %fd132, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r660}, %fd175; + } + and.b32 %r661, %r660, 2146435072; + setp.ne.s32 %p522, %r661, 2146435072; + mov.f64 %fd1113, %fd1112; + @%p522 bra $L__BB6_308; + + setp.gtu.f64 %p523, %fd133, 0d7FF0000000000000; + mov.f64 %fd1113, %fd175; + @%p523 bra $L__BB6_308; + + mov.f64 %fd1065, 0d4014000000000000; + and.b32 %r662, %r78, 2147483647; + setp.eq.s32 %p524, %r662, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r663, %temp}, %fd1065; + } + setp.eq.s32 %p525, %r663, 0; + and.pred %p526, %p524, %p525; + @%p526 bra $L__BB6_307; + bra.uni $L__BB6_305; + +$L__BB6_307: + setp.lt.s32 %p533, %r78, 0; + mov.u32 %r671, 0; + setp.gt.f64 %p534, %fd133, 0d3FF0000000000000; + selp.b32 %r672, 2146435072, 0, %p534; + xor.b32 %r673, %r672, 2146435072; + selp.b32 %r674, %r673, %r672, %p533; + setp.eq.f32 %p535, %f107, 0fBF800000; + selp.b32 %r675, 1072693248, %r674, %p535; + mov.b64 %fd1113, {%r671, %r675}; + bra.uni $L__BB6_308; + +$L__BB6_305: + { + .reg .b32 %temp; + mov.b64 {%r664, %temp}, %fd132; + } + and.b32 %r665, %r105, 2147483647; + setp.ne.s32 %p527, %r665, 2146435072; + setp.ne.s32 %p528, %r664, 0; + or.pred %p529, %p527, %p528; + mov.f64 %fd1113, %fd1112; + @%p529 bra $L__BB6_308; + + setp.ne.s32 %p530, %r662, 1071644672; + and.pred %p531, %p530, %p41; + setp.gt.s32 %p532, %r78, -1; + selp.b32 %r667, 2146435072, 0, %p532; + mov.u32 %r668, 0; + or.b32 %r669, %r667, -2147483648; + selp.b32 %r670, %r669, %r667, %p531; + mov.b64 %fd1113, {%r668, %r670}; + +$L__BB6_308: + cvt.f64.f32 %fd179, %f120; + { + .reg .b32 %temp; + mov.b64 {%temp, %r106}, %fd179; + } + abs.f64 %fd180, %fd179; + { // callseq 131, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd180; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1115, [retval0+0]; + } // callseq 131 + setp.lt.s32 %p536, %r106, 0; + and.pred %p42, %p536, %p149; + not.pred %p538, %p42; + @%p538 bra $L__BB6_310; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r676}, %fd1115; + } + xor.b32 %r677, %r676, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r678, %temp}, %fd1115; + } + mov.b64 %fd1115, {%r678, %r677}; + +$L__BB6_310: + setp.eq.f32 %p1288, %f107, 0f3F800000; + setp.eq.f32 %p539, %f120, 0f00000000; + selp.f64 %fd184, 0d3FF0000000000000, %fd1113, %p1288; + @%p539 bra $L__BB6_314; + bra.uni $L__BB6_311; + +$L__BB6_314: + mov.u32 %r679, 0; + selp.b32 %r680, %r106, 0, %p149; + or.b32 %r681, %r680, 2146435072; + selp.b32 %r682, %r681, %r680, %p152; + mov.b64 %fd1115, {%r679, %r682}; + bra.uni $L__BB6_315; + +$L__BB6_311: + setp.gt.s32 %p541, %r106, -1; + @%p541 bra $L__BB6_315; + + cvt.rzi.f64.f64 %fd764, %fd649; + setp.eq.f64 %p542, %fd764, 0d4008000000000000; + @%p542 bra $L__BB6_315; + + mov.f64 %fd1115, 0dFFF8000000000000; + +$L__BB6_315: + add.f64 %fd187, %fd179, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r683}, %fd187; + } + and.b32 %r684, %r683, 2146435072; + setp.ne.s32 %p545, %r684, 2146435072; + mov.f64 %fd1116, %fd1115; + @%p545 bra $L__BB6_321; + + setp.gtu.f64 %p546, %fd180, 0d7FF0000000000000; + mov.f64 %fd1116, %fd187; + @%p546 bra $L__BB6_321; + + setp.eq.s32 %p547, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r685, %temp}, %fd649; + } + setp.eq.s32 %p548, %r685, 0; + and.pred %p549, %p547, %p548; + @%p549 bra $L__BB6_320; + bra.uni $L__BB6_318; + +$L__BB6_320: + mov.u32 %r690, 0; + setp.gt.f64 %p556, %fd180, 0d3FF0000000000000; + selp.b32 %r691, 2146435072, 0, %p556; + xor.b32 %r692, %r691, 2146435072; + selp.b32 %r693, %r692, %r691, %p152; + setp.eq.f32 %p557, %f120, 0fBF800000; + selp.b32 %r694, 1072693248, %r693, %p557; + mov.b64 %fd1116, {%r690, %r694}; + bra.uni $L__BB6_321; + +$L__BB6_318: + { + .reg .b32 %temp; + mov.b64 {%r686, %temp}, %fd179; + } + and.b32 %r687, %r106, 2147483647; + setp.ne.s32 %p550, %r687, 2146435072; + setp.ne.s32 %p551, %r686, 0; + or.pred %p552, %p550, %p551; + mov.f64 %fd1116, %fd1115; + @%p552 bra $L__BB6_321; + + setp.ne.s32 %p553, %r58, 1071644672; + and.pred %p554, %p553, %p42; + selp.b32 %r688, %r63, %r62, %p554; + mov.u32 %r689, 0; + mov.b64 %fd1116, {%r689, %r688}; + +$L__BB6_321: + setp.eq.f32 %p558, %f120, 0f3F800000; + selp.f64 %fd767, 0d3FF0000000000000, %fd1116, %p558; + cvt.f64.f32 %fd768, %f231; + mul.f64 %fd191, %fd767, %fd768; + cvt.f64.f32 %fd192, %f126; + { + .reg .b32 %temp; + mov.b64 {%temp, %r107}, %fd192; + } + abs.f64 %fd193, %fd192; + { // callseq 132, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd193; + .param .b64 param1; + st.param.f64 [param1+0], %fd649; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1118, [retval0+0]; + } // callseq 132 + setp.lt.s32 %p559, %r107, 0; + and.pred %p43, %p559, %p149; + not.pred %p561, %p43; + @%p561 bra $L__BB6_323; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r695}, %fd1118; + } + xor.b32 %r696, %r695, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r697, %temp}, %fd1118; + } + mov.b64 %fd1118, {%r697, %r696}; + +$L__BB6_323: + setp.eq.f32 %p562, %f126, 0f00000000; + @%p562 bra $L__BB6_327; + bra.uni $L__BB6_324; + +$L__BB6_327: + mov.u32 %r698, 0; + selp.b32 %r699, %r107, 0, %p149; + or.b32 %r700, %r699, 2146435072; + selp.b32 %r701, %r700, %r699, %p152; + mov.b64 %fd1118, {%r698, %r701}; + bra.uni $L__BB6_328; + +$L__BB6_324: + setp.gt.s32 %p563, %r107, -1; + @%p563 bra $L__BB6_328; + + cvt.rzi.f64.f64 %fd771, %fd649; + setp.eq.f64 %p564, %fd771, 0d4008000000000000; + @%p564 bra $L__BB6_328; + + mov.f64 %fd1118, 0dFFF8000000000000; + +$L__BB6_328: + add.f64 %fd199, %fd192, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r702}, %fd199; + } + and.b32 %r703, %r702, 2146435072; + setp.ne.s32 %p567, %r703, 2146435072; + mov.f64 %fd1119, %fd1118; + @%p567 bra $L__BB6_334; + + setp.gtu.f64 %p568, %fd193, 0d7FF0000000000000; + mov.f64 %fd1119, %fd199; + @%p568 bra $L__BB6_334; + + setp.eq.s32 %p569, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r704, %temp}, %fd649; + } + setp.eq.s32 %p570, %r704, 0; + and.pred %p571, %p569, %p570; + @%p571 bra $L__BB6_333; + bra.uni $L__BB6_331; + +$L__BB6_333: + mov.u32 %r709, 0; + setp.gt.f64 %p578, %fd193, 0d3FF0000000000000; + selp.b32 %r710, 2146435072, 0, %p578; + xor.b32 %r711, %r710, 2146435072; + selp.b32 %r712, %r711, %r710, %p152; + setp.eq.f32 %p579, %f126, 0fBF800000; + selp.b32 %r713, 1072693248, %r712, %p579; + mov.b64 %fd1119, {%r709, %r713}; + bra.uni $L__BB6_334; + +$L__BB6_331: + { + .reg .b32 %temp; + mov.b64 {%r705, %temp}, %fd192; + } + and.b32 %r706, %r107, 2147483647; + setp.ne.s32 %p572, %r706, 2146435072; + setp.ne.s32 %p573, %r705, 0; + or.pred %p574, %p572, %p573; + mov.f64 %fd1119, %fd1118; + @%p574 bra $L__BB6_334; + + setp.ne.s32 %p575, %r58, 1071644672; + and.pred %p576, %p575, %p43; + selp.b32 %r707, %r63, %r62, %p576; + mov.u32 %r708, 0; + mov.b64 %fd1119, {%r708, %r707}; + +$L__BB6_334: + mov.f32 %f3165, 0fC0000000; + cvt.f64.f32 %fd1050, %f117; + setp.eq.f32 %p580, %f126, 0f3F800000; + selp.f64 %fd774, 0d3FF0000000000000, %fd1119, %p580; + cvt.f64.f32 %fd775, %f244; + mul.f64 %fd776, %fd774, %fd775; + sub.f64 %fd777, %fd191, %fd776; + div.rn.f64 %fd778, %fd43, %fd184; + mul.f64 %fd779, %fd778, %fd777; + mul.f64 %fd781, %fd779, %fd1050; + div.rn.f32 %f1569, %f3165, %f107; + mul.f32 %f1570, %f1569, %f245; + cvt.f64.f32 %fd782, %f1570; + sub.f64 %fd203, %fd782, %fd781; + div.rn.f32 %f246, %f52, %f104; + div.rn.f32 %f247, %f53, %f106; + not.pred %p581, %p12; + mov.f64 %fd1121, %fd66; + @%p581 bra $L__BB6_336; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r714}, %fd66; + } + xor.b32 %r715, %r714, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r716, %temp}, %fd66; + } + mov.b64 %fd1121, {%r716, %r715}; + +$L__BB6_336: + sub.f32 %f3040, %f3291, %f552; + setp.eq.f32 %p582, %f3040, 0f00000000; + @%p582 bra $L__BB6_340; + bra.uni $L__BB6_337; + +$L__BB6_340: + mov.u32 %r717, 0; + mov.b64 %fd1121, {%r717, %r84}; + bra.uni $L__BB6_341; + +$L__BB6_337: + setp.gt.s32 %p583, %r83, -1; + @%p583 bra $L__BB6_341; + + cvt.rzi.f64.f64 %fd784, %fd646; + setp.eq.f64 %p584, %fd784, 0d4000000000000000; + @%p584 bra $L__BB6_341; + + mov.f64 %fd1121, 0dFFF8000000000000; + +$L__BB6_341: + selp.f64 %fd1122, %fd1121, %fd46, %p182; + @%p25 bra $L__BB6_346; + + setp.eq.s32 %p586, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r718, %temp}, %fd646; + } + setp.eq.s32 %p587, %r718, 0; + and.pred %p588, %p586, %p587; + @%p588 bra $L__BB6_345; + bra.uni $L__BB6_343; + +$L__BB6_345: + mov.u32 %r722, 0; + mov.b64 %fd1122, {%r722, %r86}; + bra.uni $L__BB6_346; + +$L__BB6_343: + and.b32 %r719, %r83, 2147483647; + setp.ne.s32 %p589, %r719, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r720, %temp}, %fd45; + } + setp.ne.s32 %p590, %r720, 0; + or.pred %p591, %p589, %p590; + mov.f64 %fd1122, %fd1121; + @%p591 bra $L__BB6_346; + + mov.u32 %r721, 0; + mov.b64 %fd1122, {%r721, %r88}; + +$L__BB6_346: + cvt.f64.f32 %fd212, %f55; + not.pred %p592, %p13; + mov.f64 %fd1124, %fd67; + @%p592 bra $L__BB6_348; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r723}, %fd67; + } + xor.b32 %r724, %r723, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r725, %temp}, %fd67; + } + mov.b64 %fd1124, {%r725, %r724}; + +$L__BB6_348: + @%p582 bra $L__BB6_352; + bra.uni $L__BB6_349; + +$L__BB6_352: + mov.u32 %r726, 0; + selp.b32 %r728, %r83, 0, %p149; + or.b32 %r729, %r728, 2146435072; + selp.b32 %r730, %r729, %r728, %p152; + mov.b64 %fd1124, {%r726, %r730}; + bra.uni $L__BB6_353; + +$L__BB6_349: + setp.gt.s32 %p594, %r83, -1; + @%p594 bra $L__BB6_353; + + cvt.rzi.f64.f64 %fd788, %fd649; + setp.eq.f64 %p595, %fd788, 0d4008000000000000; + @%p595 bra $L__BB6_353; + + mov.f64 %fd1124, 0dFFF8000000000000; + +$L__BB6_353: + selp.f64 %fd1125, %fd1124, %fd47, %p186; + @%p26 bra $L__BB6_358; + + setp.eq.s32 %p599, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r731, %temp}, %fd649; + } + setp.eq.s32 %p600, %r731, 0; + and.pred %p601, %p599, %p600; + @%p601 bra $L__BB6_357; + bra.uni $L__BB6_355; + +$L__BB6_357: + mov.u32 %r738, 0; + mov.b64 %fd1125, {%r738, %r90}; + bra.uni $L__BB6_358; + +$L__BB6_355: + and.b32 %r732, %r83, 2147483647; + setp.ne.s32 %p602, %r732, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r733, %temp}, %fd45; + } + setp.ne.s32 %p603, %r733, 0; + or.pred %p604, %p602, %p603; + mov.f64 %fd1125, %fd1124; + @%p604 bra $L__BB6_358; + + setp.ne.s32 %p605, %r58, 1071644672; + and.pred %p606, %p605, %p13; + selp.b32 %r736, %r63, %r62, %p606; + mov.u32 %r737, 0; + mov.b64 %fd1125, {%r737, %r736}; + +$L__BB6_358: + not.pred %p607, %p14; + mov.f64 %fd1127, %fd68; + @%p607 bra $L__BB6_360; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r739}, %fd68; + } + xor.b32 %r740, %r739, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r741, %temp}, %fd68; + } + mov.b64 %fd1127, {%r741, %r740}; + +$L__BB6_360: + setp.eq.f32 %p608, %f553, 0f00000000; + @%p608 bra $L__BB6_364; + bra.uni $L__BB6_361; + +$L__BB6_364: + mov.u32 %r742, 0; + mov.b64 %fd1127, {%r742, %r91}; + bra.uni $L__BB6_365; + +$L__BB6_361: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1354}, %fd645; + } + setp.gt.s32 %p609, %r1354, -1; + @%p609 bra $L__BB6_365; + + cvt.rzi.f64.f64 %fd792, %fd651; + setp.eq.f64 %p610, %fd792, 0d4010000000000000; + @%p610 bra $L__BB6_365; + + mov.f64 %fd1127, 0dFFF8000000000000; + +$L__BB6_365: + selp.f64 %fd1128, %fd1127, %fd36, %p189; + @%p27 bra $L__BB6_370; + + setp.eq.s32 %p612, %r65, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r743, %temp}, %fd651; + } + setp.eq.s32 %p613, %r743, 0; + and.pred %p614, %p612, %p613; + @%p614 bra $L__BB6_369; + bra.uni $L__BB6_367; + +$L__BB6_369: + mov.u32 %r747, 0; + mov.b64 %fd1128, {%r747, %r94}; + bra.uni $L__BB6_370; + +$L__BB6_367: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1355}, %fd645; + } + and.b32 %r744, %r1355, 2147483647; + setp.ne.s32 %p615, %r744, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r745, %temp}, %fd645; + } + setp.ne.s32 %p616, %r745, 0; + or.pred %p617, %p615, %p616; + mov.f64 %fd1128, %fd1127; + @%p617 bra $L__BB6_370; + + mov.u32 %r746, 0; + mov.b64 %fd1128, {%r746, %r97}; + +$L__BB6_370: + sub.f32 %f3041, %f3291, %f552; + setp.eq.f32 %p618, %f553, 0f3F800000; + selp.f64 %fd796, 0d3FF0000000000000, %fd1128, %p618; + setp.eq.f32 %p619, %f3041, 0f3F800000; + selp.f64 %fd797, 0d3FF0000000000000, %fd1125, %p619; + mul.f64 %fd798, %fd797, %fd35; + div.rn.f64 %fd799, %fd798, %fd796; + selp.f64 %fd800, 0d3FF0000000000000, %fd1122, %p619; + mul.f64 %fd801, %fd800, %fd34; + div.rn.f64 %fd802, %fd801, %fd212; + add.f64 %fd803, %fd802, %fd44; + add.f64 %fd804, %fd803, %fd799; + cvt.rn.f32.f64 %f248, %fd804; + mul.f32 %f249, %f246, %f248; + not.pred %p620, %p15; + mov.f64 %fd1130, %fd69; + @%p620 bra $L__BB6_372; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r748}, %fd69; + } + xor.b32 %r749, %r748, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r750, %temp}, %fd69; + } + mov.b64 %fd1130, {%r750, %r749}; + +$L__BB6_372: + add.f32 %f3042, %f3291, %f552; + setp.eq.f32 %p621, %f3042, 0f00000000; + @%p621 bra $L__BB6_376; + bra.uni $L__BB6_373; + +$L__BB6_376: + mov.u32 %r751, 0; + mov.b64 %fd1130, {%r751, %r95}; + bra.uni $L__BB6_377; + +$L__BB6_373: + setp.gt.s32 %p622, %r93, -1; + @%p622 bra $L__BB6_377; + + cvt.rzi.f64.f64 %fd806, %fd646; + setp.eq.f64 %p623, %fd806, 0d4000000000000000; + @%p623 bra $L__BB6_377; + + mov.f64 %fd1130, 0dFFF8000000000000; + +$L__BB6_377: + selp.f64 %fd1131, %fd1130, %fd50, %p194; + @%p28 bra $L__BB6_382; + + setp.eq.s32 %p625, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r752, %temp}, %fd646; + } + setp.eq.s32 %p626, %r752, 0; + and.pred %p627, %p625, %p626; + @%p627 bra $L__BB6_381; + bra.uni $L__BB6_379; + +$L__BB6_381: + mov.u32 %r756, 0; + mov.b64 %fd1131, {%r756, %r98}; + bra.uni $L__BB6_382; + +$L__BB6_379: + and.b32 %r753, %r93, 2147483647; + setp.ne.s32 %p628, %r753, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r754, %temp}, %fd49; + } + setp.ne.s32 %p629, %r754, 0; + or.pred %p630, %p628, %p629; + mov.f64 %fd1131, %fd1130; + @%p630 bra $L__BB6_382; + + mov.u32 %r755, 0; + mov.b64 %fd1131, {%r755, %r100}; + +$L__BB6_382: + add.f32 %f3043, %f3291, %f552; + setp.eq.f32 %p631, %f3043, 0f3F800000; + selp.f64 %fd809, 0d3FF0000000000000, %fd1131, %p631; + mul.f64 %fd810, %fd809, %fd37; + div.rn.f64 %fd237, %fd810, %fd212; + not.pred %p632, %p16; + mov.f64 %fd1133, %fd70; + @%p632 bra $L__BB6_384; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r757}, %fd70; + } + xor.b32 %r758, %r757, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r759, %temp}, %fd70; + } + mov.b64 %fd1133, {%r759, %r758}; + +$L__BB6_384: + @%p621 bra $L__BB6_388; + bra.uni $L__BB6_385; + +$L__BB6_388: + mov.u32 %r760, 0; + selp.b32 %r762, %r93, 0, %p149; + or.b32 %r763, %r762, 2146435072; + selp.b32 %r764, %r763, %r762, %p152; + mov.b64 %fd1133, {%r760, %r764}; + bra.uni $L__BB6_389; + +$L__BB6_385: + setp.gt.s32 %p634, %r93, -1; + @%p634 bra $L__BB6_389; + + cvt.rzi.f64.f64 %fd812, %fd649; + setp.eq.f64 %p635, %fd812, 0d4008000000000000; + @%p635 bra $L__BB6_389; + + mov.f64 %fd1133, 0dFFF8000000000000; + +$L__BB6_389: + selp.f64 %fd1134, %fd1133, %fd51, %p199; + @%p29 bra $L__BB6_394; + + setp.eq.s32 %p639, %r58, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r765, %temp}, %fd649; + } + setp.eq.s32 %p640, %r765, 0; + and.pred %p641, %p639, %p640; + @%p641 bra $L__BB6_393; + bra.uni $L__BB6_391; + +$L__BB6_393: + mov.u32 %r772, 0; + mov.b64 %fd1134, {%r772, %r101}; + bra.uni $L__BB6_394; + +$L__BB6_391: + and.b32 %r766, %r93, 2147483647; + setp.ne.s32 %p642, %r766, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r767, %temp}, %fd49; + } + setp.ne.s32 %p643, %r767, 0; + or.pred %p644, %p642, %p643; + mov.f64 %fd1134, %fd1133; + @%p644 bra $L__BB6_394; + + setp.ne.s32 %p645, %r58, 1071644672; + and.pred %p646, %p645, %p16; + selp.b32 %r770, %r63, %r62, %p646; + mov.u32 %r771, 0; + mov.b64 %fd1134, {%r771, %r770}; + +$L__BB6_394: + mov.f64 %fd1136, %fd68; + @%p607 bra $L__BB6_396; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r773}, %fd68; + } + xor.b32 %r774, %r773, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r775, %temp}, %fd68; + } + mov.b64 %fd1136, {%r775, %r774}; + +$L__BB6_396: + @%p608 bra $L__BB6_400; + bra.uni $L__BB6_397; + +$L__BB6_400: + mov.u32 %r776, 0; + mov.b64 %fd1136, {%r776, %r91}; + bra.uni $L__BB6_401; + +$L__BB6_397: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1356}, %fd645; + } + setp.gt.s32 %p649, %r1356, -1; + @%p649 bra $L__BB6_401; + + cvt.rzi.f64.f64 %fd816, %fd651; + setp.eq.f64 %p650, %fd816, 0d4010000000000000; + @%p650 bra $L__BB6_401; + + mov.f64 %fd1136, 0dFFF8000000000000; + +$L__BB6_401: + selp.f64 %fd1137, %fd1136, %fd36, %p189; + @%p27 bra $L__BB6_406; + + setp.eq.s32 %p652, %r65, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r777, %temp}, %fd651; + } + setp.eq.s32 %p653, %r777, 0; + and.pred %p654, %p652, %p653; + @%p654 bra $L__BB6_405; + bra.uni $L__BB6_403; + +$L__BB6_405: + mov.u32 %r781, 0; + mov.b64 %fd1137, {%r781, %r94}; + bra.uni $L__BB6_406; + +$L__BB6_403: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1357}, %fd645; + } + and.b32 %r778, %r1357, 2147483647; + setp.ne.s32 %p655, %r778, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r779, %temp}, %fd645; + } + setp.ne.s32 %p656, %r779, 0; + or.pred %p657, %p655, %p656; + mov.f64 %fd1137, %fd1136; + @%p657 bra $L__BB6_406; + + mov.u32 %r780, 0; + mov.b64 %fd1137, {%r780, %r97}; + +$L__BB6_406: + mov.f32 %f3051, 0f3FC00000; + mov.f32 %f3050, 0f3102E308; + mov.f32 %f3049, 0fBF317218; + mov.f32 %f3048, 0f35BFBE8E; + mov.f32 %f3047, 0f3F317200; + mov.f32 %f3046, 0f3DAAAABD; + mov.f32 %f3045, 0f3C4CAF63; + mov.f32 %f3044, 0f3B18F0FE; + selp.f64 %fd820, 0d3FF0000000000000, %fd1137, %p618; + selp.f64 %fd821, 0d3FF0000000000000, %fd1134, %p631; + mul.f64 %fd822, %fd821, %fd38; + div.rn.f64 %fd823, %fd822, %fd820; + add.f64 %fd824, %fd237, %fd48; + add.f64 %fd825, %fd824, %fd823; + cvt.rn.f32.f64 %f250, %fd825; + mul.f32 %f251, %f247, %f250; + mul.f32 %f1572, %f245, %f251; + fma.rn.f32 %f252, %f217, %f249, %f1572; + abs.f32 %f253, %f102; + setp.lt.f32 %p660, %f253, 0f00800000; + mul.f32 %f1573, %f253, 0f4B800000; + selp.f32 %f1574, %f1573, %f253, %p660; + selp.f32 %f1575, 0fC3170000, 0fC2FE0000, %p660; + mov.b32 %r782, %f1574; + and.b32 %r783, %r782, 8388607; + or.b32 %r784, %r783, 1065353216; + mov.b32 %f1576, %r784; + shr.u32 %r785, %r782, 23; + cvt.rn.f32.u32 %f1577, %r785; + add.f32 %f1578, %f1575, %f1577; + setp.gt.f32 %p661, %f1576, 0f3FB504F3; + mul.f32 %f1579, %f1576, 0f3F000000; + add.f32 %f1580, %f1578, 0f3F800000; + selp.f32 %f1581, %f1580, %f1578, %p661; + selp.f32 %f1582, %f1579, %f1576, %p661; + add.f32 %f1583, %f1582, 0fBF800000; + add.f32 %f1584, %f1582, 0f3F800000; + rcp.approx.ftz.f32 %f1585, %f1584; + add.f32 %f1586, %f1583, %f1583; + mul.f32 %f1587, %f1586, %f1585; + mul.f32 %f1588, %f1587, %f1587; + fma.rn.f32 %f1591, %f3044, %f1588, %f3045; + fma.rn.f32 %f1593, %f1591, %f1588, %f3046; + mul.rn.f32 %f1594, %f1593, %f1588; + mul.rn.f32 %f1595, %f1594, %f1587; + sub.f32 %f1596, %f1583, %f1587; + add.f32 %f1597, %f1596, %f1596; + neg.f32 %f1598, %f1587; + fma.rn.f32 %f1599, %f1598, %f1583, %f1597; + mul.rn.f32 %f1600, %f1585, %f1599; + add.f32 %f1601, %f1595, %f1587; + sub.f32 %f1602, %f1587, %f1601; + add.f32 %f1603, %f1595, %f1602; add.f32 %f1604, %f1600, %f1603; - sub.f32 %f1605, %f1600, %f1604; - add.f32 %f1606, %f1603, %f1605; - add.f32 %f1607, %f161, %f1604; - sub.f32 %f1608, %f161, %f1607; - add.f32 %f1609, %f1604, %f1608; - add.f32 %f1610, %f1606, %f1609; - add.f32 %f1611, %f162, %f1610; - add.f32 %f1612, %f1607, %f1611; - sub.f32 %f1613, %f1607, %f1612; - add.f32 %f1614, %f1611, %f1613; - mul.rn.f32 %f1616, %f900, %f1612; - neg.f32 %f1617, %f1616; - fma.rn.f32 %f1618, %f900, %f1612, %f1617; - fma.rn.f32 %f1619, %f900, %f1614, %f1618; - fma.rn.f32 %f1621, %f3354, %f1612, %f1619; - add.rn.f32 %f1622, %f1616, %f1621; - neg.f32 %f1623, %f1622; - add.rn.f32 %f1624, %f1616, %f1623; - add.rn.f32 %f1625, %f1624, %f1621; - mov.b32 %r186, %f1622; - setp.eq.s32 %p153, %r186, 1118925336; - add.s32 %r187, %r186, -1; - mov.b32 %f1626, %r187; - add.f32 %f1627, %f1625, 0f37000000; - selp.f32 %f1628, %f1626, %f1622, %p153; - selp.f32 %f299, %f1627, %f1625, %p153; - mul.f32 %f1629, %f1628, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1630, %f1629; - fma.rn.f32 %f1631, %f1630, %f3134, %f1628; - fma.rn.f32 %f1632, %f1630, %f3135, %f1631; - mul.f32 %f1633, %f1632, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1634, %f1633; - add.f32 %f1635, %f1630, 0f00000000; - ex2.approx.f32 %f1636, %f1635; - mul.f32 %f1637, %f1634, %f1636; - setp.lt.f32 %p154, %f1628, 0fC2D20000; - selp.f32 %f1638, 0f00000000, %f1637, %p154; - setp.gt.f32 %p155, %f1628, 0f42D20000; - selp.f32 %f3341, 0f7F800000, %f1638, %p155; - setp.eq.f32 %p156, %f3341, 0f7F800000; - @%p156 bra BB6_89; - - fma.rn.f32 %f3341, %f3341, %f299, %f3341; - -BB6_89: - setp.eq.f32 %p369, %f156, 0f00000000; - setp.geu.f32 %p368, %f156, 0f00000000; - mov.b32 %r188, %f3341; - xor.b32 %r189, %r188, -2147483648; - mov.b32 %f1639, %r189; - selp.f32 %f303, %f1639, %f3341, %p4; - selp.f32 %f3342, %f163, %f303, %p369; - @%p368 bra BB6_91; - - cvt.rzi.f32.f32 %f1641, %f900; - setp.neu.f32 %p158, %f1641, 0f40000000; - selp.f32 %f3342, 0f7FFFFFFF, %f303, %p158; - -BB6_91: - abs.f32 %f3156, %f156; - cvt.rn.f32.s32 %f3155, %r322; - sub.f32 %f3154, %f3155, %f3370; - mul.f32 %f3153, %f3154, %f3154; - mul.f32 %f3152, %f3154, %f3153; - add.f32 %f3151, %f3154, 0f3F800000; - setp.eq.f32 %p373, %f156, 0f3F800000; - add.f32 %f3150, %f3156, 0f40000000; - mov.b32 %r301, %f3150; - setp.gt.s32 %p372, %r301, 2139095039; - setp.neu.f32 %p371, %f3156, 0f7F800000; - selp.f32 %f3149, 0fFF800000, 0f7F800000, %p4; - setp.gtu.f32 %p370, %f3156, 0f7F800000; - add.f32 %f3148, %f156, 0f40000000; - mov.f32 %f3147, 0f3DAAAABD; - mov.f32 %f3146, 0f3C4CAF63; - mov.f32 %f3145, 0f3B18F0FE; - mov.f32 %f3144, 0fB5BFBE8E; - mov.f32 %f3143, 0fBF317200; - selp.f32 %f1645, %f3148, %f3342, %p370; - selp.f32 %f1647, %f1645, %f3149, %p371; - selp.f32 %f1648, %f1647, %f3342, %p372; - mul.f32 %f1649, %f1648, 0fBF000000; - selp.f32 %f1650, 0fBF000000, %f1649, %p373; - mul.f32 %f1651, %f1650, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1652, %f1651; - fma.rn.f32 %f1654, %f1652, %f3143, %f1650; - fma.rn.f32 %f1656, %f1652, %f3144, %f1654; - mul.f32 %f1657, %f1656, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1658, %f1657; - add.f32 %f1659, %f1652, 0f00000000; - ex2.approx.f32 %f1660, %f1659; - mul.f32 %f1661, %f1658, %f1660; - setp.lt.f32 %p163, %f1650, 0fC2D20000; - selp.f32 %f1662, 0f00000000, %f1661, %p163; - setp.gt.f32 %p164, %f1650, 0f42D20000; - selp.f32 %f1663, 0f7F800000, %f1662, %p164; - mul.f32 %f1664, %f3154, %f1663; - mul.f32 %f1665, %f3151, %f298; - sub.f32 %f1666, %f1665, %f1664; - mul.f32 %f1667, %f166, %f1666; - mul.f32 %f307, %f230, %f1667; - mul.f32 %f1668, %f167, %f307; - mul.f32 %f1669, %f1663, %f3152; - mul.f32 %f1670, %f298, %f197; - sub.f32 %f1671, %f1670, %f1669; - mul.f32 %f1672, %f196, %f1671; - mul.f32 %f1673, %f230, %f1672; - sub.f32 %f308, %f1668, %f1673; - // inline asm - rcp.approx.ftz.f32 %f1642,%f1302; - // inline asm - mul.f32 %f1674, %f1642, %f255; - mul.f32 %f1675, %f1674, %f1674; - fma.rn.f32 %f1678, %f3145, %f1675, %f3146; - fma.rn.f32 %f1680, %f1678, %f1675, %f3147; - mul.rn.f32 %f1681, %f1680, %f1675; - mul.rn.f32 %f1682, %f1681, %f1674; - sub.f32 %f1683, %f253, %f1674; - neg.f32 %f1684, %f1674; - add.f32 %f1685, %f1683, %f1683; - fma.rn.f32 %f1686, %f1684, %f253, %f1685; - mul.rn.f32 %f1687, %f1642, %f1686; - add.f32 %f1688, %f1682, %f1674; - sub.f32 %f1689, %f1674, %f1688; - add.f32 %f1690, %f1682, %f1689; - add.f32 %f1691, %f1687, %f1690; - add.f32 %f1692, %f1688, %f1691; - sub.f32 %f1693, %f1688, %f1692; - add.f32 %f1694, %f1691, %f1693; - add.f32 %f1695, %f256, %f1692; - sub.f32 %f1696, %f256, %f1695; - add.f32 %f1697, %f1692, %f1696; - add.f32 %f1698, %f1694, %f1697; - add.f32 %f1699, %f257, %f1698; + add.f32 %f1605, %f1601, %f1604; + sub.f32 %f1606, %f1601, %f1605; + add.f32 %f1607, %f1604, %f1606; + mul.rn.f32 %f1609, %f1581, %f3047; + mul.rn.f32 %f1611, %f1581, %f3048; + add.f32 %f1612, %f1609, %f1605; + sub.f32 %f1613, %f1609, %f1612; + add.f32 %f1614, %f1605, %f1613; + add.f32 %f1615, %f1607, %f1614; + add.f32 %f1616, %f1611, %f1615; + add.f32 %f1617, %f1612, %f1616; + sub.f32 %f1618, %f1612, %f1617; + add.f32 %f1619, %f1616, %f1618; + mul.rn.f32 %f1621, %f3051, %f1617; + neg.f32 %f1622, %f1621; + fma.rn.f32 %f1623, %f3051, %f1617, %f1622; + fma.rn.f32 %f1624, %f3051, %f1619, %f1623; + fma.rn.f32 %f1626, %f3279, %f1617, %f1624; + add.rn.f32 %f1627, %f1621, %f1626; + neg.f32 %f1628, %f1627; + add.rn.f32 %f1629, %f1621, %f1628; + add.rn.f32 %f1630, %f1629, %f1626; + mov.b32 %r786, %f1627; + setp.eq.s32 %p662, %r786, 1118925336; + add.s32 %r787, %r786, -1; + mov.b32 %f1631, %r787; + add.f32 %f1632, %f1630, 0f37000000; + selp.f32 %f254, %f1632, %f1630, %p662; + selp.f32 %f1633, %f1631, %f1627, %p662; + mul.rn.f32 %f1635, %f1633, %f812; + cvt.rzi.f32.f32 %f1636, %f1635; + abs.f32 %f1637, %f1636; + setp.gt.f32 %p663, %f1637, 0f42FC0000; + mov.b32 %r788, %f1636; + and.b32 %r789, %r788, -2147483648; + or.b32 %r790, %r789, 1123811328; + mov.b32 %f1638, %r790; + selp.f32 %f1639, %f1638, %f1636, %p663; + fma.rn.f32 %f1641, %f1639, %f3049, %f1633; + fma.rn.f32 %f1643, %f1639, %f3050, %f1641; + mul.f32 %f1644, %f1643, 0f3FB8AA3B; + add.f32 %f1645, %f1639, 0f4B40007F; + mov.b32 %r791, %f1645; + shl.b32 %r792, %r791, 23; + mov.b32 %f1646, %r792; + ex2.approx.ftz.f32 %f1647, %f1644; + mul.f32 %f255, %f1647, %f1646; + setp.eq.f32 %p664, %f255, 0f7F800000; + mov.f32 %f3272, 0f7F800000; + @%p664 bra $L__BB6_408; + + fma.rn.f32 %f3272, %f255, %f254, %f255; + +$L__BB6_408: + mov.f32 %f3057, 0f3F400000; + cvt.rzi.f32.f32 %f3056, %f3057; + add.f32 %f3055, %f3056, %f3056; + mov.f32 %f3054, 0f3FC00000; + sub.f32 %f3053, %f3054, %f3055; + abs.f32 %f3052, %f3053; + setp.lt.f32 %p665, %f102, 0f00000000; + setp.eq.f32 %p666, %f3052, 0f3F800000; + and.pred %p44, %p665, %p666; + setp.eq.f32 %p667, %f102, 0f00000000; + @%p667 bra $L__BB6_412; + bra.uni $L__BB6_409; + +$L__BB6_412: + add.f32 %f1652, %f102, %f102; + selp.f32 %f3274, %f1652, 0f00000000, %p666; + bra.uni $L__BB6_413; + +$L__BB6_409: + mov.b32 %r793, %f3272; + xor.b32 %r794, %r793, -2147483648; + mov.b32 %f1648, %r794; + selp.f32 %f3274, %f1648, %f3272, %p44; + setp.geu.f32 %p668, %f102, 0f00000000; + @%p668 bra $L__BB6_413; + + mov.f32 %f3149, 0f3FC00000; + cvt.rzi.f32.f32 %f1650, %f3149; + setp.eq.f32 %p669, %f1650, 0f3FC00000; + @%p669 bra $L__BB6_413; + + mov.f32 %f3274, 0f7FFFFFFF; + +$L__BB6_413: + abs.f32 %f3171, %f102; + add.f32 %f1653, %f3171, 0f3FC00000; + mov.b32 %r795, %f1653; + setp.lt.s32 %p671, %r795, 2139095040; + @%p671 bra $L__BB6_418; + + abs.f32 %f3185, %f102; + setp.gtu.f32 %p672, %f3185, 0f7F800000; + @%p672 bra $L__BB6_417; + bra.uni $L__BB6_415; + +$L__BB6_417: + add.f32 %f3274, %f102, 0f3FC00000; + bra.uni $L__BB6_418; + +$L__BB6_415: + abs.f32 %f3186, %f102; + setp.neu.f32 %p673, %f3186, 0f7F800000; + @%p673 bra $L__BB6_418; + + selp.f32 %f3274, 0fFF800000, 0f7F800000, %p44; + +$L__BB6_418: + mov.f32 %f3065, 0f3FC00000; + mov.f32 %f3064, 0f3102E308; + mov.f32 %f3063, 0fBF317218; + mov.f32 %f3062, 0f35BFBE8E; + mov.f32 %f3061, 0f3F317200; + mov.f32 %f3060, 0f3DAAAABD; + mov.f32 %f3059, 0f3C4CAF63; + mov.f32 %f3058, 0f3B18F0FE; + setp.eq.f32 %p674, %f102, 0f3F800000; + selp.f32 %f1655, 0f3F800000, %f3274, %p674; + div.rn.f32 %f264, %f56, %f1655; + abs.f32 %f265, %f103; + setp.lt.f32 %p675, %f265, 0f00800000; + mul.f32 %f1656, %f265, 0f4B800000; + selp.f32 %f1657, %f1656, %f265, %p675; + selp.f32 %f1658, 0fC3170000, 0fC2FE0000, %p675; + mov.b32 %r796, %f1657; + and.b32 %r797, %r796, 8388607; + or.b32 %r798, %r797, 1065353216; + mov.b32 %f1659, %r798; + shr.u32 %r799, %r796, 23; + cvt.rn.f32.u32 %f1660, %r799; + add.f32 %f1661, %f1658, %f1660; + setp.gt.f32 %p676, %f1659, 0f3FB504F3; + mul.f32 %f1662, %f1659, 0f3F000000; + add.f32 %f1663, %f1661, 0f3F800000; + selp.f32 %f1664, %f1663, %f1661, %p676; + selp.f32 %f1665, %f1662, %f1659, %p676; + add.f32 %f1666, %f1665, 0fBF800000; + add.f32 %f1667, %f1665, 0f3F800000; + rcp.approx.ftz.f32 %f1668, %f1667; + add.f32 %f1669, %f1666, %f1666; + mul.f32 %f1670, %f1669, %f1668; + mul.f32 %f1671, %f1670, %f1670; + fma.rn.f32 %f1674, %f3058, %f1671, %f3059; + fma.rn.f32 %f1676, %f1674, %f1671, %f3060; + mul.rn.f32 %f1677, %f1676, %f1671; + mul.rn.f32 %f1678, %f1677, %f1670; + sub.f32 %f1679, %f1666, %f1670; + add.f32 %f1680, %f1679, %f1679; + neg.f32 %f1681, %f1670; + fma.rn.f32 %f1682, %f1681, %f1666, %f1680; + mul.rn.f32 %f1683, %f1668, %f1682; + add.f32 %f1684, %f1678, %f1670; + sub.f32 %f1685, %f1670, %f1684; + add.f32 %f1686, %f1678, %f1685; + add.f32 %f1687, %f1683, %f1686; + add.f32 %f1688, %f1684, %f1687; + sub.f32 %f1689, %f1684, %f1688; + add.f32 %f1690, %f1687, %f1689; + mul.rn.f32 %f1692, %f1664, %f3061; + mul.rn.f32 %f1694, %f1664, %f3062; + add.f32 %f1695, %f1692, %f1688; + sub.f32 %f1696, %f1692, %f1695; + add.f32 %f1697, %f1688, %f1696; + add.f32 %f1698, %f1690, %f1697; + add.f32 %f1699, %f1694, %f1698; add.f32 %f1700, %f1695, %f1699; sub.f32 %f1701, %f1695, %f1700; add.f32 %f1702, %f1699, %f1701; - mul.rn.f32 %f1704, %f900, %f1700; + mul.rn.f32 %f1704, %f3065, %f1700; neg.f32 %f1705, %f1704; - fma.rn.f32 %f1706, %f900, %f1700, %f1705; - fma.rn.f32 %f1707, %f900, %f1702, %f1706; - fma.rn.f32 %f1709, %f3354, %f1700, %f1707; + fma.rn.f32 %f1706, %f3065, %f1700, %f1705; + fma.rn.f32 %f1707, %f3065, %f1702, %f1706; + fma.rn.f32 %f1709, %f3279, %f1700, %f1707; add.rn.f32 %f1710, %f1704, %f1709; neg.f32 %f1711, %f1710; add.rn.f32 %f1712, %f1704, %f1711; add.rn.f32 %f1713, %f1712, %f1709; - mov.b32 %r190, %f1710; - setp.eq.s32 %p165, %r190, 1118925336; - add.s32 %r191, %r190, -1; - mov.b32 %f1714, %r191; + mov.b32 %r800, %f1710; + setp.eq.s32 %p677, %r800, 1118925336; + add.s32 %r801, %r800, -1; + mov.b32 %f1714, %r801; add.f32 %f1715, %f1713, 0f37000000; - selp.f32 %f1716, %f1714, %f1710, %p165; - selp.f32 %f309, %f1715, %f1713, %p165; - mul.f32 %f1717, %f1716, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1718, %f1717; - fma.rn.f32 %f1719, %f1718, %f3143, %f1716; - fma.rn.f32 %f1720, %f1718, %f3144, %f1719; - mul.f32 %f1721, %f1720, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1722, %f1721; - add.f32 %f1723, %f1718, 0f00000000; - ex2.approx.f32 %f1724, %f1723; - mul.f32 %f1725, %f1722, %f1724; - setp.lt.f32 %p166, %f1716, 0fC2D20000; - selp.f32 %f1726, 0f00000000, %f1725, %p166; - setp.gt.f32 %p167, %f1716, 0f42D20000; - selp.f32 %f3343, 0f7F800000, %f1726, %p167; - setp.eq.f32 %p168, %f3343, 0f7F800000; - @%p168 bra BB6_93; - - fma.rn.f32 %f3343, %f3343, %f309, %f3343; - -BB6_93: - setp.eq.f32 %p374, %f251, 0f00000000; - mov.b32 %r192, %f3343; - xor.b32 %r193, %r192, -2147483648; - mov.b32 %f1727, %r193; - selp.f32 %f3345, %f1727, %f3343, %p9; - @%p374 bra BB6_96; - bra.uni BB6_94; - -BB6_96: - add.f32 %f1730, %f251, %f251; - selp.f32 %f3345, %f1730, 0f00000000, %p59; - bra.uni BB6_97; - -BB6_94: - setp.geu.f32 %p170, %f251, 0f00000000; - @%p170 bra BB6_97; - - cvt.rzi.f32.f32 %f1729, %f900; - setp.neu.f32 %p171, %f1729, 0f40000000; - selp.f32 %f3345, 0f7FFFFFFF, %f3345, %p171; - -BB6_97: - abs.f32 %f3158, %f251; - add.f32 %f3157, %f3158, 0f40000000; - mov.b32 %r302, %f3157; - setp.lt.s32 %p375, %r302, 2139095040; - @%p375 bra BB6_102; - - abs.f32 %f3236, %f251; - setp.gtu.f32 %p174, %f3236, 0f7F800000; - @%p174 bra BB6_101; - bra.uni BB6_99; - -BB6_101: - add.f32 %f3345, %f251, 0f40000000; - bra.uni BB6_102; - -BB6_99: - abs.f32 %f3237, %f251; - setp.neu.f32 %p175, %f3237, 0f7F800000; - @%p175 bra BB6_102; - - selp.f32 %f3345, 0fFF800000, 0f7F800000, %p9; - -BB6_102: - setp.eq.f32 %p376, %f251, 0f3F800000; - mov.f32 %f3163, 0f3DAAAABD; - mov.f32 %f3162, 0f3C4CAF63; - mov.f32 %f3161, 0f3B18F0FE; - mov.f32 %f3160, 0fB5BFBE8E; - mov.f32 %f3159, 0fBF317200; - mul.f32 %f1733, %f3345, 0fBF000000; - selp.f32 %f1734, 0fBF000000, %f1733, %p376; - mul.f32 %f1735, %f1734, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1736, %f1735; - fma.rn.f32 %f1738, %f1736, %f3159, %f1734; - fma.rn.f32 %f1740, %f1736, %f3160, %f1738; - mul.f32 %f1741, %f1740, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1742, %f1741; - add.f32 %f1743, %f1736, 0f00000000; - ex2.approx.f32 %f1744, %f1743; - mul.f32 %f1745, %f1742, %f1744; - setp.lt.f32 %p177, %f1734, 0fC2D20000; - selp.f32 %f1746, 0f00000000, %f1745, %p177; - setp.gt.f32 %p178, %f1734, 0f42D20000; - selp.f32 %f320, 0f7F800000, %f1746, %p178; - // inline asm - rcp.approx.ftz.f32 %f1731,%f1401; - // inline asm - mul.f32 %f1747, %f1731, %f274; - mul.f32 %f1748, %f1747, %f1747; - fma.rn.f32 %f1751, %f3161, %f1748, %f3162; - fma.rn.f32 %f1753, %f1751, %f1748, %f3163; - mul.rn.f32 %f1754, %f1753, %f1748; - mul.rn.f32 %f1755, %f1754, %f1747; - sub.f32 %f1756, %f272, %f1747; - neg.f32 %f1757, %f1747; - add.f32 %f1758, %f1756, %f1756; - fma.rn.f32 %f1759, %f1757, %f272, %f1758; - mul.rn.f32 %f1760, %f1731, %f1759; - add.f32 %f1761, %f1755, %f1747; - sub.f32 %f1762, %f1747, %f1761; - add.f32 %f1763, %f1755, %f1762; - add.f32 %f1764, %f1760, %f1763; - add.f32 %f1765, %f1761, %f1764; - sub.f32 %f1766, %f1761, %f1765; - add.f32 %f1767, %f1764, %f1766; - add.f32 %f1768, %f275, %f1765; - sub.f32 %f1769, %f275, %f1768; - add.f32 %f1770, %f1765, %f1769; - add.f32 %f1771, %f1767, %f1770; - add.f32 %f1772, %f276, %f1771; - add.f32 %f1773, %f1768, %f1772; - sub.f32 %f1774, %f1768, %f1773; - add.f32 %f1775, %f1772, %f1774; - mul.rn.f32 %f1777, %f900, %f1773; - neg.f32 %f1778, %f1777; - fma.rn.f32 %f1779, %f900, %f1773, %f1778; - fma.rn.f32 %f1780, %f900, %f1775, %f1779; - fma.rn.f32 %f1782, %f3354, %f1773, %f1780; - add.rn.f32 %f1783, %f1777, %f1782; - neg.f32 %f1784, %f1783; - add.rn.f32 %f1785, %f1777, %f1784; - add.rn.f32 %f1786, %f1785, %f1782; - mov.b32 %r194, %f1783; - setp.eq.s32 %p179, %r194, 1118925336; - add.s32 %r195, %r194, -1; - mov.b32 %f1787, %r195; - add.f32 %f1788, %f1786, 0f37000000; - selp.f32 %f1789, %f1787, %f1783, %p179; - selp.f32 %f321, %f1788, %f1786, %p179; - mul.f32 %f1790, %f1789, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1791, %f1790; - fma.rn.f32 %f1792, %f1791, %f3159, %f1789; - fma.rn.f32 %f1793, %f1791, %f3160, %f1792; - mul.f32 %f1794, %f1793, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1795, %f1794; - add.f32 %f1796, %f1791, 0f00000000; - ex2.approx.f32 %f1797, %f1796; - mul.f32 %f1798, %f1795, %f1797; - setp.lt.f32 %p180, %f1789, 0fC2D20000; - selp.f32 %f1799, 0f00000000, %f1798, %p180; - setp.gt.f32 %p181, %f1789, 0f42D20000; - selp.f32 %f3346, 0f7F800000, %f1799, %p181; - setp.eq.f32 %p182, %f3346, 0f7F800000; - @%p182 bra BB6_104; - - fma.rn.f32 %f3346, %f3346, %f321, %f3346; - -BB6_104: - setp.eq.f32 %p381, %f270, 0f00000000; - mov.b32 %r196, %f3346; - xor.b32 %r197, %r196, -2147483648; - mov.b32 %f1800, %r197; - selp.f32 %f3348, %f1800, %f3346, %p10; - @%p381 bra BB6_107; - bra.uni BB6_105; - -BB6_107: - add.f32 %f1803, %f270, %f270; - selp.f32 %f3348, %f1803, 0f00000000, %p59; - bra.uni BB6_108; - -BB6_105: - setp.geu.f32 %p184, %f270, 0f00000000; - @%p184 bra BB6_108; - - cvt.rzi.f32.f32 %f1802, %f900; - setp.neu.f32 %p185, %f1802, 0f40000000; - selp.f32 %f3348, 0f7FFFFFFF, %f3348, %p185; - -BB6_108: - abs.f32 %f3250, %f270; - add.f32 %f3249, %f3250, 0f40000000; - mov.b32 %r307, %f3249; - setp.lt.s32 %p382, %r307, 2139095040; - @%p382 bra BB6_113; - - abs.f32 %f3234, %f270; - setp.gtu.f32 %p188, %f3234, 0f7F800000; - @%p188 bra BB6_112; - bra.uni BB6_110; - -BB6_112: - add.f32 %f3348, %f270, 0f40000000; - bra.uni BB6_113; - -BB6_110: - abs.f32 %f3235, %f270; - setp.neu.f32 %p189, %f3235, 0f7F800000; - @%p189 bra BB6_113; - - selp.f32 %f3348, 0fFF800000, 0f7F800000, %p10; - -BB6_113: - setp.eq.f32 %p383, %f270, 0f3F800000; - mov.f32 %f3172, 0f3FC00000; - cvt.rn.f32.s32 %f3171, %r323; - sub.f32 %f3170, %f3171, %f3369; - add.f32 %f3169, %f3170, 0f3F800000; - mov.f32 %f3168, 0f3DAAAABD; - mov.f32 %f3167, 0f3C4CAF63; - mov.f32 %f3166, 0f3B18F0FE; - mov.f32 %f3165, 0fB5BFBE8E; - mov.f32 %f3164, 0fBF317200; - mul.f32 %f1806, %f3348, 0fBF000000; - selp.f32 %f1807, 0fBF000000, %f1806, %p383; - mul.f32 %f1808, %f1807, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1809, %f1808; - fma.rn.f32 %f1811, %f1809, %f3164, %f1807; - fma.rn.f32 %f1813, %f1809, %f3165, %f1811; - mul.f32 %f1814, %f1813, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1815, %f1814; - add.f32 %f1816, %f1809, 0f00000000; - ex2.approx.f32 %f1817, %f1816; - mul.f32 %f1818, %f1815, %f1817; - setp.lt.f32 %p191, %f1807, 0fC2D20000; - selp.f32 %f1819, 0f00000000, %f1818, %p191; - setp.gt.f32 %p192, %f1807, 0f42D20000; - selp.f32 %f1820, 0f7F800000, %f1819, %p192; - mul.f32 %f1821, %f3170, %f1820; - mul.f32 %f1822, %f3169, %f320; - sub.f32 %f1823, %f1822, %f1821; - mul.f32 %f1824, %f168, %f1823; - mul.f32 %f332, %f216, %f1824; - mul.f32 %f1825, %f169, %f332; - mul.f32 %f1826, %f3169, %f3169; - mul.f32 %f1827, %f3169, %f1826; - mul.f32 %f1828, %f3170, %f3170; - mul.f32 %f1829, %f3170, %f1828; - mul.f32 %f1830, %f320, %f1827; - mul.f32 %f1831, %f1820, %f1829; - sub.f32 %f1832, %f1830, %f1831; - mul.f32 %f1833, %f199, %f1832; - mul.f32 %f1834, %f216, %f1833; - sub.f32 %f333, %f1825, %f1834; - mul.f32 %f1835, %f332, %f171; - fma.rn.f32 %f334, %f307, %f170, %f1835; - // inline asm - rcp.approx.ftz.f32 %f1804,%f174; - // inline asm - mul.f32 %f1836, %f1804, %f175; - mul.f32 %f1837, %f1836, %f1836; - fma.rn.f32 %f1840, %f3166, %f1837, %f3167; - fma.rn.f32 %f1842, %f1840, %f1837, %f3168; - mul.rn.f32 %f1843, %f1842, %f1837; - mul.rn.f32 %f1844, %f1843, %f1836; - sub.f32 %f1845, %f173, %f1836; - neg.f32 %f1846, %f1836; - add.f32 %f1847, %f1845, %f1845; - fma.rn.f32 %f1848, %f1846, %f173, %f1847; - mul.rn.f32 %f1849, %f1804, %f1848; - add.f32 %f1850, %f1844, %f1836; - sub.f32 %f1851, %f1836, %f1850; - add.f32 %f1852, %f1844, %f1851; - add.f32 %f1853, %f1849, %f1852; - add.f32 %f1854, %f1850, %f1853; - sub.f32 %f1855, %f1850, %f1854; - add.f32 %f1856, %f1853, %f1855; - add.f32 %f1857, %f176, %f1854; - sub.f32 %f1858, %f176, %f1857; - add.f32 %f1859, %f1854, %f1858; - add.f32 %f1860, %f1856, %f1859; - add.f32 %f1861, %f177, %f1860; - add.f32 %f1862, %f1857, %f1861; - sub.f32 %f1863, %f1857, %f1862; - add.f32 %f1864, %f1861, %f1863; - mul.rn.f32 %f1866, %f3172, %f1862; - neg.f32 %f1867, %f1866; - fma.rn.f32 %f1868, %f3172, %f1862, %f1867; - fma.rn.f32 %f1869, %f3172, %f1864, %f1868; - fma.rn.f32 %f1871, %f3354, %f1862, %f1869; - add.rn.f32 %f1872, %f1866, %f1871; - neg.f32 %f1873, %f1872; - add.rn.f32 %f1874, %f1866, %f1873; - add.rn.f32 %f1875, %f1874, %f1871; - mov.b32 %r198, %f1872; - setp.eq.s32 %p193, %r198, 1118925336; - add.s32 %r199, %r198, -1; - mov.b32 %f1876, %r199; - add.f32 %f1877, %f1875, 0f37000000; - selp.f32 %f1878, %f1876, %f1872, %p193; - selp.f32 %f335, %f1877, %f1875, %p193; - mul.f32 %f1879, %f1878, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1880, %f1879; - fma.rn.f32 %f1881, %f1880, %f3164, %f1878; - fma.rn.f32 %f1882, %f1880, %f3165, %f1881; - mul.f32 %f1883, %f1882, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1884, %f1883; - add.f32 %f1885, %f1880, 0f00000000; - ex2.approx.f32 %f1886, %f1885; - mul.f32 %f1887, %f1884, %f1886; - setp.lt.f32 %p194, %f1878, 0fC2D20000; - selp.f32 %f1888, 0f00000000, %f1887, %p194; - setp.gt.f32 %p195, %f1878, 0f42D20000; - selp.f32 %f3349, 0f7F800000, %f1888, %p195; - setp.eq.f32 %p196, %f3349, 0f7F800000; - @%p196 bra BB6_115; - - fma.rn.f32 %f3349, %f3349, %f335, %f3349; - -BB6_115: - setp.geu.f32 %p377, %f111, 0f00000000; - mov.b32 %r200, %f3349; - xor.b32 %r201, %r200, -2147483648; - mov.b32 %f1889, %r201; - selp.f32 %f339, %f1889, %f3349, %p5; - setp.eq.f32 %p197, %f111, 0f00000000; - selp.f32 %f3350, %f178, %f339, %p197; - @%p377 bra BB6_117; - - mov.f32 %f3173, 0f3FC00000; - cvt.rzi.f32.f32 %f1891, %f3173; - setp.neu.f32 %p198, %f1891, 0f3FC00000; - selp.f32 %f3350, 0f7FFFFFFF, %f339, %p198; - -BB6_117: - selp.f32 %f3182, 0fFF800000, 0f7F800000, %p5; - add.f32 %f3181, %f111, 0f3FC00000; - abs.f32 %f3180, %f111; - mov.f32 %f3179, 0f3FC00000; - mov.f32 %f3178, 0f3DAAAABD; - mov.f32 %f3177, 0f3C4CAF63; - mov.f32 %f3176, 0f3B18F0FE; - mov.f32 %f3175, 0fB5BFBE8E; - mov.f32 %f3174, 0fBF317200; - add.f32 %f1894, %f3180, 0f3FC00000; - mov.b32 %r202, %f1894; - setp.gt.s32 %p199, %r202, 2139095039; - setp.gtu.f32 %p200, %f3180, 0f7F800000; - selp.f32 %f1895, %f3181, %f3350, %p200; - setp.neu.f32 %p201, %f3180, 0f7F800000; - selp.f32 %f1896, %f1895, %f3182, %p201; - selp.f32 %f1897, %f1896, %f3350, %p199; - setp.eq.f32 %p202, %f111, 0f3F800000; - selp.f32 %f1898, 0f3F800000, %f1897, %p202; - div.rn.f32 %f343, %f97, %f1898; - // inline asm - rcp.approx.ftz.f32 %f1892,%f182; - // inline asm - mul.f32 %f1899, %f1892, %f183; - mul.f32 %f1900, %f1899, %f1899; - fma.rn.f32 %f1903, %f3176, %f1900, %f3177; - fma.rn.f32 %f1905, %f1903, %f1900, %f3178; - mul.rn.f32 %f1906, %f1905, %f1900; - mul.rn.f32 %f1907, %f1906, %f1899; - sub.f32 %f1908, %f181, %f1899; - neg.f32 %f1909, %f1899; - add.f32 %f1910, %f1908, %f1908; - fma.rn.f32 %f1911, %f1909, %f181, %f1910; - mul.rn.f32 %f1912, %f1892, %f1911; - add.f32 %f1913, %f1907, %f1899; - sub.f32 %f1914, %f1899, %f1913; - add.f32 %f1915, %f1907, %f1914; - add.f32 %f1916, %f1912, %f1915; - add.f32 %f1917, %f1913, %f1916; - sub.f32 %f1918, %f1913, %f1917; - add.f32 %f1919, %f1916, %f1918; - add.f32 %f1920, %f184, %f1917; - sub.f32 %f1921, %f184, %f1920; - add.f32 %f1922, %f1917, %f1921; - add.f32 %f1923, %f1919, %f1922; - add.f32 %f1924, %f185, %f1923; - add.f32 %f1925, %f1920, %f1924; - sub.f32 %f1926, %f1920, %f1925; - add.f32 %f1927, %f1924, %f1926; - mul.rn.f32 %f1929, %f3179, %f1925; - neg.f32 %f1930, %f1929; - fma.rn.f32 %f1931, %f3179, %f1925, %f1930; - fma.rn.f32 %f1932, %f3179, %f1927, %f1931; - fma.rn.f32 %f1934, %f3354, %f1925, %f1932; - add.rn.f32 %f1935, %f1929, %f1934; - neg.f32 %f1936, %f1935; - add.rn.f32 %f1937, %f1929, %f1936; - add.rn.f32 %f1938, %f1937, %f1934; - mov.b32 %r203, %f1935; - setp.eq.s32 %p203, %r203, 1118925336; - add.s32 %r204, %r203, -1; - mov.b32 %f1939, %r204; - add.f32 %f1940, %f1938, 0f37000000; - selp.f32 %f1941, %f1939, %f1935, %p203; - selp.f32 %f344, %f1940, %f1938, %p203; - mul.f32 %f1942, %f1941, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1943, %f1942; - fma.rn.f32 %f1945, %f1943, %f3174, %f1941; - fma.rn.f32 %f1947, %f1943, %f3175, %f1945; - mul.f32 %f1948, %f1947, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1949, %f1948; - add.f32 %f1950, %f1943, 0f00000000; - ex2.approx.f32 %f1951, %f1950; - mul.f32 %f1952, %f1949, %f1951; - setp.lt.f32 %p204, %f1941, 0fC2D20000; - selp.f32 %f1953, 0f00000000, %f1952, %p204; - setp.gt.f32 %p205, %f1941, 0f42D20000; - selp.f32 %f3351, 0f7F800000, %f1953, %p205; - setp.eq.f32 %p206, %f3351, 0f7F800000; - @%p206 bra BB6_119; - - fma.rn.f32 %f3351, %f3351, %f344, %f3351; - -BB6_119: - setp.geu.f32 %p378, %f112, 0f00000000; - mov.b32 %r205, %f3351; - xor.b32 %r206, %r205, -2147483648; - mov.b32 %f1954, %r206; - selp.f32 %f348, %f1954, %f3351, %p6; - setp.eq.f32 %p207, %f112, 0f00000000; - selp.f32 %f3352, %f186, %f348, %p207; - @%p378 bra BB6_121; - - mov.f32 %f3183, 0f3FC00000; - cvt.rzi.f32.f32 %f1956, %f3183; - setp.neu.f32 %p208, %f1956, 0f3FC00000; - selp.f32 %f3352, 0f7FFFFFFF, %f348, %p208; - -BB6_121: - mul.f32 %f3192, %f116, %f116; - mul.f32 %f3191, %f115, %f115; - cvt.rn.f32.s32 %f3190, %r322; - mul.f32 %f3189, %f171, %f171; - mul.f32 %f3188, %f170, %f170; - selp.f32 %f3187, 0fFF800000, 0f7F800000, %p6; - add.f32 %f3186, %f112, 0f3FC00000; - abs.f32 %f3185, %f112; - cvt.rn.f32.s32 %f3184, %r323; - add.f32 %f1958, %f3185, 0f3FC00000; - mov.b32 %r207, %f1958; - setp.gt.s32 %p209, %r207, 2139095039; - setp.gtu.f32 %p210, %f3185, 0f7F800000; - selp.f32 %f1959, %f3186, %f3352, %p210; - setp.neu.f32 %p211, %f3185, 0f7F800000; - selp.f32 %f1960, %f1959, %f3187, %p211; - selp.f32 %f1961, %f1960, %f3352, %p209; - setp.eq.f32 %p212, %f112, 0f3F800000; - selp.f32 %f1962, 0f3F800000, %f1961, %p212; - div.rn.f32 %f1963, %f98, %f1962; - fma.rn.f32 %f1964, %f343, %f3191, %f188; - fma.rn.f32 %f1965, %f1963, %f3192, %f189; - mul.f32 %f1966, %f307, %f1964; - fma.rn.f32 %f1967, %f308, %f3188, %f1966; - fma.rn.f32 %f1968, %f333, %f3189, %f1967; - fma.rn.f32 %f352, %f332, %f1965, %f1968; - mul.f32 %f1969, %f216, %f3368; - fma.rn.f32 %f353, %f230, %f1969, %f3277; - mad.lo.s32 %r208, %r323, %r63, %r322; - add.s32 %r209, %r208, %r4; - mul.wide.s32 %rd80, %r209, 4; - add.s64 %rd81, %rd1, %rd80; - ld.global.f32 %f354, [%rd81]; - add.f32 %f1970, %f3184, %f192; - fma.rn.f32 %f1971, %f104, %f1970, %f193; - add.f32 %f1972, %f3190, %f1971; - cvt.rzi.s32.f32 %r210, %f1972; - mul.wide.s32 %rd83, %r210, 4; - add.s64 %rd84, %rd82, %rd83; - ld.global.f32 %f3365, [%rd84]; - mul.f32 %f356, %f216, %f230; - setp.leu.f32 %p213, %f353, 0f3C23D70A; - mov.f32 %f3353, %f3354; - @%p213 bra BB6_123; - - sub.f32 %f1973, %f354, %f353; - add.f32 %f1974, %f353, %f3365; - div.rn.f32 %f3353, %f1973, %f1974; - -BB6_123: - @%p213 bra BB6_125; - - add.f32 %f1976, %f353, %f3365; - mul.f32 %f1977, %f1976, %f1976; - add.f32 %f1978, %f354, %f3365; - div.rn.f32 %f3354, %f1978, %f1977; - -BB6_125: - mov.f32 %f1979, 0f47C35000; - min.f32 %f1980, %f3353, %f1979; - fma.rn.f32 %f3319, %f1980, %f248, %f3319; - mul.f32 %f1981, %f1980, %f249; - mul.f32 %f1982, %f248, %f248; - min.f32 %f1983, %f3354, %f1979; - mul.f32 %f1984, %f1983, %f1982; - sub.f32 %f1985, %f1981, %f1984; - add.f32 %f3324, %f1985, %f3324; - fma.rn.f32 %f3318, %f1980, %f288, %f3318; - mul.f32 %f1986, %f1980, %f289; - mul.f32 %f1987, %f288, %f288; - mul.f32 %f1988, %f1983, %f1987; - sub.f32 %f1989, %f1986, %f1988; - add.f32 %f3323, %f1989, %f3323; - fma.rn.f32 %f3317, %f1980, %f356, %f3317; - mul.f32 %f1990, %f1980, 0f00000000; - mul.f32 %f1991, %f356, %f356; - mul.f32 %f1992, %f1983, %f1991; - sub.f32 %f1993, %f1990, %f1992; - add.f32 %f3322, %f1993, %f3322; - add.f32 %f3316, %f3316, %f1980; - sub.f32 %f1994, %f1990, %f1983; - add.f32 %f3321, %f1994, %f3321; - fma.rn.f32 %f3315, %f1980, %f334, %f3315; - mul.f32 %f1995, %f1980, %f352; - mul.f32 %f1996, %f334, %f334; - mul.f32 %f1997, %f1983, %f1996; - sub.f32 %f1998, %f1995, %f1997; - add.f32 %f3320, %f1998, %f3320; - add.s32 %r323, %r323, 1; - setp.lt.s32 %p215, %r323, %r63; - @%p215 bra BB6_41; - - add.s32 %r322, %r322, 1; - setp.lt.s32 %p216, %r322, %r63; - @%p216 bra BB6_40; - -BB6_127: - div.rn.f32 %f1999, %f3319, %f3324; - mov.f32 %f2000, 0fBF800000; - max.f32 %f2001, %f1999, %f2000; - mov.f32 %f2002, 0f3F800000; - min.f32 %f2003, %f2001, %f2002; - sub.f32 %f3370, %f3370, %f2003; - div.rn.f32 %f2004, %f3318, %f3323; - max.f32 %f2005, %f2004, %f2000; - min.f32 %f2006, %f2005, %f2002; - sub.f32 %f3369, %f3369, %f2006; - neg.f32 %f2007, %f3368; - div.rn.f32 %f2008, %f3317, %f3322; - max.f32 %f2009, %f2008, %f2007; - min.f32 %f2010, %f2009, %f3368; - sub.f32 %f2011, %f3368, %f2010; - neg.f32 %f2012, %f3277; - div.rn.f32 %f2013, %f3316, %f3321; - max.f32 %f2014, %f2013, %f2012; - min.f32 %f2015, %f2014, %f3277; - sub.f32 %f2016, %f3277, %f2015; - div.rn.f32 %f2017, %f3315, %f3320; - mov.f32 %f2018, 0fBDCCCCCD; - max.f32 %f2019, %f2017, %f2018; - mov.f32 %f2020, 0f3DCCCCCD; - min.f32 %f2021, %f2019, %f2020; - sub.f32 %f3366, %f3366, %f2021; - max.f32 %f3368, %f2011, %f2002; - mov.f32 %f2022, 0f3C23D70A; - max.f32 %f3277, %f2016, %f2022; - add.s32 %r321, %r321, 1; - setp.lt.s32 %p217, %r321, %r65; - @%p217 bra BB6_38; - -BB6_128: - mov.f32 %f3418, 0f00000000; - mov.f32 %f3388, %f3418; - @%p19 bra BB6_214; - - mul.f32 %f393, %f656, 0f3F000000; - mul.f32 %f394, %f663, 0f3F000000; - mul.f32 %f2025, %f662, %f662; - mul.f32 %f2026, %f2025, %f662; - mul.f32 %f2027, %f2025, %f2025; - sub.f32 %f2028, %f3366, %f661; - div.rn.f32 %f2029, %f2028, %f662; - mul.f32 %f2030, %f2029, %f2029; - mul.f32 %f2031, %f2029, %f2030; - add.f32 %f2032, %f2030, 0f3F800000; - fma.rn.f32 %f2033, %f2031, %f657, %f2032; - mul.f32 %f2034, %f2030, %f2030; - fma.rn.f32 %f395, %f2034, %f659, %f2033; - add.f32 %f2035, %f3366, %f661; - div.rn.f32 %f2036, %f2035, %f662; - mul.f32 %f2037, %f2036, %f2036; - mul.f32 %f2038, %f2036, %f2037; - add.f32 %f2039, %f2037, 0f3F800000; - fma.rn.f32 %f2040, %f2038, %f658, %f2039; - mul.f32 %f2041, %f2037, %f2037; - fma.rn.f32 %f396, %f2041, %f660, %f2040; - div.rn.f32 %f397, %f3368, 0fC0206C98; - add.f32 %f2042, %f2028, %f2028; - div.rn.f32 %f2043, %f2042, %f2025; - mul.f32 %f2044, %f2028, %f2028; - mul.f32 %f2045, %f657, 0f40400000; - mul.f32 %f2046, %f2045, %f2044; - div.rn.f32 %f2047, %f2046, %f2026; - add.f32 %f2048, %f2043, %f2047; - mul.f32 %f2049, %f2028, %f2044; - mul.f32 %f2050, %f659, 0f40800000; - mul.f32 %f2051, %f2050, %f2049; - div.rn.f32 %f2052, %f2051, %f2027; - add.f32 %f398, %f2048, %f2052; - add.f32 %f2053, %f2035, %f2035; - div.rn.f32 %f2054, %f2053, %f2025; - mul.f32 %f2055, %f2035, %f2035; - mul.f32 %f2056, %f658, 0f40400000; - mul.f32 %f2057, %f2056, %f2055; - div.rn.f32 %f2058, %f2057, %f2026; - add.f32 %f2059, %f2054, %f2058; - mul.f32 %f2060, %f2035, %f2055; - mul.f32 %f2061, %f660, 0f40800000; - mul.f32 %f2062, %f2061, %f2060; - div.rn.f32 %f2063, %f2062, %f2027; - add.f32 %f399, %f2059, %f2063; - mov.u32 %r211, 0; - mov.f32 %f3388, 0f00000000; - sqrt.rn.f32 %f2064, %f395; - sqrt.rn.f32 %f2066, %f396; - div.rn.f32 %f2107, %f393, %f2064; - div.rn.f32 %f2108, %f394, %f2066; - mul.f32 %f436, %f2107, %f398; - mul.f32 %f437, %f2108, %f399; - mov.u32 %r324, %r211; - -BB6_130: - mul.f32 %f2065, %f2064, %f656; - mul.f32 %f401, %f2066, %f663; - div.rn.f32 %f2068, %f697, %f2065; - div.rn.f32 %f2069, %f2068, %f2065; - cvt.rn.f32.s32 %f2070, %r324; - sub.f32 %f402, %f2070, %f3370; - add.f32 %f403, %f402, 0f3F800000; - sqrt.rn.f32 %f2071, %f2069; - mul.f32 %f404, %f403, %f2071; - abs.f32 %f405, %f404; - mul.f32 %f406, %f404, %f404; - mul.f32 %f407, %f402, %f2071; - abs.f32 %f408, %f407; - mul.f32 %f409, %f407, %f407; - div.rn.f32 %f2072, %f697, %f401; - div.rn.f32 %f2073, %f2072, %f401; - sqrt.rn.f32 %f410, %f2073; - add.f32 %f2074, %f2070, 0f3F800000; - sub.f32 %f2075, %f2074, %f3370; - div.rn.f32 %f411, %f2075, %f2065; - mov.f32 %f2076, 0f3F800000; - cvt.rzi.f32.f32 %f2077, %f2076; - add.f32 %f2078, %f2077, %f2077; - mov.f32 %f2079, 0f40000000; - sub.f32 %f2080, %f2079, %f2078; - abs.f32 %f412, %f2080; - setp.eq.f32 %p219, %f412, 0f3F800000; - abs.f32 %f413, %f411; - setp.lt.f32 %p220, %f413, 0f00800000; - mul.f32 %f2081, %f413, 0f4B800000; - selp.f32 %f2082, 0fC3170000, 0fC2FE0000, %p220; - selp.f32 %f2083, %f2081, %f413, %p220; - mov.b32 %r213, %f2083; - and.b32 %r214, %r213, 8388607; - or.b32 %r215, %r214, 1065353216; - mov.b32 %f2084, %r215; - shr.u32 %r216, %r213, 23; - cvt.rn.f32.u32 %f2085, %r216; - add.f32 %f2086, %f2082, %f2085; - setp.gt.f32 %p221, %f2084, 0f3FB504F3; - mul.f32 %f2087, %f2084, 0f3F000000; - add.f32 %f2088, %f2086, 0f3F800000; - selp.f32 %f2089, %f2087, %f2084, %p221; - selp.f32 %f2090, %f2088, %f2086, %p221; - add.f32 %f414, %f2089, 0fBF800000; - add.f32 %f415, %f2089, 0f3F800000; - add.f32 %f416, %f414, %f414; - mov.f32 %f2091, 0f3F317200; - mul.rn.f32 %f417, %f2090, %f2091; - mov.f32 %f2092, 0f35BFBE8E; - mul.rn.f32 %f418, %f2090, %f2092; - setp.lt.f32 %p222, %f411, 0f00000000; - and.pred %p11, %p222, %p219; - add.f32 %f2093, %f411, %f411; - selp.f32 %f419, %f2093, 0f00000000, %p219; - add.f32 %f2094, %f413, 0f40000000; - mov.b32 %r42, %f2094; - add.f32 %f420, %f411, 0f40000000; - selp.f32 %f421, 0fFF800000, 0f7F800000, %p11; - div.rn.f32 %f422, %f402, %f2065; - abs.f32 %f423, %f422; - setp.lt.f32 %p223, %f423, 0f00800000; - mul.f32 %f2095, %f423, 0f4B800000; - selp.f32 %f2096, 0fC3170000, 0fC2FE0000, %p223; - selp.f32 %f2097, %f2095, %f423, %p223; - mov.b32 %r217, %f2097; - and.b32 %r218, %r217, 8388607; - or.b32 %r219, %r218, 1065353216; - mov.b32 %f2098, %r219; - shr.u32 %r220, %r217, 23; - cvt.rn.f32.u32 %f2099, %r220; - add.f32 %f2100, %f2096, %f2099; - setp.gt.f32 %p224, %f2098, 0f3FB504F3; - mul.f32 %f2101, %f2098, 0f3F000000; - add.f32 %f2102, %f2100, 0f3F800000; - selp.f32 %f2103, %f2101, %f2098, %p224; - selp.f32 %f2104, %f2102, %f2100, %p224; - add.f32 %f424, %f2103, 0fBF800000; - add.f32 %f425, %f2103, 0f3F800000; - add.f32 %f426, %f424, %f424; - mul.rn.f32 %f427, %f2104, %f2091; - mul.rn.f32 %f428, %f2104, %f2092; - setp.lt.f32 %p225, %f422, 0f00000000; - and.pred %p12, %p225, %p219; - add.f32 %f2105, %f422, %f422; - selp.f32 %f429, %f2105, 0f00000000, %p219; - add.f32 %f2106, %f423, 0f40000000; - mov.b32 %r43, %f2106; - add.f32 %f430, %f422, 0f40000000; - selp.f32 %f431, 0fFF800000, 0f7F800000, %p12; - div.rn.f32 %f432, %f397, %f2065; - div.rn.f32 %f433, %f397, %f401; - div.rn.f32 %f434, %f432, %f2065; - div.rn.f32 %f435, %f433, %f401; - mov.b32 %r221, %f407; - and.b32 %r44, %r221, -2147483648; - mov.b32 %r222, %f404; - and.b32 %r45, %r222, -2147483648; - setp.geu.f32 %p13, %f411, 0f00000000; - setp.geu.f32 %p14, %f422, 0f00000000; - ld.local.f32 %f3387, [%rd2]; - ld.local.f32 %f3386, [%rd2+4]; - ld.local.f32 %f3385, [%rd2+8]; - ld.local.f32 %f3384, [%rd2+12]; - ld.local.f32 %f3383, [%rd2+16]; - ld.local.f32 %f3382, [%rd2+24]; - ld.local.f32 %f3381, [%rd2+28]; - ld.local.f32 %f3380, [%rd2+32]; - ld.local.f32 %f3379, [%rd2+36]; - ld.local.f32 %f3378, [%rd2+48]; - ld.local.f32 %f3377, [%rd2+52]; - ld.local.f32 %f3376, [%rd2+56]; - ld.local.f32 %f3375, [%rd2+72]; - ld.local.f32 %f3374, [%rd2+76]; - ld.local.f32 %f3373, [%rd2+96]; - mov.u32 %r325, %r211; - -BB6_131: - setp.ltu.f32 %p226, %f405, 0f3F800000; - @%p226 bra BB6_133; - bra.uni BB6_132; - -BB6_133: - mov.f32 %f2127, 0f3BA0C9F8; - mov.f32 %f2128, 0fBA1268FB; - fma.rn.f32 %f2129, %f2128, %f406, %f2127; - mov.f32 %f2130, 0fBCDABFD4; - fma.rn.f32 %f2131, %f2129, %f406, %f2130; - mov.f32 %f2132, 0f3DE70331; - fma.rn.f32 %f2133, %f2131, %f406, %f2132; - mov.f32 %f2134, 0fBEC09330; - fma.rn.f32 %f2135, %f2133, %f406, %f2134; - mov.f32 %f2136, 0f3F906EBA; - fma.rn.f32 %f2137, %f2135, %f406, %f2136; - mul.f32 %f3389, %f404, %f2137; - bra.uni BB6_134; - -BB6_132: - setp.ltu.f32 %p227, %f405, 0f407AD445; - mov.f32 %f2109, 0f3A03BB71; - mov.f32 %f2110, 0fB7B730FB; - fma.rn.f32 %f2111, %f2110, %f405, %f2109; - mov.f32 %f2112, 0fBBACA3B3; - fma.rn.f32 %f2113, %f2111, %f405, %f2112; - mov.f32 %f2114, 0f3D0A7445; - fma.rn.f32 %f2115, %f2113, %f405, %f2114; - mov.f32 %f2116, 0fBE1B3B75; - fma.rn.f32 %f2117, %f2115, %f405, %f2116; - mov.f32 %f2118, 0fBF6B385A; - fma.rn.f32 %f2119, %f2117, %f405, %f2118; - mov.f32 %f2120, 0fBFD0316E; - fma.rn.f32 %f2121, %f2119, %f405, %f2120; - mov.f32 %f2122, 0fBA031CCE; - fma.rn.f32 %f2123, %f2121, %f405, %f2122; - ex2.approx.ftz.f32 %f2124, %f2123; - sub.f32 %f2126, %f2076, %f2124; - mov.b32 %r223, %f2126; - selp.b32 %r224, %r223, 1065353216, %p227; - or.b32 %r225, %r224, %r45; - mov.b32 %f3389, %r225; - -BB6_134: - setp.ltu.f32 %p228, %f408, 0f3F800000; - @%p228 bra BB6_136; - bra.uni BB6_135; - -BB6_136: - mov.f32 %f2156, 0f3BA0C9F8; - mov.f32 %f2157, 0fBA1268FB; - fma.rn.f32 %f2158, %f2157, %f409, %f2156; - mov.f32 %f2159, 0fBCDABFD4; - fma.rn.f32 %f2160, %f2158, %f409, %f2159; - mov.f32 %f2161, 0f3DE70331; - fma.rn.f32 %f2162, %f2160, %f409, %f2161; - mov.f32 %f2163, 0fBEC09330; - fma.rn.f32 %f2164, %f2162, %f409, %f2163; - mov.f32 %f2165, 0f3F906EBA; - fma.rn.f32 %f2166, %f2164, %f409, %f2165; - mul.f32 %f3390, %f407, %f2166; - bra.uni BB6_137; - -BB6_135: - setp.ltu.f32 %p229, %f408, 0f407AD445; - mov.f32 %f2138, 0f3A03BB71; - mov.f32 %f2139, 0fB7B730FB; - fma.rn.f32 %f2140, %f2139, %f408, %f2138; - mov.f32 %f2141, 0fBBACA3B3; - fma.rn.f32 %f2142, %f2140, %f408, %f2141; - mov.f32 %f2143, 0f3D0A7445; - fma.rn.f32 %f2144, %f2142, %f408, %f2143; - mov.f32 %f2145, 0fBE1B3B75; - fma.rn.f32 %f2146, %f2144, %f408, %f2145; - mov.f32 %f2147, 0fBF6B385A; - fma.rn.f32 %f2148, %f2146, %f408, %f2147; - mov.f32 %f2149, 0fBFD0316E; - fma.rn.f32 %f2150, %f2148, %f408, %f2149; - mov.f32 %f2151, 0fBA031CCE; - fma.rn.f32 %f2152, %f2150, %f408, %f2151; - ex2.approx.ftz.f32 %f2153, %f2152; - sub.f32 %f2155, %f2076, %f2153; - mov.b32 %r226, %f2155; - selp.b32 %r227, %r226, 1065353216, %p229; - or.b32 %r228, %r227, %r44; - mov.b32 %f3390, %r228; - -BB6_137: - sub.f32 %f2167, %f3389, %f3390; - mul.f32 %f475, %f2167, 0f3F000000; - cvt.rn.f32.s32 %f476, %r325; - sub.f32 %f477, %f476, %f3369; - add.f32 %f478, %f477, 0f3F800000; - mul.f32 %f479, %f478, %f410; - abs.f32 %f480, %f479; - setp.ltu.f32 %p230, %f480, 0f3F800000; - @%p230 bra BB6_139; - bra.uni BB6_138; - -BB6_139: - mul.f32 %f2186, %f479, %f479; - mov.f32 %f2187, 0f3BA0C9F8; - mov.f32 %f2188, 0fBA1268FB; - fma.rn.f32 %f2189, %f2188, %f2186, %f2187; - mov.f32 %f2190, 0fBCDABFD4; - fma.rn.f32 %f2191, %f2189, %f2186, %f2190; - mov.f32 %f2192, 0f3DE70331; - fma.rn.f32 %f2193, %f2191, %f2186, %f2192; - mov.f32 %f2194, 0fBEC09330; - fma.rn.f32 %f2195, %f2193, %f2186, %f2194; - mov.f32 %f2196, 0f3F906EBA; - fma.rn.f32 %f2197, %f2195, %f2186, %f2196; - mul.f32 %f3391, %f479, %f2197; - bra.uni BB6_140; - -BB6_138: - mov.f32 %f2168, 0f3A03BB71; - mov.f32 %f2169, 0fB7B730FB; - fma.rn.f32 %f2170, %f2169, %f480, %f2168; - mov.f32 %f2171, 0fBBACA3B3; - fma.rn.f32 %f2172, %f2170, %f480, %f2171; - mov.f32 %f2173, 0f3D0A7445; - fma.rn.f32 %f2174, %f2172, %f480, %f2173; - mov.f32 %f2175, 0fBE1B3B75; - fma.rn.f32 %f2176, %f2174, %f480, %f2175; - mov.f32 %f2177, 0fBF6B385A; - fma.rn.f32 %f2178, %f2176, %f480, %f2177; - mov.f32 %f2179, 0fBFD0316E; - fma.rn.f32 %f2180, %f2178, %f480, %f2179; - mov.f32 %f2181, 0fBA031CCE; - fma.rn.f32 %f2182, %f2180, %f480, %f2181; - ex2.approx.ftz.f32 %f2183, %f2182; - sub.f32 %f2185, %f2076, %f2183; - mov.b32 %r229, %f2185; - setp.ltu.f32 %p231, %f480, 0f407AD445; - selp.b32 %r230, %r229, 1065353216, %p231; - mov.b32 %r231, %f479; - and.b32 %r232, %r231, -2147483648; - or.b32 %r233, %r230, %r232; - mov.b32 %f3391, %r233; - -BB6_140: - mul.f32 %f484, %f477, %f410; - abs.f32 %f485, %f484; - setp.ltu.f32 %p232, %f485, 0f3F800000; - @%p232 bra BB6_142; - bra.uni BB6_141; - -BB6_142: - mul.f32 %f2216, %f484, %f484; - mov.f32 %f2217, 0f3BA0C9F8; - mov.f32 %f2218, 0fBA1268FB; - fma.rn.f32 %f2219, %f2218, %f2216, %f2217; - mov.f32 %f2220, 0fBCDABFD4; - fma.rn.f32 %f2221, %f2219, %f2216, %f2220; - mov.f32 %f2222, 0f3DE70331; - fma.rn.f32 %f2223, %f2221, %f2216, %f2222; - mov.f32 %f2224, 0fBEC09330; - fma.rn.f32 %f2225, %f2223, %f2216, %f2224; - mov.f32 %f2226, 0f3F906EBA; - fma.rn.f32 %f2227, %f2225, %f2216, %f2226; - mul.f32 %f3392, %f484, %f2227; - bra.uni BB6_143; - -BB6_141: - mov.f32 %f2198, 0f3A03BB71; - mov.f32 %f2199, 0fB7B730FB; - fma.rn.f32 %f2200, %f2199, %f485, %f2198; - mov.f32 %f2201, 0fBBACA3B3; - fma.rn.f32 %f2202, %f2200, %f485, %f2201; - mov.f32 %f2203, 0f3D0A7445; - fma.rn.f32 %f2204, %f2202, %f485, %f2203; - mov.f32 %f2205, 0fBE1B3B75; - fma.rn.f32 %f2206, %f2204, %f485, %f2205; - mov.f32 %f2207, 0fBF6B385A; - fma.rn.f32 %f2208, %f2206, %f485, %f2207; - mov.f32 %f2209, 0fBFD0316E; - fma.rn.f32 %f2210, %f2208, %f485, %f2209; - mov.f32 %f2211, 0fBA031CCE; - fma.rn.f32 %f2212, %f2210, %f485, %f2211; - ex2.approx.ftz.f32 %f2213, %f2212; - sub.f32 %f2215, %f2076, %f2213; - mov.b32 %r234, %f2215; - setp.ltu.f32 %p233, %f485, 0f407AD445; - selp.b32 %r235, %r234, 1065353216, %p233; - mov.b32 %r236, %f484; - and.b32 %r237, %r236, -2147483648; - or.b32 %r238, %r235, %r237; - mov.b32 %f3392, %r238; - -BB6_143: - sub.f32 %f2230, %f3391, %f3392; - mul.f32 %f489, %f2230, 0f3F000000; - // inline asm - rcp.approx.ftz.f32 %f2228,%f415; - // inline asm - mul.f32 %f2231, %f2228, %f416; - mul.f32 %f2232, %f2231, %f2231; - mov.f32 %f2233, 0f3C4CAF63; - mov.f32 %f2234, 0f3B18F0FE; - fma.rn.f32 %f2235, %f2234, %f2232, %f2233; - mov.f32 %f2236, 0f3DAAAABD; - fma.rn.f32 %f2237, %f2235, %f2232, %f2236; - mul.rn.f32 %f2238, %f2237, %f2232; - mul.rn.f32 %f2239, %f2238, %f2231; - sub.f32 %f2240, %f414, %f2231; - neg.f32 %f2241, %f2231; - add.f32 %f2242, %f2240, %f2240; - fma.rn.f32 %f2243, %f2241, %f414, %f2242; - mul.rn.f32 %f2244, %f2228, %f2243; - add.f32 %f2245, %f2239, %f2231; - sub.f32 %f2246, %f2231, %f2245; - add.f32 %f2247, %f2239, %f2246; - add.f32 %f2248, %f2244, %f2247; - add.f32 %f2249, %f2245, %f2248; - sub.f32 %f2250, %f2245, %f2249; - add.f32 %f2251, %f2248, %f2250; - add.f32 %f2252, %f417, %f2249; - sub.f32 %f2253, %f417, %f2252; - add.f32 %f2254, %f2249, %f2253; - add.f32 %f2255, %f2251, %f2254; - add.f32 %f2256, %f418, %f2255; - add.f32 %f2257, %f2252, %f2256; - sub.f32 %f2258, %f2252, %f2257; - add.f32 %f2259, %f2256, %f2258; - mul.rn.f32 %f2261, %f2079, %f2257; - neg.f32 %f2262, %f2261; - fma.rn.f32 %f2263, %f2079, %f2257, %f2262; - fma.rn.f32 %f2264, %f2079, %f2259, %f2263; - mov.f32 %f2265, 0f00000000; - fma.rn.f32 %f2266, %f2265, %f2257, %f2264; - add.rn.f32 %f2267, %f2261, %f2266; - neg.f32 %f2268, %f2267; - add.rn.f32 %f2269, %f2261, %f2268; - add.rn.f32 %f2270, %f2269, %f2266; - mov.b32 %r239, %f2267; - setp.eq.s32 %p234, %r239, 1118925336; - add.s32 %r240, %r239, -1; - mov.b32 %f2271, %r240; - add.f32 %f2272, %f2270, 0f37000000; - selp.f32 %f2273, %f2271, %f2267, %p234; - selp.f32 %f490, %f2272, %f2270, %p234; - mul.f32 %f2274, %f2273, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2275, %f2274; - mov.f32 %f2276, 0fBF317200; - fma.rn.f32 %f2277, %f2275, %f2276, %f2273; - mov.f32 %f2278, 0fB5BFBE8E; - fma.rn.f32 %f2279, %f2275, %f2278, %f2277; - mul.f32 %f2280, %f2279, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2281, %f2280; - add.f32 %f2282, %f2275, 0f00000000; - ex2.approx.f32 %f2283, %f2282; - mul.f32 %f2284, %f2281, %f2283; - setp.lt.f32 %p235, %f2273, 0fC2D20000; - selp.f32 %f2285, 0f00000000, %f2284, %p235; - setp.gt.f32 %p236, %f2273, 0f42D20000; - selp.f32 %f3393, 0f7F800000, %f2285, %p236; - setp.eq.f32 %p237, %f3393, 0f7F800000; - @%p237 bra BB6_145; - - fma.rn.f32 %f3393, %f3393, %f490, %f3393; - -BB6_145: - mov.b32 %r241, %f3393; - xor.b32 %r242, %r241, -2147483648; - mov.b32 %f2286, %r242; - selp.f32 %f494, %f2286, %f3393, %p11; - setp.eq.f32 %p238, %f411, 0f00000000; - selp.f32 %f3394, %f419, %f494, %p238; - @%p13 bra BB6_147; - - cvt.rzi.f32.f32 %f2288, %f2079; - setp.neu.f32 %p239, %f2288, 0f40000000; - selp.f32 %f3394, 0f7FFFFFFF, %f494, %p239; - -BB6_147: - setp.gtu.f32 %p240, %f413, 0f7F800000; - selp.f32 %f2291, %f420, %f3394, %p240; - setp.neu.f32 %p241, %f413, 0f7F800000; - selp.f32 %f2292, %f2291, %f421, %p241; - setp.gt.s32 %p242, %r42, 2139095039; - selp.f32 %f2293, %f2292, %f3394, %p242; - mul.f32 %f2294, %f2293, 0fBF000000; - setp.eq.f32 %p243, %f411, 0f3F800000; - selp.f32 %f2295, 0fBF000000, %f2294, %p243; - mul.f32 %f2296, %f2295, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2297, %f2296; - fma.rn.f32 %f2299, %f2297, %f2276, %f2295; - fma.rn.f32 %f2301, %f2297, %f2278, %f2299; - mul.f32 %f2302, %f2301, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2303, %f2302; - add.f32 %f2304, %f2297, 0f00000000; - ex2.approx.f32 %f2305, %f2304; - mul.f32 %f2306, %f2303, %f2305; - setp.lt.f32 %p244, %f2295, 0fC2D20000; - selp.f32 %f2307, 0f00000000, %f2306, %p244; - setp.gt.f32 %p245, %f2295, 0f42D20000; - selp.f32 %f498, 0f7F800000, %f2307, %p245; - // inline asm - rcp.approx.ftz.f32 %f2289,%f425; - // inline asm - mul.f32 %f2308, %f2289, %f426; - mul.f32 %f2309, %f2308, %f2308; - fma.rn.f32 %f2312, %f2234, %f2309, %f2233; - fma.rn.f32 %f2314, %f2312, %f2309, %f2236; - mul.rn.f32 %f2315, %f2314, %f2309; - mul.rn.f32 %f2316, %f2315, %f2308; - sub.f32 %f2317, %f424, %f2308; - neg.f32 %f2318, %f2308; - add.f32 %f2319, %f2317, %f2317; - fma.rn.f32 %f2320, %f2318, %f424, %f2319; - mul.rn.f32 %f2321, %f2289, %f2320; - add.f32 %f2322, %f2316, %f2308; - sub.f32 %f2323, %f2308, %f2322; - add.f32 %f2324, %f2316, %f2323; - add.f32 %f2325, %f2321, %f2324; - add.f32 %f2326, %f2322, %f2325; - sub.f32 %f2327, %f2322, %f2326; - add.f32 %f2328, %f2325, %f2327; - add.f32 %f2329, %f427, %f2326; - sub.f32 %f2330, %f427, %f2329; - add.f32 %f2331, %f2326, %f2330; - add.f32 %f2332, %f2328, %f2331; - add.f32 %f2333, %f428, %f2332; - add.f32 %f2334, %f2329, %f2333; - sub.f32 %f2335, %f2329, %f2334; - add.f32 %f2336, %f2333, %f2335; - mul.rn.f32 %f2338, %f2079, %f2334; - neg.f32 %f2339, %f2338; - fma.rn.f32 %f2340, %f2079, %f2334, %f2339; - fma.rn.f32 %f2341, %f2079, %f2336, %f2340; - fma.rn.f32 %f2343, %f2265, %f2334, %f2341; - add.rn.f32 %f2344, %f2338, %f2343; - neg.f32 %f2345, %f2344; - add.rn.f32 %f2346, %f2338, %f2345; - add.rn.f32 %f2347, %f2346, %f2343; - mov.b32 %r243, %f2344; - setp.eq.s32 %p246, %r243, 1118925336; - add.s32 %r244, %r243, -1; - mov.b32 %f2348, %r244; - add.f32 %f2349, %f2347, 0f37000000; - selp.f32 %f2350, %f2348, %f2344, %p246; - selp.f32 %f499, %f2349, %f2347, %p246; - mul.f32 %f2351, %f2350, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2352, %f2351; - fma.rn.f32 %f2353, %f2352, %f2276, %f2350; - fma.rn.f32 %f2354, %f2352, %f2278, %f2353; - mul.f32 %f2355, %f2354, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2356, %f2355; - add.f32 %f2357, %f2352, 0f00000000; - ex2.approx.f32 %f2358, %f2357; - mul.f32 %f2359, %f2356, %f2358; - setp.lt.f32 %p247, %f2350, 0fC2D20000; - selp.f32 %f2360, 0f00000000, %f2359, %p247; - setp.gt.f32 %p248, %f2350, 0f42D20000; - selp.f32 %f3395, 0f7F800000, %f2360, %p248; - setp.eq.f32 %p249, %f3395, 0f7F800000; - @%p249 bra BB6_149; - - fma.rn.f32 %f3395, %f3395, %f499, %f3395; - -BB6_149: - mov.b32 %r245, %f3395; - xor.b32 %r246, %r245, -2147483648; - mov.b32 %f2361, %r246; - selp.f32 %f503, %f2361, %f3395, %p12; - setp.eq.f32 %p250, %f422, 0f00000000; - selp.f32 %f3396, %f429, %f503, %p250; - @%p14 bra BB6_151; - - cvt.rzi.f32.f32 %f2363, %f2079; - setp.neu.f32 %p251, %f2363, 0f40000000; - selp.f32 %f3396, 0f7FFFFFFF, %f503, %p251; - -BB6_151: - setp.gtu.f32 %p252, %f423, 0f7F800000; - selp.f32 %f2366, %f430, %f3396, %p252; - setp.neu.f32 %p253, %f423, 0f7F800000; - selp.f32 %f2367, %f2366, %f431, %p253; - setp.gt.s32 %p254, %r43, 2139095039; - selp.f32 %f2368, %f2367, %f3396, %p254; - mul.f32 %f2369, %f2368, 0fBF000000; - setp.eq.f32 %p255, %f422, 0f3F800000; - selp.f32 %f2370, 0fBF000000, %f2369, %p255; - mul.f32 %f2371, %f2370, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2372, %f2371; - fma.rn.f32 %f2374, %f2372, %f2276, %f2370; - fma.rn.f32 %f2376, %f2372, %f2278, %f2374; - mul.f32 %f2377, %f2376, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2378, %f2377; - add.f32 %f2379, %f2372, 0f00000000; - ex2.approx.f32 %f2380, %f2379; - mul.f32 %f2381, %f2378, %f2380; - setp.lt.f32 %p256, %f2370, 0fC2D20000; - selp.f32 %f2382, 0f00000000, %f2381, %p256; - setp.gt.f32 %p257, %f2370, 0f42D20000; - selp.f32 %f2383, 0f7F800000, %f2382, %p257; - sub.f32 %f2384, %f498, %f2383; - mul.f32 %f2385, %f432, %f2384; - mul.f32 %f507, %f489, %f2385; - add.f32 %f2386, %f476, 0f3F800000; - sub.f32 %f2387, %f2386, %f3369; - div.rn.f32 %f508, %f2387, %f401; - abs.f32 %f509, %f508; - setp.lt.f32 %p258, %f509, 0f00800000; - mul.f32 %f2388, %f509, 0f4B800000; - selp.f32 %f2389, 0fC3170000, 0fC2FE0000, %p258; - selp.f32 %f2390, %f2388, %f509, %p258; - mov.b32 %r247, %f2390; - and.b32 %r248, %r247, 8388607; - or.b32 %r249, %r248, 1065353216; - mov.b32 %f2391, %r249; - shr.u32 %r250, %r247, 23; - cvt.rn.f32.u32 %f2392, %r250; - add.f32 %f2393, %f2389, %f2392; - setp.gt.f32 %p259, %f2391, 0f3FB504F3; - mul.f32 %f2394, %f2391, 0f3F000000; - add.f32 %f2395, %f2393, 0f3F800000; - selp.f32 %f2396, %f2394, %f2391, %p259; - selp.f32 %f2397, %f2395, %f2393, %p259; - add.f32 %f510, %f2396, 0fBF800000; - add.f32 %f2365, %f2396, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f2364,%f2365; - // inline asm - add.f32 %f512, %f510, %f510; - mul.f32 %f2398, %f2364, %f512; - mul.f32 %f2399, %f2398, %f2398; - fma.rn.f32 %f2402, %f2234, %f2399, %f2233; - fma.rn.f32 %f2404, %f2402, %f2399, %f2236; - mul.rn.f32 %f2405, %f2404, %f2399; - mul.rn.f32 %f2406, %f2405, %f2398; - sub.f32 %f2407, %f510, %f2398; - neg.f32 %f2408, %f2398; - add.f32 %f2409, %f2407, %f2407; - fma.rn.f32 %f2410, %f2408, %f510, %f2409; - mul.rn.f32 %f2411, %f2364, %f2410; - add.f32 %f2412, %f2406, %f2398; - sub.f32 %f2413, %f2398, %f2412; - add.f32 %f2414, %f2406, %f2413; - add.f32 %f2415, %f2411, %f2414; - add.f32 %f2416, %f2412, %f2415; - sub.f32 %f2417, %f2412, %f2416; - add.f32 %f2418, %f2415, %f2417; - mul.rn.f32 %f513, %f2397, %f2091; - mul.rn.f32 %f514, %f2397, %f2092; - add.f32 %f2421, %f513, %f2416; - sub.f32 %f2422, %f513, %f2421; - add.f32 %f2423, %f2416, %f2422; - add.f32 %f2424, %f2418, %f2423; - add.f32 %f2425, %f514, %f2424; - add.f32 %f2426, %f2421, %f2425; - sub.f32 %f2427, %f2421, %f2426; - add.f32 %f2428, %f2425, %f2427; - mul.rn.f32 %f2430, %f2079, %f2426; + selp.f32 %f266, %f1715, %f1713, %p677; + selp.f32 %f1716, %f1714, %f1710, %p677; + mul.rn.f32 %f1718, %f1716, %f812; + cvt.rzi.f32.f32 %f1719, %f1718; + abs.f32 %f1720, %f1719; + setp.gt.f32 %p678, %f1720, 0f42FC0000; + mov.b32 %r802, %f1719; + and.b32 %r803, %r802, -2147483648; + or.b32 %r804, %r803, 1123811328; + mov.b32 %f1721, %r804; + selp.f32 %f1722, %f1721, %f1719, %p678; + fma.rn.f32 %f1724, %f1722, %f3063, %f1716; + fma.rn.f32 %f1726, %f1722, %f3064, %f1724; + mul.f32 %f1727, %f1726, 0f3FB8AA3B; + add.f32 %f1728, %f1722, 0f4B40007F; + mov.b32 %r805, %f1728; + shl.b32 %r806, %r805, 23; + mov.b32 %f1729, %r806; + ex2.approx.ftz.f32 %f1730, %f1727; + mul.f32 %f267, %f1730, %f1729; + setp.eq.f32 %p679, %f267, 0f7F800000; + mov.f32 %f3275, 0f7F800000; + @%p679 bra $L__BB6_420; + + fma.rn.f32 %f3275, %f267, %f266, %f267; + +$L__BB6_420: + setp.lt.f32 %p680, %f103, 0f00000000; + and.pred %p45, %p680, %p666; + setp.eq.f32 %p682, %f103, 0f00000000; + @%p682 bra $L__BB6_424; + bra.uni $L__BB6_421; + +$L__BB6_424: + add.f32 %f1735, %f103, %f103; + selp.f32 %f3277, %f1735, 0f00000000, %p666; + bra.uni $L__BB6_425; + +$L__BB6_421: + mov.b32 %r807, %f3275; + xor.b32 %r808, %r807, -2147483648; + mov.b32 %f1731, %r808; + selp.f32 %f3277, %f1731, %f3275, %p45; + setp.geu.f32 %p683, %f103, 0f00000000; + @%p683 bra $L__BB6_425; + + mov.f32 %f3148, 0f3FC00000; + cvt.rzi.f32.f32 %f1733, %f3148; + setp.eq.f32 %p684, %f1733, 0f3FC00000; + @%p684 bra $L__BB6_425; + + mov.f32 %f3277, 0f7FFFFFFF; + +$L__BB6_425: + abs.f32 %f3187, %f103; + add.f32 %f1736, %f3187, 0f3FC00000; + mov.b32 %r809, %f1736; + setp.lt.s32 %p686, %r809, 2139095040; + @%p686 bra $L__BB6_430; + + abs.f32 %f3188, %f103; + setp.gtu.f32 %p687, %f3188, 0f7F800000; + @%p687 bra $L__BB6_429; + bra.uni $L__BB6_427; + +$L__BB6_429: + add.f32 %f3277, %f103, 0f3FC00000; + bra.uni $L__BB6_430; + +$L__BB6_427: + abs.f32 %f3189, %f103; + setp.neu.f32 %p688, %f3189, 0f7F800000; + @%p688 bra $L__BB6_430; + + selp.f32 %f3277, 0fFF800000, 0f7F800000, %p45; + +$L__BB6_430: + cvt.rn.f32.f64 %f3172, %fd804; + setp.eq.f32 %p689, %f103, 0f3F800000; + selp.f32 %f276, 0f3F800000, %f3277, %p689; + cvt.f64.f32 %fd254, %f3172; + { + .reg .b32 %temp; + mov.b64 {%temp, %r108}, %fd254; + } + abs.f64 %fd255, %fd254; + { // callseq 133, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd255; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1139, [retval0+0]; + } // callseq 133 + setp.lt.s32 %p690, %r108, 0; + and.pred %p46, %p690, %p144; + not.pred %p692, %p46; + @%p692 bra $L__BB6_432; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r810}, %fd1139; + } + xor.b32 %r811, %r810, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r812, %temp}, %fd1139; + } + mov.b64 %fd1139, {%r812, %r811}; + +$L__BB6_432: + cvt.rn.f32.f64 %f3173, %fd804; + setp.eq.f32 %p693, %f3173, 0f00000000; + @%p693 bra $L__BB6_436; + bra.uni $L__BB6_433; + +$L__BB6_436: + mov.u32 %r813, 0; + selp.b32 %r814, %r108, 0, %p144; + or.b32 %r815, %r814, 2146435072; + selp.b32 %r816, %r815, %r814, %p146; + mov.b64 %fd1139, {%r813, %r816}; + bra.uni $L__BB6_437; + +$L__BB6_433: + setp.gt.s32 %p694, %r108, -1; + @%p694 bra $L__BB6_437; + + cvt.rzi.f64.f64 %fd828, %fd646; + setp.eq.f64 %p695, %fd828, 0d4000000000000000; + @%p695 bra $L__BB6_437; + + mov.f64 %fd1139, 0dFFF8000000000000; + +$L__BB6_437: + add.f64 %fd261, %fd254, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r817}, %fd261; + } + and.b32 %r818, %r817, 2146435072; + setp.ne.s32 %p698, %r818, 2146435072; + mov.f64 %fd1140, %fd1139; + @%p698 bra $L__BB6_443; + + setp.gtu.f64 %p699, %fd255, 0d7FF0000000000000; + mov.f64 %fd1140, %fd261; + @%p699 bra $L__BB6_443; + + setp.eq.s32 %p700, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r819, %temp}, %fd646; + } + setp.eq.s32 %p701, %r819, 0; + and.pred %p702, %p700, %p701; + @%p702 bra $L__BB6_442; + bra.uni $L__BB6_440; + +$L__BB6_442: + cvt.rn.f32.f64 %f3184, %fd804; + mov.u32 %r824, 0; + setp.gt.f64 %p709, %fd255, 0d3FF0000000000000; + selp.b32 %r825, 2146435072, 0, %p709; + xor.b32 %r826, %r825, 2146435072; + selp.b32 %r827, %r826, %r825, %p146; + setp.eq.f32 %p710, %f3184, 0fBF800000; + selp.b32 %r828, 1072693248, %r827, %p710; + mov.b64 %fd1140, {%r824, %r828}; + bra.uni $L__BB6_443; + +$L__BB6_440: + { + .reg .b32 %temp; + mov.b64 {%r820, %temp}, %fd254; + } + and.b32 %r821, %r108, 2147483647; + setp.ne.s32 %p703, %r821, 2146435072; + setp.ne.s32 %p704, %r820, 0; + or.pred %p705, %p703, %p704; + mov.f64 %fd1140, %fd1139; + @%p705 bra $L__BB6_443; + + and.pred %p707, %p155, %p46; + selp.b32 %r822, %r57, %r56, %p707; + mov.u32 %r823, 0; + mov.b64 %fd1140, {%r823, %r822}; + +$L__BB6_443: + cvt.rn.f32.f64 %f3174, %fd804; + not.pred %p1289, %p12; + setp.eq.f32 %p711, %f3174, 0f3F800000; + selp.f64 %fd831, 0d3FF0000000000000, %fd1140, %p711; + cvt.f64.f32 %fd832, %f264; + mul.f64 %fd265, %fd831, %fd832; + mov.f64 %fd1142, %fd66; + @%p1289 bra $L__BB6_445; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r829}, %fd66; + } + xor.b32 %r830, %r829, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r831, %temp}, %fd66; + } + mov.b64 %fd1142, {%r831, %r830}; + +$L__BB6_445: + @%p582 bra $L__BB6_449; + bra.uni $L__BB6_446; + +$L__BB6_449: + mov.u32 %r832, 0; + mov.b64 %fd1142, {%r832, %r84}; + bra.uni $L__BB6_450; + +$L__BB6_446: + setp.gt.s32 %p714, %r83, -1; + @%p714 bra $L__BB6_450; + + cvt.rzi.f64.f64 %fd834, %fd646; + setp.eq.f64 %p715, %fd834, 0d4000000000000000; + @%p715 bra $L__BB6_450; + + mov.f64 %fd1142, 0dFFF8000000000000; + +$L__BB6_450: + selp.f64 %fd1143, %fd1142, %fd46, %p182; + @%p25 bra $L__BB6_455; + + setp.eq.s32 %p717, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r833, %temp}, %fd646; + } + setp.eq.s32 %p718, %r833, 0; + and.pred %p719, %p717, %p718; + @%p719 bra $L__BB6_454; + bra.uni $L__BB6_452; + +$L__BB6_454: + mov.u32 %r837, 0; + mov.b64 %fd1143, {%r837, %r86}; + bra.uni $L__BB6_455; + +$L__BB6_452: + and.b32 %r834, %r83, 2147483647; + setp.ne.s32 %p720, %r834, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r835, %temp}, %fd45; + } + setp.ne.s32 %p721, %r835, 0; + or.pred %p722, %p720, %p721; + mov.f64 %fd1143, %fd1142; + @%p722 bra $L__BB6_455; + + mov.u32 %r836, 0; + mov.b64 %fd1143, {%r836, %r88}; + +$L__BB6_455: + mov.f64 %fd1145, %fd68; + @%p607 bra $L__BB6_457; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r838}, %fd68; + } + xor.b32 %r839, %r838, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r840, %temp}, %fd68; + } + mov.b64 %fd1145, {%r840, %r839}; + +$L__BB6_457: + @%p608 bra $L__BB6_461; + bra.uni $L__BB6_458; + +$L__BB6_461: + mov.u32 %r841, 0; + mov.b64 %fd1145, {%r841, %r91}; + bra.uni $L__BB6_462; + +$L__BB6_458: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1358}, %fd645; + } + setp.gt.s32 %p725, %r1358, -1; + @%p725 bra $L__BB6_462; + + cvt.rzi.f64.f64 %fd838, %fd651; + setp.eq.f64 %p726, %fd838, 0d4010000000000000; + @%p726 bra $L__BB6_462; + + mov.f64 %fd1145, 0dFFF8000000000000; + +$L__BB6_462: + selp.f64 %fd1146, %fd1145, %fd36, %p189; + @%p27 bra $L__BB6_467; + + setp.eq.s32 %p728, %r65, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r842, %temp}, %fd651; + } + setp.eq.s32 %p729, %r842, 0; + and.pred %p730, %p728, %p729; + @%p730 bra $L__BB6_466; + bra.uni $L__BB6_464; + +$L__BB6_466: + mov.u32 %r846, 0; + mov.b64 %fd1146, {%r846, %r94}; + bra.uni $L__BB6_467; + +$L__BB6_464: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1359}, %fd645; + } + and.b32 %r843, %r1359, 2147483647; + setp.ne.s32 %p731, %r843, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r844, %temp}, %fd645; + } + setp.ne.s32 %p732, %r844, 0; + or.pred %p733, %p731, %p732; + mov.f64 %fd1146, %fd1145; + @%p733 bra $L__BB6_467; + + mov.u32 %r845, 0; + mov.b64 %fd1146, {%r845, %r97}; + +$L__BB6_467: + cvt.rn.f32.f64 %f3176, %fd825; + sub.f32 %f3175, %f3291, %f552; + setp.eq.f32 %p1290, %f3175, 0f3F800000; + selp.f64 %fd842, 0d3FF0000000000000, %fd1146, %p618; + selp.f64 %fd843, 0d3FF0000000000000, %fd1143, %p1290; + mul.f64 %fd844, %fd843, %fd39; + div.rn.f64 %fd845, %fd844, %fd842; + add.f64 %fd846, %fd845, %fd52; + cvt.rn.f32.f64 %f1737, %fd846; + mul.f32 %f1738, %f246, %f1737; + cvt.f64.f32 %fd847, %f1738; + add.f64 %fd282, %fd265, %fd847; + cvt.f64.f32 %fd283, %f3176; + { + .reg .b32 %temp; + mov.b64 {%temp, %r109}, %fd283; + } + abs.f64 %fd284, %fd283; + { // callseq 134, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd284; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1148, [retval0+0]; + } // callseq 134 + setp.lt.s32 %p736, %r109, 0; + and.pred %p47, %p736, %p144; + not.pred %p738, %p47; + @%p738 bra $L__BB6_469; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r847}, %fd1148; + } + xor.b32 %r848, %r847, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r849, %temp}, %fd1148; + } + mov.b64 %fd1148, {%r849, %r848}; + +$L__BB6_469: + cvt.rn.f32.f64 %f3177, %fd825; + setp.eq.f32 %p739, %f3177, 0f00000000; + @%p739 bra $L__BB6_473; + bra.uni $L__BB6_470; + +$L__BB6_473: + mov.u32 %r850, 0; + selp.b32 %r851, %r109, 0, %p144; + or.b32 %r852, %r851, 2146435072; + selp.b32 %r853, %r852, %r851, %p146; + mov.b64 %fd1148, {%r850, %r853}; + bra.uni $L__BB6_474; + +$L__BB6_470: + setp.gt.s32 %p740, %r109, -1; + @%p740 bra $L__BB6_474; + + cvt.rzi.f64.f64 %fd850, %fd646; + setp.eq.f64 %p741, %fd850, 0d4000000000000000; + @%p741 bra $L__BB6_474; + + mov.f64 %fd1148, 0dFFF8000000000000; + +$L__BB6_474: + add.f64 %fd290, %fd283, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r854}, %fd290; + } + and.b32 %r855, %r854, 2146435072; + setp.ne.s32 %p744, %r855, 2146435072; + mov.f64 %fd1149, %fd1148; + @%p744 bra $L__BB6_480; + + setp.gtu.f64 %p745, %fd284, 0d7FF0000000000000; + mov.f64 %fd1149, %fd290; + @%p745 bra $L__BB6_480; + + setp.eq.s32 %p746, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r856, %temp}, %fd646; + } + setp.eq.s32 %p747, %r856, 0; + and.pred %p748, %p746, %p747; + @%p748 bra $L__BB6_479; + bra.uni $L__BB6_477; + +$L__BB6_479: + cvt.rn.f32.f64 %f3183, %fd825; + mov.u32 %r861, 0; + setp.gt.f64 %p755, %fd284, 0d3FF0000000000000; + selp.b32 %r862, 2146435072, 0, %p755; + xor.b32 %r863, %r862, 2146435072; + selp.b32 %r864, %r863, %r862, %p146; + setp.eq.f32 %p756, %f3183, 0fBF800000; + selp.b32 %r865, 1072693248, %r864, %p756; + mov.b64 %fd1149, {%r861, %r865}; + bra.uni $L__BB6_480; + +$L__BB6_477: + { + .reg .b32 %temp; + mov.b64 {%r857, %temp}, %fd283; + } + and.b32 %r858, %r109, 2147483647; + setp.ne.s32 %p749, %r858, 2146435072; + setp.ne.s32 %p750, %r857, 0; + or.pred %p751, %p749, %p750; + mov.f64 %fd1149, %fd1148; + @%p751 bra $L__BB6_480; + + and.pred %p753, %p155, %p47; + selp.b32 %r859, %r57, %r56, %p753; + mov.u32 %r860, 0; + mov.b64 %fd1149, {%r860, %r859}; + +$L__BB6_480: + cvt.rn.f32.f64 %f3178, %fd825; + not.pred %p1291, %p15; + setp.eq.f32 %p757, %f3178, 0f3F800000; + selp.f64 %fd853, 0d3FF0000000000000, %fd1149, %p757; + div.rn.f32 %f1739, %f57, %f276; + cvt.f64.f32 %fd854, %f1739; + mul.f64 %fd294, %fd853, %fd854; + mov.f64 %fd1151, %fd69; + @%p1291 bra $L__BB6_482; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r866}, %fd69; + } + xor.b32 %r867, %r866, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r868, %temp}, %fd69; + } + mov.b64 %fd1151, {%r868, %r867}; + +$L__BB6_482: + @%p621 bra $L__BB6_486; + bra.uni $L__BB6_483; + +$L__BB6_486: + mov.u32 %r869, 0; + mov.b64 %fd1151, {%r869, %r95}; + bra.uni $L__BB6_487; + +$L__BB6_483: + setp.gt.s32 %p760, %r93, -1; + @%p760 bra $L__BB6_487; + + cvt.rzi.f64.f64 %fd856, %fd646; + setp.eq.f64 %p761, %fd856, 0d4000000000000000; + @%p761 bra $L__BB6_487; + + mov.f64 %fd1151, 0dFFF8000000000000; + +$L__BB6_487: + selp.f64 %fd1152, %fd1151, %fd50, %p194; + @%p28 bra $L__BB6_492; + + setp.eq.s32 %p763, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r870, %temp}, %fd646; + } + setp.eq.s32 %p764, %r870, 0; + and.pred %p765, %p763, %p764; + @%p765 bra $L__BB6_491; + bra.uni $L__BB6_489; + +$L__BB6_491: + mov.u32 %r874, 0; + mov.b64 %fd1152, {%r874, %r98}; + bra.uni $L__BB6_492; + +$L__BB6_489: + and.b32 %r871, %r93, 2147483647; + setp.ne.s32 %p766, %r871, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r872, %temp}, %fd49; + } + setp.ne.s32 %p767, %r872, 0; + or.pred %p768, %p766, %p767; + mov.f64 %fd1152, %fd1151; + @%p768 bra $L__BB6_492; + + mov.u32 %r873, 0; + mov.b64 %fd1152, {%r873, %r100}; + +$L__BB6_492: + mov.f64 %fd1154, %fd68; + @%p607 bra $L__BB6_494; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r875}, %fd68; + } + xor.b32 %r876, %r875, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r877, %temp}, %fd68; + } + mov.b64 %fd1154, {%r877, %r876}; + +$L__BB6_494: + @%p608 bra $L__BB6_498; + bra.uni $L__BB6_495; + +$L__BB6_498: + mov.u32 %r878, 0; + mov.b64 %fd1154, {%r878, %r91}; + bra.uni $L__BB6_499; + +$L__BB6_495: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1360}, %fd645; + } + setp.gt.s32 %p771, %r1360, -1; + @%p771 bra $L__BB6_499; + + cvt.rzi.f64.f64 %fd860, %fd651; + setp.eq.f64 %p772, %fd860, 0d4010000000000000; + @%p772 bra $L__BB6_499; + + mov.f64 %fd1154, 0dFFF8000000000000; + +$L__BB6_499: + selp.f64 %fd1155, %fd1154, %fd36, %p189; + @%p27 bra $L__BB6_504; + + setp.eq.s32 %p774, %r65, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r879, %temp}, %fd651; + } + setp.eq.s32 %p775, %r879, 0; + and.pred %p776, %p774, %p775; + @%p776 bra $L__BB6_503; + bra.uni $L__BB6_501; + +$L__BB6_503: + mov.u32 %r883, 0; + mov.b64 %fd1155, {%r883, %r94}; + bra.uni $L__BB6_504; + +$L__BB6_501: + { + .reg .b32 %temp; + mov.b64 {%temp, %r1361}, %fd645; + } + and.b32 %r880, %r1361, 2147483647; + setp.ne.s32 %p777, %r880, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r881, %temp}, %fd645; + } + setp.ne.s32 %p778, %r881, 0; + or.pred %p779, %p777, %p778; + mov.f64 %fd1155, %fd1154; + @%p779 bra $L__BB6_504; + + mov.u32 %r882, 0; + mov.b64 %fd1155, {%r882, %r97}; + +$L__BB6_504: + cvt.rn.f32.f64 %f3182, %fd825; + mul.f32 %f3181, %f247, %f3182; + cvt.rn.f32.f64 %f3180, %fd804; + mul.f32 %f3179, %f246, %f3180; + cvt.rn.f32.s32 %f3067, %r1374; + cvt.rn.f32.s32 %f3066, %r1375; + selp.f64 %fd864, 0d3FF0000000000000, %fd1155, %p618; + selp.f64 %fd865, 0d3FF0000000000000, %fd1152, %p631; + mul.f64 %fd866, %fd865, %fd40; + div.rn.f64 %fd867, %fd866, %fd864; + add.f64 %fd868, %fd867, %fd53; + cvt.rn.f32.f64 %f1741, %fd868; + mul.f32 %f1742, %f247, %f1741; + cvt.f64.f32 %fd869, %f1742; + add.f64 %fd870, %fd294, %fd869; + cvt.rn.f32.f64 %f1743, %fd870; + mul.f32 %f1744, %f3179, %f3179; + cvt.rn.f32.f64 %f1745, %fd282; + mul.f32 %f1746, %f217, %f1745; + fma.rn.f32 %f1747, %f1744, %f218, %f1746; + mul.f32 %f1748, %f3181, %f3181; + cvt.rn.f32.f64 %f1749, %fd203; + fma.rn.f32 %f1750, %f1748, %f1749, %f1747; + fma.rn.f32 %f277, %f245, %f1743, %f1750; + mul.f32 %f1751, %f117, %f3293; + fma.rn.f32 %f278, %f131, %f1751, %f3292; + mad.lo.s32 %r884, %r1375, %r182, %r1374; + add.s32 %r885, %r884, %r2; + mul.wide.s32 %rd31, %r885, 4; + add.s64 %rd32, %rd1, %rd31; + ld.global.f32 %f279, [%rd32]; + add.f32 %f1752, %f73, %f3066; + fma.rn.f32 %f1753, %f1752, %f61, %f74; + add.f32 %f1754, %f1753, %f3067; + cvt.rzi.s32.f32 %r886, %f1754; + mul.wide.s32 %rd34, %r886, 4; + add.s64 %rd35, %rd33, %rd34; + ld.global.f32 %f3290, [%rd35]; + mul.f32 %f281, %f117, %f131; + setp.leu.f32 %p782, %f278, 0f3C23D70A; + mov.f32 %f3278, %f3279; + @%p782 bra $L__BB6_506; + + sub.f32 %f1755, %f279, %f278; + add.f32 %f1756, %f278, %f3290; + div.rn.f32 %f3278, %f1755, %f1756; + +$L__BB6_506: + @%p782 bra $L__BB6_521; + + add.f32 %f284, %f278, %f3290; + cvt.f64.f32 %fd311, %f284; + { + .reg .b32 %temp; + mov.b64 {%temp, %r110}, %fd311; + } + abs.f64 %fd312, %fd311; + { // callseq 135, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd312; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1157, [retval0+0]; + } // callseq 135 + setp.lt.s32 %p785, %r110, 0; + and.pred %p48, %p785, %p144; + not.pred %p786, %p48; + @%p786 bra $L__BB6_509; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r887}, %fd1157; + } + xor.b32 %r888, %r887, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r889, %temp}, %fd1157; + } + mov.b64 %fd1157, {%r889, %r888}; + +$L__BB6_509: + setp.eq.f32 %p787, %f284, 0f00000000; + @%p787 bra $L__BB6_513; + bra.uni $L__BB6_510; + +$L__BB6_513: + mov.u32 %r890, 0; + selp.b32 %r891, %r110, 0, %p144; + or.b32 %r892, %r891, 2146435072; + selp.b32 %r893, %r892, %r891, %p146; + mov.b64 %fd1157, {%r890, %r893}; + bra.uni $L__BB6_514; + +$L__BB6_510: + setp.gt.s32 %p788, %r110, -1; + @%p788 bra $L__BB6_514; + + cvt.rzi.f64.f64 %fd873, %fd646; + setp.eq.f64 %p789, %fd873, 0d4000000000000000; + @%p789 bra $L__BB6_514; + + mov.f64 %fd1157, 0dFFF8000000000000; + +$L__BB6_514: + add.f64 %fd318, %fd311, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r894}, %fd318; + } + and.b32 %r895, %r894, 2146435072; + setp.ne.s32 %p792, %r895, 2146435072; + mov.f64 %fd1158, %fd1157; + @%p792 bra $L__BB6_520; + + setp.gtu.f64 %p793, %fd312, 0d7FF0000000000000; + mov.f64 %fd1158, %fd318; + @%p793 bra $L__BB6_520; + + setp.eq.s32 %p794, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r896, %temp}, %fd646; + } + setp.eq.s32 %p795, %r896, 0; + and.pred %p796, %p794, %p795; + @%p796 bra $L__BB6_519; + bra.uni $L__BB6_517; + +$L__BB6_519: + mov.u32 %r901, 0; + setp.gt.f64 %p803, %fd312, 0d3FF0000000000000; + selp.b32 %r902, 2146435072, 0, %p803; + xor.b32 %r903, %r902, 2146435072; + selp.b32 %r904, %r903, %r902, %p146; + setp.eq.f32 %p804, %f284, 0fBF800000; + selp.b32 %r905, 1072693248, %r904, %p804; + mov.b64 %fd1158, {%r901, %r905}; + bra.uni $L__BB6_520; + +$L__BB6_517: + { + .reg .b32 %temp; + mov.b64 {%r897, %temp}, %fd311; + } + and.b32 %r898, %r110, 2147483647; + setp.ne.s32 %p797, %r898, 2146435072; + setp.ne.s32 %p798, %r897, 0; + or.pred %p799, %p797, %p798; + mov.f64 %fd1158, %fd1157; + @%p799 bra $L__BB6_520; + + and.pred %p801, %p155, %p48; + selp.b32 %r899, %r57, %r56, %p801; + mov.u32 %r900, 0; + mov.b64 %fd1158, {%r900, %r899}; + +$L__BB6_520: + setp.eq.f32 %p805, %f284, 0f3F800000; + selp.f64 %fd876, 0d3FF0000000000000, %fd1158, %p805; + add.f32 %f1758, %f279, %f3290; + cvt.f64.f32 %fd877, %f1758; + div.rn.f64 %fd878, %fd877, %fd876; + cvt.rn.f32.f64 %f3279, %fd878; + +$L__BB6_521: + mov.f32 %f1759, 0f47C35000; + min.f32 %f1760, %f3279, %f1759; + cvt.f64.f32 %fd322, %f1760; + min.f32 %f287, %f3278, %f1759; + fma.rn.f32 %f3238, %f287, %f159, %f3238; + mul.f32 %f1761, %f287, %f160; + cvt.f64.f32 %fd323, %f1761; + cvt.f64.f32 %fd324, %f159; + { + .reg .b32 %temp; + mov.b64 {%temp, %r111}, %fd324; + } + abs.f64 %fd325, %fd324; + { // callseq 136, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd325; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1159, [retval0+0]; + } // callseq 136 + @%p144 bra $L__BB6_567; + bra.uni $L__BB6_522; + +$L__BB6_567: + setp.gt.s32 %p867, %r111, -1; + @%p867 bra $L__BB6_569; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r954}, %fd1159; + } + xor.b32 %r955, %r954, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r956, %temp}, %fd1159; + } + mov.b64 %fd1159, {%r956, %r955}; + +$L__BB6_569: + setp.eq.f32 %p868, %f159, 0f00000000; + @%p868 bra $L__BB6_573; + bra.uni $L__BB6_570; + +$L__BB6_573: + mov.u32 %r957, 0; + or.b32 %r958, %r111, 2146435072; + selp.b32 %r959, %r958, %r111, %p146; + mov.b64 %fd1159, {%r957, %r959}; + bra.uni $L__BB6_574; + +$L__BB6_522: + setp.eq.f32 %p807, %f159, 0f00000000; + @%p807 bra $L__BB6_526; + bra.uni $L__BB6_523; + +$L__BB6_526: + mov.u32 %r906, 0; + mov.b64 %fd1159, {%r906, %r102}; + bra.uni $L__BB6_527; + +$L__BB6_570: + @%p867 bra $L__BB6_574; + + cvt.rzi.f64.f64 %fd921, %fd646; + setp.eq.f64 %p870, %fd921, 0d4000000000000000; + @%p870 bra $L__BB6_574; + + mov.f64 %fd1159, 0dFFF8000000000000; + +$L__BB6_574: + add.f64 %fd369, %fd324, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r960}, %fd369; + } + and.b32 %r961, %r960, 2146435072; + setp.ne.s32 %p872, %r961, 2146435072; + mov.f64 %fd1169, %fd1159; + @%p872 bra $L__BB6_580; + + setp.gtu.f64 %p873, %fd325, 0d7FF0000000000000; + mov.f64 %fd1169, %fd369; + @%p873 bra $L__BB6_580; + + setp.eq.s32 %p874, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r962, %temp}, %fd646; + } + setp.eq.s32 %p875, %r962, 0; + and.pred %p876, %p874, %p875; + @%p876 bra $L__BB6_579; + bra.uni $L__BB6_577; + +$L__BB6_579: + mov.u32 %r967, 0; + setp.gt.f64 %p884, %fd325, 0d3FF0000000000000; + selp.b32 %r968, 2146435072, 0, %p884; + xor.b32 %r969, %r968, 2146435072; + selp.b32 %r970, %r969, %r968, %p146; + setp.eq.f32 %p885, %f159, 0fBF800000; + selp.b32 %r971, 1072693248, %r970, %p885; + mov.b64 %fd1169, {%r967, %r971}; + bra.uni $L__BB6_580; + +$L__BB6_523: + setp.gt.s32 %p808, %r111, -1; + @%p808 bra $L__BB6_527; + + cvt.rzi.f64.f64 %fd881, %fd646; + setp.eq.f64 %p809, %fd881, 0d4000000000000000; + @%p809 bra $L__BB6_527; + + mov.f64 %fd1159, 0dFFF8000000000000; + +$L__BB6_527: + add.f64 %fd329, %fd324, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r907}, %fd329; + } + and.b32 %r908, %r907, 2146435072; + setp.ne.s32 %p810, %r908, 2146435072; + mov.f64 %fd1160, %fd1159; + @%p810 bra $L__BB6_533; + + setp.gtu.f64 %p811, %fd325, 0d7FF0000000000000; + mov.f64 %fd1160, %fd329; + @%p811 bra $L__BB6_533; + + setp.eq.s32 %p812, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r909, %temp}, %fd646; + } + setp.eq.s32 %p813, %r909, 0; + and.pred %p814, %p812, %p813; + @%p814 bra $L__BB6_532; + bra.uni $L__BB6_530; + +$L__BB6_532: + mov.u32 %r913, 0; + setp.gt.f64 %p819, %fd325, 0d3FF0000000000000; + selp.b32 %r914, 2146435072, 0, %p819; + xor.b32 %r915, %r914, 2146435072; + selp.b32 %r916, %r915, %r914, %p146; + setp.eq.f32 %p820, %f159, 0fBF800000; + selp.b32 %r917, 1072693248, %r916, %p820; + mov.b64 %fd1160, {%r913, %r917}; + bra.uni $L__BB6_533; + +$L__BB6_577: + { + .reg .b32 %temp; + mov.b64 {%r963, %temp}, %fd324; + } + and.b32 %r964, %r111, 2147483647; + setp.ne.s32 %p877, %r964, 2146435072; + setp.ne.s32 %p878, %r963, 0; + or.pred %p879, %p877, %p878; + mov.f64 %fd1169, %fd1159; + @%p879 bra $L__BB6_580; + + setp.lt.s32 %p880, %r111, 0; + mov.u32 %r965, 0; + and.pred %p882, %p155, %p880; + selp.b32 %r966, %r57, %r56, %p882; + mov.b64 %fd1169, {%r965, %r966}; + +$L__BB6_580: + setp.eq.f32 %p886, %f159, 0f3F800000; + selp.f64 %fd924, 0d3FF0000000000000, %fd1169, %p886; + mul.f64 %fd925, %fd924, %fd322; + sub.f64 %fd926, %fd323, %fd925; + cvt.f64.f32 %fd927, %f3243; + add.f64 %fd1183, %fd926, %fd927; + cvt.f64.f32 %fd374, %f189; + { + .reg .b32 %temp; + mov.b64 {%temp, %r115}, %fd374; + } + abs.f64 %fd375, %fd374; + { // callseq 140, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd375; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1171, [retval0+0]; + } // callseq 140 + setp.gt.s32 %p887, %r115, -1; + @%p887 bra $L__BB6_582; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r972}, %fd1171; + } + xor.b32 %r973, %r972, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r974, %temp}, %fd1171; + } + mov.b64 %fd1171, {%r974, %r973}; + +$L__BB6_582: + setp.eq.f32 %p888, %f189, 0f00000000; + @%p888 bra $L__BB6_586; + bra.uni $L__BB6_583; + +$L__BB6_586: + mov.u32 %r975, 0; + or.b32 %r976, %r115, 2146435072; + selp.b32 %r977, %r976, %r115, %p146; + mov.b64 %fd1171, {%r975, %r977}; + bra.uni $L__BB6_587; + +$L__BB6_583: + @%p887 bra $L__BB6_587; + + cvt.rzi.f64.f64 %fd930, %fd646; + setp.eq.f64 %p890, %fd930, 0d4000000000000000; + @%p890 bra $L__BB6_587; + + mov.f64 %fd1171, 0dFFF8000000000000; + +$L__BB6_587: + add.f64 %fd381, %fd374, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r978}, %fd381; + } + and.b32 %r979, %r978, 2146435072; + setp.ne.s32 %p892, %r979, 2146435072; + mov.f64 %fd1172, %fd1171; + @%p892 bra $L__BB6_593; + + setp.gtu.f64 %p893, %fd375, 0d7FF0000000000000; + mov.f64 %fd1172, %fd381; + @%p893 bra $L__BB6_593; + + setp.eq.s32 %p894, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r980, %temp}, %fd646; + } + setp.eq.s32 %p895, %r980, 0; + and.pred %p896, %p894, %p895; + @%p896 bra $L__BB6_592; + bra.uni $L__BB6_590; + +$L__BB6_592: + mov.u32 %r985, 0; + setp.gt.f64 %p904, %fd375, 0d3FF0000000000000; + selp.b32 %r986, 2146435072, 0, %p904; + xor.b32 %r987, %r986, 2146435072; + selp.b32 %r988, %r987, %r986, %p146; + setp.eq.f32 %p905, %f189, 0fBF800000; + selp.b32 %r989, 1072693248, %r988, %p905; + mov.b64 %fd1172, {%r985, %r989}; + bra.uni $L__BB6_593; + +$L__BB6_530: + { + .reg .b32 %temp; + mov.b64 {%r910, %temp}, %fd324; + } + and.b32 %r911, %r111, 2147483647; + setp.ne.s32 %p815, %r911, 2146435072; + setp.ne.s32 %p816, %r910, 0; + or.pred %p817, %p815, %p816; + mov.f64 %fd1160, %fd1159; + @%p817 bra $L__BB6_533; + + mov.u32 %r912, 0; + mov.b64 %fd1160, {%r912, %r56}; + +$L__BB6_533: + setp.eq.f32 %p821, %f159, 0f3F800000; + selp.f64 %fd884, 0d3FF0000000000000, %fd1160, %p821; + mul.f64 %fd885, %fd884, %fd322; + sub.f64 %fd886, %fd323, %fd885; + cvt.f64.f32 %fd887, %f3243; + add.f64 %fd1183, %fd886, %fd887; + cvt.f64.f32 %fd334, %f189; + { + .reg .b32 %temp; + mov.b64 {%temp, %r112}, %fd334; + } + abs.f64 %fd335, %fd334; + setp.eq.f32 %p822, %f189, 0f00000000; + @%p822 bra $L__BB6_537; + bra.uni $L__BB6_534; + +$L__BB6_537: + mov.u32 %r918, 0; + mov.b64 %fd1161, {%r918, %r102}; + bra.uni $L__BB6_538; + +$L__BB6_534: + { // callseq 137, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd335; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1161, [retval0+0]; + } // callseq 137 + setp.gt.s32 %p823, %r112, -1; + @%p823 bra $L__BB6_538; + + cvt.rzi.f64.f64 %fd890, %fd646; + setp.eq.f64 %p824, %fd890, 0d4000000000000000; + @%p824 bra $L__BB6_538; + + mov.f64 %fd1161, 0dFFF8000000000000; + +$L__BB6_538: + add.f64 %fd339, %fd334, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r919}, %fd339; + } + and.b32 %r920, %r919, 2146435072; + setp.ne.s32 %p825, %r920, 2146435072; + mov.f64 %fd1162, %fd1161; + @%p825 bra $L__BB6_544; + + setp.gtu.f64 %p826, %fd335, 0d7FF0000000000000; + mov.f64 %fd1162, %fd339; + @%p826 bra $L__BB6_544; + + setp.eq.s32 %p827, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r921, %temp}, %fd646; + } + setp.eq.s32 %p828, %r921, 0; + and.pred %p829, %p827, %p828; + @%p829 bra $L__BB6_543; + bra.uni $L__BB6_541; + +$L__BB6_543: + mov.u32 %r925, 0; + setp.gt.f64 %p834, %fd335, 0d3FF0000000000000; + selp.b32 %r926, 2146435072, 0, %p834; + xor.b32 %r927, %r926, 2146435072; + selp.b32 %r928, %r927, %r926, %p146; + setp.eq.f32 %p835, %f189, 0fBF800000; + selp.b32 %r929, 1072693248, %r928, %p835; + mov.b64 %fd1162, {%r925, %r929}; + bra.uni $L__BB6_544; + +$L__BB6_590: + { + .reg .b32 %temp; + mov.b64 {%r981, %temp}, %fd374; + } + and.b32 %r982, %r115, 2147483647; + setp.ne.s32 %p897, %r982, 2146435072; + setp.ne.s32 %p898, %r981, 0; + or.pred %p899, %p897, %p898; + mov.f64 %fd1172, %fd1171; + @%p899 bra $L__BB6_593; + + setp.lt.s32 %p900, %r115, 0; + mov.u32 %r983, 0; + and.pred %p902, %p155, %p900; + selp.b32 %r984, %r57, %r56, %p902; + mov.b64 %fd1172, {%r983, %r984}; + +$L__BB6_593: + setp.eq.f32 %p906, %f189, 0f3F800000; + selp.f64 %fd933, 0d3FF0000000000000, %fd1172, %p906; + mul.f64 %fd934, %fd933, %fd322; + mul.f32 %f1765, %f287, %f190; + cvt.f64.f32 %fd935, %f1765; + sub.f64 %fd936, %fd935, %fd934; + cvt.f64.f32 %fd937, %f3242; + add.f64 %fd1182, %fd936, %fd937; + cvt.f64.f32 %fd386, %f281; + { + .reg .b32 %temp; + mov.b64 {%temp, %r116}, %fd386; + } + abs.f64 %fd387, %fd386; + { // callseq 141, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd387; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1174, [retval0+0]; + } // callseq 141 + setp.gt.s32 %p907, %r116, -1; + @%p907 bra $L__BB6_595; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r990}, %fd1174; + } + xor.b32 %r991, %r990, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r992, %temp}, %fd1174; + } + mov.b64 %fd1174, {%r992, %r991}; + +$L__BB6_595: + setp.eq.f32 %p908, %f281, 0f00000000; + @%p908 bra $L__BB6_599; + bra.uni $L__BB6_596; + +$L__BB6_599: + mov.u32 %r993, 0; + or.b32 %r994, %r116, 2146435072; + selp.b32 %r995, %r994, %r116, %p146; + mov.b64 %fd1174, {%r993, %r995}; + bra.uni $L__BB6_600; + +$L__BB6_596: + @%p907 bra $L__BB6_600; + + cvt.rzi.f64.f64 %fd940, %fd646; + setp.eq.f64 %p910, %fd940, 0d4000000000000000; + @%p910 bra $L__BB6_600; + + mov.f64 %fd1174, 0dFFF8000000000000; + +$L__BB6_600: + add.f64 %fd393, %fd386, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r996}, %fd393; + } + and.b32 %r997, %r996, 2146435072; + setp.ne.s32 %p912, %r997, 2146435072; + mov.f64 %fd1175, %fd1174; + @%p912 bra $L__BB6_606; + + setp.gtu.f64 %p913, %fd387, 0d7FF0000000000000; + mov.f64 %fd1175, %fd393; + @%p913 bra $L__BB6_606; + + setp.eq.s32 %p914, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r998, %temp}, %fd646; + } + setp.eq.s32 %p915, %r998, 0; + and.pred %p916, %p914, %p915; + @%p916 bra $L__BB6_605; + bra.uni $L__BB6_603; + +$L__BB6_605: + mov.u32 %r1003, 0; + setp.gt.f64 %p924, %fd387, 0d3FF0000000000000; + selp.b32 %r1004, 2146435072, 0, %p924; + xor.b32 %r1005, %r1004, 2146435072; + selp.b32 %r1006, %r1005, %r1004, %p146; + setp.eq.f32 %p925, %f281, 0fBF800000; + selp.b32 %r1007, 1072693248, %r1006, %p925; + mov.b64 %fd1175, {%r1003, %r1007}; + bra.uni $L__BB6_606; + +$L__BB6_541: + { + .reg .b32 %temp; + mov.b64 {%r922, %temp}, %fd334; + } + and.b32 %r923, %r112, 2147483647; + setp.ne.s32 %p830, %r923, 2146435072; + setp.ne.s32 %p831, %r922, 0; + or.pred %p832, %p830, %p831; + mov.f64 %fd1162, %fd1161; + @%p832 bra $L__BB6_544; + + mov.u32 %r924, 0; + mov.b64 %fd1162, {%r924, %r56}; + +$L__BB6_544: + setp.eq.f32 %p836, %f189, 0f3F800000; + selp.f64 %fd893, 0d3FF0000000000000, %fd1162, %p836; + mul.f64 %fd894, %fd893, %fd322; + mul.f32 %f1762, %f287, %f190; + cvt.f64.f32 %fd895, %f1762; + sub.f64 %fd896, %fd895, %fd894; + cvt.f64.f32 %fd897, %f3242; + add.f64 %fd1182, %fd896, %fd897; + cvt.f64.f32 %fd344, %f281; + { + .reg .b32 %temp; + mov.b64 {%temp, %r113}, %fd344; + } + abs.f64 %fd345, %fd344; + setp.eq.f32 %p837, %f281, 0f00000000; + @%p837 bra $L__BB6_548; + bra.uni $L__BB6_545; + +$L__BB6_548: + mov.u32 %r930, 0; + mov.b64 %fd1163, {%r930, %r102}; + bra.uni $L__BB6_549; + +$L__BB6_545: + { // callseq 138, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd345; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1163, [retval0+0]; + } // callseq 138 + setp.gt.s32 %p838, %r113, -1; + @%p838 bra $L__BB6_549; + + cvt.rzi.f64.f64 %fd900, %fd646; + setp.eq.f64 %p839, %fd900, 0d4000000000000000; + @%p839 bra $L__BB6_549; + + mov.f64 %fd1163, 0dFFF8000000000000; + +$L__BB6_549: + add.f64 %fd349, %fd344, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r931}, %fd349; + } + and.b32 %r932, %r931, 2146435072; + setp.ne.s32 %p840, %r932, 2146435072; + mov.f64 %fd1164, %fd1163; + @%p840 bra $L__BB6_555; + + setp.gtu.f64 %p841, %fd345, 0d7FF0000000000000; + mov.f64 %fd1164, %fd349; + @%p841 bra $L__BB6_555; + + setp.eq.s32 %p842, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r933, %temp}, %fd646; + } + setp.eq.s32 %p843, %r933, 0; + and.pred %p844, %p842, %p843; + @%p844 bra $L__BB6_554; + bra.uni $L__BB6_552; + +$L__BB6_554: + mov.u32 %r937, 0; + setp.gt.f64 %p849, %fd345, 0d3FF0000000000000; + selp.b32 %r938, 2146435072, 0, %p849; + xor.b32 %r939, %r938, 2146435072; + selp.b32 %r940, %r939, %r938, %p146; + setp.eq.f32 %p850, %f281, 0fBF800000; + selp.b32 %r941, 1072693248, %r940, %p850; + mov.b64 %fd1164, {%r937, %r941}; + bra.uni $L__BB6_555; + +$L__BB6_603: + { + .reg .b32 %temp; + mov.b64 {%r999, %temp}, %fd386; + } + and.b32 %r1000, %r116, 2147483647; + setp.ne.s32 %p917, %r1000, 2146435072; + setp.ne.s32 %p918, %r999, 0; + or.pred %p919, %p917, %p918; + mov.f64 %fd1175, %fd1174; + @%p919 bra $L__BB6_606; + + setp.lt.s32 %p920, %r116, 0; + mov.u32 %r1001, 0; + and.pred %p922, %p155, %p920; + selp.b32 %r1002, %r57, %r56, %p922; + mov.b64 %fd1175, {%r1001, %r1002}; + +$L__BB6_606: + mul.f32 %f1766, %f287, 0f00000000; + cvt.f64.f32 %fd943, %f1766; + setp.eq.f32 %p926, %f281, 0f3F800000; + selp.f64 %fd944, 0d3FF0000000000000, %fd1175, %p926; + mul.f64 %fd945, %fd944, %fd322; + sub.f64 %fd946, %fd943, %fd945; + cvt.f64.f32 %fd947, %f3241; + add.f64 %fd1181, %fd946, %fd947; + cvt.f64.f32 %fd948, %f3240; + sub.f64 %fd949, %fd943, %fd322; + add.f64 %fd1180, %fd949, %fd948; + cvt.f64.f32 %fd399, %f252; + { + .reg .b32 %temp; + mov.b64 {%temp, %r117}, %fd399; + } + abs.f64 %fd400, %fd399; + { // callseq 142, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd400; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1177, [retval0+0]; + } // callseq 142 + setp.gt.s32 %p927, %r117, -1; + @%p927 bra $L__BB6_608; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1008}, %fd1177; + } + xor.b32 %r1009, %r1008, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1010, %temp}, %fd1177; + } + mov.b64 %fd1177, {%r1010, %r1009}; + +$L__BB6_608: + setp.eq.f32 %p928, %f252, 0f00000000; + @%p928 bra $L__BB6_612; + bra.uni $L__BB6_609; + +$L__BB6_612: + mov.u32 %r1011, 0; + or.b32 %r1012, %r117, 2146435072; + selp.b32 %r1013, %r1012, %r117, %p146; + mov.b64 %fd1177, {%r1011, %r1013}; + bra.uni $L__BB6_613; + +$L__BB6_609: + @%p927 bra $L__BB6_613; + + cvt.rzi.f64.f64 %fd952, %fd646; + setp.eq.f64 %p930, %fd952, 0d4000000000000000; + @%p930 bra $L__BB6_613; + + mov.f64 %fd1177, 0dFFF8000000000000; + +$L__BB6_613: + add.f64 %fd406, %fd399, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1014}, %fd406; + } + and.b32 %r1015, %r1014, 2146435072; + setp.ne.s32 %p932, %r1015, 2146435072; + mov.f64 %fd1178, %fd1177; + @%p932 bra $L__BB6_619; + + setp.gtu.f64 %p933, %fd400, 0d7FF0000000000000; + mov.f64 %fd1178, %fd406; + @%p933 bra $L__BB6_619; + + setp.eq.s32 %p934, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1016, %temp}, %fd646; + } + setp.eq.s32 %p935, %r1016, 0; + and.pred %p936, %p934, %p935; + @%p936 bra $L__BB6_618; + bra.uni $L__BB6_616; + +$L__BB6_618: + mov.u32 %r1021, 0; + setp.gt.f64 %p944, %fd400, 0d3FF0000000000000; + selp.b32 %r1022, 2146435072, 0, %p944; + xor.b32 %r1023, %r1022, 2146435072; + selp.b32 %r1024, %r1023, %r1022, %p146; + setp.eq.f32 %p945, %f252, 0fBF800000; + selp.b32 %r1025, 1072693248, %r1024, %p945; + mov.b64 %fd1178, {%r1021, %r1025}; + bra.uni $L__BB6_619; + +$L__BB6_552: + { + .reg .b32 %temp; + mov.b64 {%r934, %temp}, %fd344; + } + and.b32 %r935, %r113, 2147483647; + setp.ne.s32 %p845, %r935, 2146435072; + setp.ne.s32 %p846, %r934, 0; + or.pred %p847, %p845, %p846; + mov.f64 %fd1164, %fd1163; + @%p847 bra $L__BB6_555; + + mov.u32 %r936, 0; + mov.b64 %fd1164, {%r936, %r56}; + +$L__BB6_555: + mul.f32 %f1763, %f287, 0f00000000; + cvt.f64.f32 %fd903, %f1763; + setp.eq.f32 %p851, %f281, 0f3F800000; + selp.f64 %fd904, 0d3FF0000000000000, %fd1164, %p851; + mul.f64 %fd905, %fd904, %fd322; + sub.f64 %fd906, %fd903, %fd905; + cvt.f64.f32 %fd907, %f3241; + add.f64 %fd1181, %fd906, %fd907; + cvt.f64.f32 %fd908, %f3240; + sub.f64 %fd909, %fd903, %fd322; + add.f64 %fd1180, %fd909, %fd908; + cvt.f64.f32 %fd355, %f252; + { + .reg .b32 %temp; + mov.b64 {%temp, %r114}, %fd355; + } + abs.f64 %fd356, %fd355; + setp.eq.f32 %p852, %f252, 0f00000000; + @%p852 bra $L__BB6_559; + bra.uni $L__BB6_556; + +$L__BB6_559: + mov.u32 %r942, 0; + mov.b64 %fd1165, {%r942, %r102}; + bra.uni $L__BB6_560; + +$L__BB6_556: + { // callseq 139, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd356; + .param .b64 param1; + st.param.f64 [param1+0], %fd646; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd1165, [retval0+0]; + } // callseq 139 + setp.gt.s32 %p853, %r114, -1; + @%p853 bra $L__BB6_560; + + cvt.rzi.f64.f64 %fd912, %fd646; + setp.eq.f64 %p854, %fd912, 0d4000000000000000; + @%p854 bra $L__BB6_560; + + mov.f64 %fd1165, 0dFFF8000000000000; + +$L__BB6_560: + add.f64 %fd360, %fd355, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r943}, %fd360; + } + and.b32 %r944, %r943, 2146435072; + setp.ne.s32 %p855, %r944, 2146435072; + mov.f64 %fd1166, %fd1165; + @%p855 bra $L__BB6_566; + + setp.gtu.f64 %p856, %fd356, 0d7FF0000000000000; + mov.f64 %fd1166, %fd360; + @%p856 bra $L__BB6_566; + + setp.eq.s32 %p857, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r945, %temp}, %fd646; + } + setp.eq.s32 %p858, %r945, 0; + and.pred %p859, %p857, %p858; + @%p859 bra $L__BB6_565; + bra.uni $L__BB6_563; + +$L__BB6_565: + mov.u32 %r949, 0; + setp.gt.f64 %p864, %fd356, 0d3FF0000000000000; + selp.b32 %r950, 2146435072, 0, %p864; + xor.b32 %r951, %r950, 2146435072; + selp.b32 %r952, %r951, %r950, %p146; + setp.eq.f32 %p865, %f252, 0fBF800000; + selp.b32 %r953, 1072693248, %r952, %p865; + mov.b64 %fd1166, {%r949, %r953}; + bra.uni $L__BB6_566; + +$L__BB6_616: + { + .reg .b32 %temp; + mov.b64 {%r1017, %temp}, %fd399; + } + and.b32 %r1018, %r117, 2147483647; + setp.ne.s32 %p937, %r1018, 2146435072; + setp.ne.s32 %p938, %r1017, 0; + or.pred %p939, %p937, %p938; + mov.f64 %fd1178, %fd1177; + @%p939 bra $L__BB6_619; + + setp.lt.s32 %p940, %r117, 0; + mov.u32 %r1019, 0; + and.pred %p942, %p155, %p940; + selp.b32 %r1020, %r57, %r56, %p942; + mov.b64 %fd1178, {%r1019, %r1020}; + +$L__BB6_619: + setp.eq.f32 %p946, %f252, 0f3F800000; + selp.f64 %fd955, 0d3FF0000000000000, %fd1178, %p946; + mul.f64 %fd956, %fd955, %fd322; + mul.f32 %f1767, %f287, %f277; + cvt.f64.f32 %fd957, %f1767; + sub.f64 %fd958, %fd957, %fd956; + cvt.f64.f32 %fd959, %f3239; + add.f64 %fd1179, %fd958, %fd959; + bra.uni $L__BB6_620; + +$L__BB6_563: + { + .reg .b32 %temp; + mov.b64 {%r946, %temp}, %fd355; + } + and.b32 %r947, %r114, 2147483647; + setp.ne.s32 %p860, %r947, 2146435072; + setp.ne.s32 %p861, %r946, 0; + or.pred %p862, %p860, %p861; + mov.f64 %fd1166, %fd1165; + @%p862 bra $L__BB6_566; + + mov.u32 %r948, 0; + mov.b64 %fd1166, {%r948, %r56}; + +$L__BB6_566: + setp.eq.f32 %p866, %f252, 0f3F800000; + selp.f64 %fd915, 0d3FF0000000000000, %fd1166, %p866; + mul.f64 %fd916, %fd915, %fd322; + mul.f32 %f1764, %f287, %f277; + cvt.f64.f32 %fd917, %f1764; + sub.f64 %fd918, %fd917, %fd916; + cvt.f64.f32 %fd919, %f3239; + add.f64 %fd1179, %fd918, %fd919; + +$L__BB6_620: + cvt.rn.f32.f64 %f3243, %fd1183; + cvt.rn.f32.f64 %f3242, %fd1182; + cvt.rn.f32.f64 %f3241, %fd1181; + cvt.rn.f32.f64 %f3240, %fd1180; + cvt.rn.f32.f64 %f3239, %fd1179; + fma.rn.f32 %f3237, %f287, %f189, %f3237; + fma.rn.f32 %f3236, %f287, %f281, %f3236; + add.f32 %f3235, %f3235, %f287; + fma.rn.f32 %f3234, %f287, %f252, %f3234; + add.s32 %r1375, %r1375, 1; + setp.lt.s32 %p947, %r1375, %r182; + @%p947 bra $L__BB6_56; + + add.s32 %r1374, %r1374, 1; + setp.lt.s32 %p948, %r1374, %r182; + @%p948 bra $L__BB6_55; + +$L__BB6_622: + ld.param.u32 %r1362, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_14]; + div.rn.f32 %f1768, %f3238, %f3243; + mov.f32 %f1769, 0fBF800000; + max.f32 %f1770, %f1768, %f1769; + mov.f32 %f1771, 0f3F800000; + min.f32 %f1772, %f1770, %f1771; + sub.f32 %f3295, %f3295, %f1772; + div.rn.f32 %f1773, %f3237, %f3242; + max.f32 %f1774, %f1773, %f1769; + min.f32 %f1775, %f1774, %f1771; + sub.f32 %f3294, %f3294, %f1775; + neg.f32 %f1776, %f3293; + div.rn.f32 %f1777, %f3236, %f3241; + max.f32 %f1778, %f1777, %f1776; + min.f32 %f1779, %f1778, %f3293; + sub.f32 %f1780, %f3293, %f1779; + neg.f32 %f1781, %f3292; + div.rn.f32 %f1782, %f3235, %f3240; + max.f32 %f1783, %f1782, %f1781; + min.f32 %f1784, %f1783, %f3292; + sub.f32 %f1785, %f3292, %f1784; + div.rn.f32 %f1786, %f3234, %f3239; + mov.f32 %f1787, 0fBDCCCCCD; + max.f32 %f1788, %f1786, %f1787; + mov.f32 %f1789, 0f3DCCCCCD; + min.f32 %f1790, %f1788, %f1789; + sub.f32 %f3291, %f3291, %f1790; + max.f32 %f3293, %f1780, %f1771; + mov.f32 %f1791, 0f3C23D70A; + max.f32 %f3292, %f1785, %f1791; + add.s32 %r1373, %r1373, 1; + setp.lt.s32 %p949, %r1373, %r1362; + @%p949 bra $L__BB6_53; + +$L__BB6_623: + mov.f32 %f1807, 0f00000000; + mov.f32 %f3313, %f1807; + mov.f32 %f3314, %f1807; + mov.f32 %f3315, %f1807; + mov.f32 %f3318, %f1807; + mov.f32 %f3322, %f1807; + mov.f32 %f3316, %f1807; + mov.f32 %f3317, %f1807; + mov.f32 %f3319, %f1807; + mov.f32 %f3323, %f1807; + mov.f32 %f3320, %f1807; + mov.f32 %f3321, %f1807; + mov.f32 %f3324, %f1807; + mov.f32 %f3325, %f1807; + mov.f32 %f3326, %f1807; + mov.f32 %f3327, %f1807; + mov.f32 %f3359, %f1807; + @%p80 bra $L__BB6_884; + + sub.f32 %f320, %f3291, %f552; + div.rn.f32 %f321, %f320, %f553; + cvt.f64.f32 %fd416, %f321; + add.f64 %fd417, %fd416, 0d4000000000000000; + mov.f64 %fd960, 0d4000000000000000; + cvt.f64.f32 %fd418, %f548; + setp.eq.f32 %p951, %f321, 0fBF800000; + add.f64 %fd419, %fd416, 0d4008000000000000; + mov.f64 %fd961, 0d4008000000000000; + cvt.f64.f32 %fd420, %f550; + add.f64 %fd421, %fd416, 0d4010000000000000; + mov.f64 %fd962, 0d4010000000000000; + add.f32 %f322, %f3291, %f552; + div.rn.f32 %f323, %f322, %f553; + cvt.f64.f32 %fd422, %f323; + add.f64 %fd423, %fd422, 0d4000000000000000; + cvt.f64.f32 %fd424, %f549; + setp.eq.f32 %p952, %f323, 0fBF800000; + add.f64 %fd425, %fd422, 0d4008000000000000; + cvt.f64.f32 %fd426, %f551; + add.f64 %fd427, %fd422, 0d4010000000000000; + div.rn.f32 %f324, %f3293, 0fC0206C98; + mul.f32 %f325, %f547, 0f3F000000; + mul.f32 %f326, %f554, 0f3F000000; + add.f32 %f1824, %f320, %f320; + mov.f32 %f1825, 0f40000000; + mul.f32 %f1826, %f553, %f553; + div.rn.f32 %f1827, %f1824, %f1826; + cvt.f64.f32 %fd428, %f1827; + mul.f32 %f1828, %f548, 0f40400000; + cvt.f64.f32 %fd429, %f1828; + cvt.f64.f32 %fd430, %f320; + add.f64 %fd431, %fd430, 0d4000000000000000; + mul.f32 %f1829, %f1826, %f553; + cvt.f64.f32 %fd432, %f1829; + mul.f32 %f1830, %f550, 0f40800000; + cvt.f64.f32 %fd433, %f1830; + setp.eq.f32 %p953, %f320, 0fBF800000; + add.f64 %fd434, %fd430, 0d4008000000000000; + cvt.f64.f32 %fd435, %f553; + add.f64 %fd436, %fd435, 0d4010000000000000; + add.f32 %f1831, %f322, %f322; + div.rn.f32 %f1832, %f1831, %f1826; + cvt.f64.f32 %fd437, %f1832; + mul.f32 %f1833, %f549, 0f40400000; + cvt.f64.f32 %fd438, %f1833; + cvt.f64.f32 %fd439, %f322; + add.f64 %fd440, %fd439, 0d4000000000000000; + mul.f32 %f1834, %f551, 0f40800000; + cvt.f64.f32 %fd441, %f1834; + setp.eq.f32 %p954, %f322, 0fBF800000; + add.f64 %fd442, %fd439, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r121}, %fd416; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r1027}, %fd960; + } + and.b32 %r1028, %r1027, 2146435072; + setp.eq.s32 %p955, %r1028, 1062207488; + abs.f64 %fd963, %fd416; + { // callseq 143, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd963; + .param .b64 param1; + st.param.f64 [param1+0], %fd960; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd443, [retval0+0]; + } // callseq 143 + mov.u32 %r1026, 0; + setp.lt.s32 %p956, %r121, 0; + and.pred %p49, %p956, %p955; + selp.b32 %r1029, %r121, 0, %p955; + setp.lt.s32 %p957, %r1027, 0; + or.b32 %r1030, %r1029, 2146435072; + selp.b32 %r122, %r1030, %r1029, %p957; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1031}, %fd417; + } + and.b32 %r123, %r1031, 2146435072; + setp.ne.s32 %p958, %r123, 2146435072; + setp.gtu.f64 %p959, %fd963, 0d7FF0000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1032}, %fd961; + } + and.b32 %r1033, %r1032, 2146435072; + setp.eq.s32 %p960, %r1033, 1073741824; + { // callseq 144, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd963; + .param .b64 param1; + st.param.f64 [param1+0], %fd961; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd444, [retval0+0]; + } // callseq 144 + and.pred %p50, %p956, %p960; + and.b32 %r124, %r1027, 2147483647; + setp.gt.f64 %p961, %fd963, 0d3FF0000000000000; + selp.b32 %r1034, 2146435072, 0, %p961; + xor.b32 %r1035, %r1034, 2146435072; + selp.b32 %r1036, %r1035, %r1034, %p957; + selp.b32 %r125, 1072693248, %r1036, %p951; + and.b32 %r126, %r121, 2147483647; + selp.b32 %r1037, %r121, 0, %p960; + setp.lt.s32 %p962, %r1032, 0; + or.b32 %r1038, %r1037, 2146435072; + selp.b32 %r127, %r1038, %r1037, %p962; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1039}, %fd419; + } + and.b32 %r128, %r1039, 2146435072; + setp.ne.s32 %p963, %r128, 2146435072; + setp.gt.s32 %p964, %r1027, -1; + selp.b32 %r1040, 2146435072, 0, %p964; + setp.ne.s32 %p965, %r124, 1071644672; + and.pred %p966, %p965, %p49; + or.b32 %r1041, %r1040, -2147483648; + selp.b32 %r129, %r1041, %r1040, %p966; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1042}, %fd962; + } + and.b32 %r1043, %r1042, 2146435072; + setp.eq.s32 %p967, %r1043, 1072693248; + { // callseq 145, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd963; + .param .b64 param1; + st.param.f64 [param1+0], %fd962; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd445, [retval0+0]; + } // callseq 145 + and.pred %p51, %p956, %p967; + and.b32 %r130, %r1032, 2147483647; + selp.b32 %r1044, %r1035, %r1034, %p962; + selp.b32 %r131, 1072693248, %r1044, %p951; + selp.b32 %r1045, %r121, 0, %p967; + setp.lt.s32 %p968, %r1042, 0; + or.b32 %r1046, %r1045, 2146435072; + selp.b32 %r132, %r1046, %r1045, %p968; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1047}, %fd421; + } + and.b32 %r133, %r1047, 2146435072; + setp.ne.s32 %p969, %r133, 2146435072; + setp.gt.s32 %p970, %r1032, -1; + selp.b32 %r1048, 2146435072, 0, %p970; + setp.ne.s32 %p971, %r130, 1071644672; + and.pred %p972, %p971, %p50; + or.b32 %r1049, %r1048, -2147483648; + selp.b32 %r134, %r1049, %r1048, %p972; + { + .reg .b32 %temp; + mov.b64 {%temp, %r135}, %fd422; + } + abs.f64 %fd964, %fd422; + { // callseq 146, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd964; + .param .b64 param1; + st.param.f64 [param1+0], %fd960; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd446, [retval0+0]; + } // callseq 146 + setp.lt.s32 %p973, %r135, 0; + and.pred %p52, %p973, %p955; + and.b32 %r136, %r1042, 2147483647; + selp.b32 %r1050, %r1035, %r1034, %p968; + selp.b32 %r137, 1072693248, %r1050, %p951; + selp.b32 %r1051, %r135, 0, %p955; + or.b32 %r1052, %r1051, 2146435072; + selp.b32 %r138, %r1052, %r1051, %p957; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1053}, %fd423; + } + and.b32 %r139, %r1053, 2146435072; + setp.ne.s32 %p974, %r139, 2146435072; + setp.gt.s32 %p975, %r1042, -1; + selp.b32 %r1054, 2146435072, 0, %p975; + setp.ne.s32 %p976, %r136, 1071644672; + and.pred %p977, %p976, %p51; + or.b32 %r1055, %r1054, -2147483648; + selp.b32 %r140, %r1055, %r1054, %p977; + setp.gtu.f64 %p978, %fd964, 0d7FF0000000000000; + { // callseq 147, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd964; + .param .b64 param1; + st.param.f64 [param1+0], %fd961; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd447, [retval0+0]; + } // callseq 147 + and.pred %p53, %p973, %p960; + setp.gt.f64 %p979, %fd964, 0d3FF0000000000000; + selp.b32 %r1056, 2146435072, 0, %p979; + xor.b32 %r1057, %r1056, 2146435072; + selp.b32 %r1058, %r1057, %r1056, %p957; + selp.b32 %r141, 1072693248, %r1058, %p952; + and.b32 %r142, %r135, 2147483647; + selp.b32 %r1059, %r135, 0, %p960; + or.b32 %r1060, %r1059, 2146435072; + selp.b32 %r143, %r1060, %r1059, %p962; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1061}, %fd425; + } + and.b32 %r144, %r1061, 2146435072; + setp.ne.s32 %p980, %r144, 2146435072; + and.pred %p981, %p965, %p52; + selp.b32 %r145, %r1041, %r1040, %p981; + { // callseq 148, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd964; + .param .b64 param1; + st.param.f64 [param1+0], %fd962; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd448, [retval0+0]; + } // callseq 148 + and.pred %p54, %p973, %p967; + selp.b32 %r1062, %r1057, %r1056, %p962; + selp.b32 %r146, 1072693248, %r1062, %p952; + selp.b32 %r1063, %r135, 0, %p967; + or.b32 %r1064, %r1063, 2146435072; + selp.b32 %r147, %r1064, %r1063, %p968; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1065}, %fd427; + } + and.b32 %r148, %r1065, 2146435072; + setp.ne.s32 %p982, %r148, 2146435072; + and.pred %p983, %p971, %p53; + selp.b32 %r149, %r1049, %r1048, %p983; + selp.b32 %r1066, %r1057, %r1056, %p968; + selp.b32 %r150, 1072693248, %r1066, %p952; + and.pred %p984, %p976, %p54; + selp.b32 %r151, %r1055, %r1054, %p984; + mov.f32 %f1835, 0f3F800000; + cvt.rzi.f32.f32 %f1836, %f1835; + add.f32 %f1837, %f1836, %f1836; + sub.f32 %f1838, %f1825, %f1837; + abs.f32 %f327, %f1838; + { + .reg .b32 %temp; + mov.b64 {%temp, %r152}, %fd430; + } + abs.f64 %fd965, %fd430; + { // callseq 149, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd965; + .param .b64 param1; + st.param.f64 [param1+0], %fd960; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd449, [retval0+0]; + } // callseq 149 + setp.lt.s32 %p985, %r152, 0; + and.pred %p55, %p985, %p955; + selp.b32 %r1067, %r152, 0, %p955; + or.b32 %r1068, %r1067, 2146435072; + selp.b32 %r153, %r1068, %r1067, %p957; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1069}, %fd431; + } + and.b32 %r154, %r1069, 2146435072; + setp.ne.s32 %p986, %r154, 2146435072; + setp.gtu.f64 %p987, %fd965, 0d7FF0000000000000; + { // callseq 150, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd965; + .param .b64 param1; + st.param.f64 [param1+0], %fd961; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd450, [retval0+0]; + } // callseq 150 + and.pred %p56, %p985, %p960; + setp.gt.f64 %p988, %fd965, 0d3FF0000000000000; + selp.b32 %r1070, 2146435072, 0, %p988; + xor.b32 %r1071, %r1070, 2146435072; + selp.b32 %r1072, %r1071, %r1070, %p957; + selp.b32 %r155, 1072693248, %r1072, %p953; + and.b32 %r156, %r152, 2147483647; + selp.b32 %r1073, %r152, 0, %p960; + or.b32 %r1074, %r1073, 2146435072; + selp.b32 %r157, %r1074, %r1073, %p962; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1075}, %fd434; + } + and.b32 %r158, %r1075, 2146435072; + setp.ne.s32 %p989, %r158, 2146435072; + and.pred %p990, %p965, %p55; + selp.b32 %r159, %r1041, %r1040, %p990; + { + .reg .b32 %temp; + mov.b64 {%temp, %r160}, %fd435; + } + abs.f64 %fd966, %fd435; + { // callseq 151, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd966; + .param .b64 param1; + st.param.f64 [param1+0], %fd962; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd451, [retval0+0]; + } // callseq 151 + setp.lt.s32 %p991, %r160, 0; + and.pred %p57, %p991, %p967; + selp.b32 %r1076, %r1071, %r1070, %p962; + selp.b32 %r161, 1072693248, %r1076, %p953; + selp.b32 %r1077, %r160, 0, %p967; + or.b32 %r1078, %r1077, 2146435072; + selp.b32 %r162, %r1078, %r1077, %p968; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1079}, %fd436; + } + and.b32 %r163, %r1079, 2146435072; + setp.ne.s32 %p992, %r163, 2146435072; + and.pred %p993, %p971, %p56; + selp.b32 %r164, %r1049, %r1048, %p993; + setp.gtu.f64 %p994, %fd966, 0d7FF0000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r165}, %fd439; + } + abs.f64 %fd967, %fd439; + { // callseq 152, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd967; + .param .b64 param1; + st.param.f64 [param1+0], %fd960; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd452, [retval0+0]; + } // callseq 152 + setp.lt.s32 %p995, %r165, 0; + and.pred %p58, %p995, %p955; + setp.gt.f64 %p996, %fd966, 0d3FF0000000000000; + selp.b32 %r1080, 2146435072, 0, %p996; + xor.b32 %r1081, %r1080, 2146435072; + selp.b32 %r1082, %r1081, %r1080, %p968; + setp.eq.f32 %p997, %f553, 0fBF800000; + selp.b32 %r166, 1072693248, %r1082, %p997; + and.b32 %r167, %r160, 2147483647; + selp.b32 %r1083, %r165, 0, %p955; + or.b32 %r1084, %r1083, 2146435072; + selp.b32 %r168, %r1084, %r1083, %p957; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1085}, %fd440; + } + and.b32 %r169, %r1085, 2146435072; + setp.ne.s32 %p998, %r169, 2146435072; + and.pred %p999, %p976, %p57; + selp.b32 %r170, %r1055, %r1054, %p999; + setp.gtu.f64 %p1000, %fd967, 0d7FF0000000000000; + { // callseq 153, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd967; + .param .b64 param1; + st.param.f64 [param1+0], %fd961; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd453, [retval0+0]; + } // callseq 153 + and.pred %p59, %p995, %p960; + setp.gt.f64 %p1001, %fd967, 0d3FF0000000000000; + selp.b32 %r1086, 2146435072, 0, %p1001; + xor.b32 %r1087, %r1086, 2146435072; + selp.b32 %r1088, %r1087, %r1086, %p957; + selp.b32 %r171, 1072693248, %r1088, %p954; + and.b32 %r172, %r165, 2147483647; + selp.b32 %r1089, %r165, 0, %p960; + or.b32 %r1090, %r1089, 2146435072; + selp.b32 %r173, %r1090, %r1089, %p962; + { + .reg .b32 %temp; + mov.b64 {%temp, %r1091}, %fd442; + } + and.b32 %r174, %r1091, 2146435072; + setp.ne.s32 %p1002, %r174, 2146435072; + and.pred %p1003, %p965, %p58; + selp.b32 %r175, %r1041, %r1040, %p1003; + selp.b32 %r1092, %r1087, %r1086, %p962; + selp.b32 %r176, 1072693248, %r1092, %p954; + and.pred %p1004, %p971, %p59; + selp.b32 %r177, %r1049, %r1048, %p1004; + or.pred %p60, %p958, %p959; + or.pred %p61, %p963, %p959; + or.pred %p62, %p969, %p959; + or.pred %p63, %p974, %p978; + or.pred %p64, %p980, %p978; + or.pred %p65, %p982, %p978; + or.pred %p66, %p986, %p987; + or.pred %p67, %p989, %p987; + or.pred %p68, %p992, %p994; + or.pred %p69, %p998, %p1000; + or.pred %p70, %p1002, %p1000; + mov.u32 %r1376, %r1026; + +$L__BB6_625: + cvt.rn.f32.s32 %f1839, %r1376; + sub.f32 %f344, %f1839, %f3295; + add.f32 %f345, %f344, 0f3F000000; + add.f32 %f346, %f344, 0fBF000000; + add.f32 %f1840, %f1839, 0f3F000000; + sub.f32 %f347, %f1840, %f3295; + add.f32 %f1841, %f1839, 0f3F800000; + sub.f32 %f348, %f1841, %f3295; + add.f32 %f349, %f344, 0f3F800000; + mov.u32 %r1377, %r1026; + +$L__BB6_626: + not.pred %p1005, %p49; + mov.f64 %fd1185, %fd443; + @%p1005 bra $L__BB6_628; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1094}, %fd443; + } + xor.b32 %r1095, %r1094, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1096, %temp}, %fd443; + } + mov.b64 %fd1185, {%r1096, %r1095}; + +$L__BB6_628: + setp.eq.f32 %p1006, %f321, 0f00000000; + @%p1006 bra $L__BB6_632; + bra.uni $L__BB6_629; + +$L__BB6_632: + mov.u32 %r1097, 0; + mov.b64 %fd1185, {%r1097, %r122}; + bra.uni $L__BB6_633; + +$L__BB6_629: + setp.gt.s32 %p1007, %r121, -1; + @%p1007 bra $L__BB6_633; + + cvt.rzi.f64.f64 %fd969, %fd960; + setp.eq.f64 %p1008, %fd969, 0d4000000000000000; + @%p1008 bra $L__BB6_633; + + mov.f64 %fd1185, 0dFFF8000000000000; + +$L__BB6_633: + selp.f64 %fd1186, %fd1185, %fd417, %p958; + @%p60 bra $L__BB6_638; + + setp.eq.s32 %p1010, %r124, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1098, %temp}, %fd960; + } + setp.eq.s32 %p1011, %r1098, 0; + and.pred %p1012, %p1010, %p1011; + @%p1012 bra $L__BB6_637; + bra.uni $L__BB6_635; + +$L__BB6_637: + mov.u32 %r1101, 0; + mov.b64 %fd1186, {%r1101, %r125}; + bra.uni $L__BB6_638; + +$L__BB6_635: + setp.ne.s32 %p1013, %r126, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1099, %temp}, %fd416; + } + setp.ne.s32 %p1014, %r1099, 0; + or.pred %p1015, %p1013, %p1014; + mov.f64 %fd1186, %fd1185; + @%p1015 bra $L__BB6_638; + + mov.u32 %r1100, 0; + mov.b64 %fd1186, {%r1100, %r129}; + +$L__BB6_638: + not.pred %p1016, %p50; + mov.f64 %fd1188, %fd444; + @%p1016 bra $L__BB6_640; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1102}, %fd444; + } + xor.b32 %r1103, %r1102, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1104, %temp}, %fd444; + } + mov.b64 %fd1188, {%r1104, %r1103}; + +$L__BB6_640: + @%p1006 bra $L__BB6_644; + bra.uni $L__BB6_641; + +$L__BB6_644: + mov.u32 %r1105, 0; + mov.b64 %fd1188, {%r1105, %r127}; + bra.uni $L__BB6_645; + +$L__BB6_641: + setp.gt.s32 %p1018, %r121, -1; + @%p1018 bra $L__BB6_645; + + cvt.rzi.f64.f64 %fd973, %fd961; + setp.eq.f64 %p1019, %fd973, 0d4008000000000000; + @%p1019 bra $L__BB6_645; + + mov.f64 %fd1188, 0dFFF8000000000000; + +$L__BB6_645: + selp.f64 %fd1189, %fd1188, %fd419, %p963; + @%p61 bra $L__BB6_650; + + setp.eq.s32 %p1021, %r130, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1106, %temp}, %fd961; + } + setp.eq.s32 %p1022, %r1106, 0; + and.pred %p1023, %p1021, %p1022; + @%p1023 bra $L__BB6_649; + bra.uni $L__BB6_647; + +$L__BB6_649: + mov.u32 %r1109, 0; + mov.b64 %fd1189, {%r1109, %r131}; + bra.uni $L__BB6_650; + +$L__BB6_647: + setp.ne.s32 %p1024, %r126, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1107, %temp}, %fd416; + } + setp.ne.s32 %p1025, %r1107, 0; + or.pred %p1026, %p1024, %p1025; + mov.f64 %fd1189, %fd1188; + @%p1026 bra $L__BB6_650; + + mov.u32 %r1108, 0; + mov.b64 %fd1189, {%r1108, %r134}; + +$L__BB6_650: + setp.eq.f32 %p1027, %f321, 0f3F800000; + selp.f64 %fd976, 0d3FF0000000000000, %fd1189, %p1027; + add.f64 %fd977, %fd1186, 0d3FF0000000000000; + selp.f64 %fd978, 0d4000000000000000, %fd977, %p1027; + fma.rn.f64 %fd470, %fd976, %fd418, %fd978; + not.pred %p1028, %p51; + mov.f64 %fd1191, %fd445; + @%p1028 bra $L__BB6_652; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1110}, %fd445; + } + xor.b32 %r1111, %r1110, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1112, %temp}, %fd445; + } + mov.b64 %fd1191, {%r1112, %r1111}; + +$L__BB6_652: + @%p1006 bra $L__BB6_656; + bra.uni $L__BB6_653; + +$L__BB6_656: + mov.u32 %r1113, 0; + mov.b64 %fd1191, {%r1113, %r132}; + bra.uni $L__BB6_657; + +$L__BB6_653: + setp.gt.s32 %p1030, %r121, -1; + @%p1030 bra $L__BB6_657; + + cvt.rzi.f64.f64 %fd980, %fd962; + setp.eq.f64 %p1031, %fd980, 0d4010000000000000; + @%p1031 bra $L__BB6_657; + + mov.f64 %fd1191, 0dFFF8000000000000; + +$L__BB6_657: + selp.f64 %fd1192, %fd1191, %fd421, %p969; + @%p62 bra $L__BB6_662; + + setp.eq.s32 %p1033, %r136, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1114, %temp}, %fd962; + } + setp.eq.s32 %p1034, %r1114, 0; + and.pred %p1035, %p1033, %p1034; + @%p1035 bra $L__BB6_661; + bra.uni $L__BB6_659; + +$L__BB6_661: + mov.u32 %r1117, 0; + mov.b64 %fd1192, {%r1117, %r137}; + bra.uni $L__BB6_662; + +$L__BB6_659: + setp.ne.s32 %p1036, %r126, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1115, %temp}, %fd416; + } + setp.ne.s32 %p1037, %r1115, 0; + or.pred %p1038, %p1036, %p1037; + mov.f64 %fd1192, %fd1191; + @%p1038 bra $L__BB6_662; + + mov.u32 %r1116, 0; + mov.b64 %fd1192, {%r1116, %r140}; + +$L__BB6_662: + selp.f64 %fd983, 0d3FF0000000000000, %fd1192, %p1027; + fma.rn.f64 %fd479, %fd983, %fd420, %fd470; + not.pred %p1040, %p52; + mov.f64 %fd1194, %fd446; + @%p1040 bra $L__BB6_664; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1118}, %fd446; + } + xor.b32 %r1119, %r1118, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1120, %temp}, %fd446; + } + mov.b64 %fd1194, {%r1120, %r1119}; + +$L__BB6_664: + setp.eq.f32 %p1041, %f323, 0f00000000; + @%p1041 bra $L__BB6_668; + bra.uni $L__BB6_665; + +$L__BB6_668: + mov.u32 %r1121, 0; + mov.b64 %fd1194, {%r1121, %r138}; + bra.uni $L__BB6_669; + +$L__BB6_665: + setp.gt.s32 %p1042, %r135, -1; + @%p1042 bra $L__BB6_669; + + cvt.rzi.f64.f64 %fd985, %fd960; + setp.eq.f64 %p1043, %fd985, 0d4000000000000000; + @%p1043 bra $L__BB6_669; + + mov.f64 %fd1194, 0dFFF8000000000000; + +$L__BB6_669: + selp.f64 %fd1195, %fd1194, %fd423, %p974; + @%p63 bra $L__BB6_674; + + setp.eq.s32 %p1045, %r124, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1122, %temp}, %fd960; + } + setp.eq.s32 %p1046, %r1122, 0; + and.pred %p1047, %p1045, %p1046; + @%p1047 bra $L__BB6_673; + bra.uni $L__BB6_671; + +$L__BB6_673: + mov.u32 %r1125, 0; + mov.b64 %fd1195, {%r1125, %r141}; + bra.uni $L__BB6_674; + +$L__BB6_671: + setp.ne.s32 %p1048, %r142, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1123, %temp}, %fd422; + } + setp.ne.s32 %p1049, %r1123, 0; + or.pred %p1050, %p1048, %p1049; + mov.f64 %fd1195, %fd1194; + @%p1050 bra $L__BB6_674; + + mov.u32 %r1124, 0; + mov.b64 %fd1195, {%r1124, %r145}; + +$L__BB6_674: + not.pred %p1051, %p53; + mov.f64 %fd1197, %fd447; + @%p1051 bra $L__BB6_676; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1126}, %fd447; + } + xor.b32 %r1127, %r1126, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1128, %temp}, %fd447; + } + mov.b64 %fd1197, {%r1128, %r1127}; + +$L__BB6_676: + @%p1041 bra $L__BB6_680; + bra.uni $L__BB6_677; + +$L__BB6_680: + mov.u32 %r1129, 0; + mov.b64 %fd1197, {%r1129, %r143}; + bra.uni $L__BB6_681; + +$L__BB6_677: + setp.gt.s32 %p1053, %r135, -1; + @%p1053 bra $L__BB6_681; + + cvt.rzi.f64.f64 %fd989, %fd961; + setp.eq.f64 %p1054, %fd989, 0d4008000000000000; + @%p1054 bra $L__BB6_681; + + mov.f64 %fd1197, 0dFFF8000000000000; + +$L__BB6_681: + selp.f64 %fd1198, %fd1197, %fd425, %p980; + @%p64 bra $L__BB6_686; + + setp.eq.s32 %p1056, %r130, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1130, %temp}, %fd961; + } + setp.eq.s32 %p1057, %r1130, 0; + and.pred %p1058, %p1056, %p1057; + @%p1058 bra $L__BB6_685; + bra.uni $L__BB6_683; + +$L__BB6_685: + mov.u32 %r1133, 0; + mov.b64 %fd1198, {%r1133, %r146}; + bra.uni $L__BB6_686; + +$L__BB6_683: + setp.ne.s32 %p1059, %r142, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1131, %temp}, %fd422; + } + setp.ne.s32 %p1060, %r1131, 0; + or.pred %p1061, %p1059, %p1060; + mov.f64 %fd1198, %fd1197; + @%p1061 bra $L__BB6_686; + + mov.u32 %r1132, 0; + mov.b64 %fd1198, {%r1132, %r149}; + +$L__BB6_686: + setp.eq.f32 %p1062, %f323, 0f3F800000; + selp.f64 %fd992, 0d3FF0000000000000, %fd1198, %p1062; + add.f64 %fd993, %fd1195, 0d3FF0000000000000; + selp.f64 %fd994, 0d4000000000000000, %fd993, %p1062; + fma.rn.f64 %fd496, %fd992, %fd424, %fd994; + not.pred %p1063, %p54; + mov.f64 %fd1200, %fd448; + @%p1063 bra $L__BB6_688; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1134}, %fd448; + } + xor.b32 %r1135, %r1134, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1136, %temp}, %fd448; + } + mov.b64 %fd1200, {%r1136, %r1135}; + +$L__BB6_688: + @%p1041 bra $L__BB6_692; + bra.uni $L__BB6_689; + +$L__BB6_692: + mov.u32 %r1137, 0; + mov.b64 %fd1200, {%r1137, %r147}; + bra.uni $L__BB6_693; + +$L__BB6_689: + setp.gt.s32 %p1065, %r135, -1; + @%p1065 bra $L__BB6_693; + + cvt.rzi.f64.f64 %fd996, %fd962; + setp.eq.f64 %p1066, %fd996, 0d4010000000000000; + @%p1066 bra $L__BB6_693; + + mov.f64 %fd1200, 0dFFF8000000000000; + +$L__BB6_693: + selp.f64 %fd1201, %fd1200, %fd427, %p982; + @%p65 bra $L__BB6_698; + + setp.eq.s32 %p1068, %r136, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1138, %temp}, %fd962; + } + setp.eq.s32 %p1069, %r1138, 0; + and.pred %p1070, %p1068, %p1069; + @%p1070 bra $L__BB6_697; + bra.uni $L__BB6_695; + +$L__BB6_697: + mov.u32 %r1141, 0; + mov.b64 %fd1201, {%r1141, %r150}; + bra.uni $L__BB6_698; + +$L__BB6_695: + setp.ne.s32 %p1071, %r142, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1139, %temp}, %fd422; + } + setp.ne.s32 %p1072, %r1139, 0; + or.pred %p1073, %p1071, %p1072; + mov.f64 %fd1201, %fd1200; + @%p1073 bra $L__BB6_698; + + mov.u32 %r1140, 0; + mov.b64 %fd1201, {%r1140, %r151}; + +$L__BB6_698: + selp.f64 %fd999, 0d3FF0000000000000, %fd1201, %p1062; + fma.rn.f64 %fd1000, %fd999, %fd426, %fd496; + cvt.rn.f32.f64 %f1842, %fd1000; + cvt.rn.f32.f64 %f1843, %fd479; + sqrt.rn.f32 %f366, %f1843; + mul.f32 %f367, %f366, %f547; + sqrt.rn.f32 %f368, %f1842; + mul.f32 %f369, %f368, %f554; + mov.f32 %f1844, 0f3F000000; + div.rn.f32 %f1845, %f1844, %f367; + div.rn.f32 %f1846, %f1845, %f367; + sqrt.rn.f32 %f370, %f1846; + mul.f32 %f371, %f370, %f345; + abs.f32 %f1847, %f371; + setp.ltu.f32 %p1075, %f1847, 0f3F8060FE; + setp.ge.f32 %p1076, %f1847, 0f3F8060FE; + mul.f32 %f1848, %f371, %f371; + selp.f32 %f1849, %f1847, %f1848, %p1076; + selp.f32 %f1850, 0f3789CA3C, 0f38B1E96A, %p1076; + selp.f32 %f1851, 0fB9F560B9, 0fBA574D20, %p1076; + fma.rn.f32 %f1852, %f1850, %f1849, %f1851; + selp.f32 %f1853, 0f3BAC840B, 0f3BAAD5EA, %p1076; + fma.rn.f32 %f1854, %f1852, %f1849, %f1853; + selp.f32 %f1855, 0fBD0C8162, 0fBCDC1BE7, %p1076; + fma.rn.f32 %f1856, %f1854, %f1849, %f1855; + selp.f32 %f1857, 0f3E1CF906, 0f3DE718AF, %p1076; + fma.rn.f32 %f1858, %f1856, %f1849, %f1857; + selp.f32 %f1859, 0f3F6A937E, 0fBEC093AC, %p1076; + fma.rn.f32 %f1860, %f1858, %f1849, %f1859; + selp.f32 %f1861, 0f3F20D842, 0f3E0375D3, %p1076; + fma.rn.f32 %f1862, %f1860, %f1849, %f1861; + neg.f32 %f1863, %f1847; + selp.f32 %f1864, %f1863, %f371, %p1076; + fma.rn.f32 %f3329, %f1862, %f1864, %f1864; + @%p1075 bra $L__BB6_700; + + ex2.approx.ftz.f32 %f1865, %f3329; + sub.f32 %f1867, %f1835, %f1865; + mov.b32 %r1142, %f1867; + mov.b32 %r1143, %f371; + and.b32 %r1144, %r1143, -2147483648; + or.b32 %r1145, %r1144, %r1142; + mov.b32 %f3329, %r1145; + +$L__BB6_700: + mul.f32 %f375, %f370, %f346; + abs.f32 %f1868, %f375; + setp.ltu.f32 %p1077, %f1868, 0f3F8060FE; + setp.ge.f32 %p1078, %f1868, 0f3F8060FE; + mul.f32 %f1869, %f375, %f375; + selp.f32 %f1870, %f1868, %f1869, %p1078; + selp.f32 %f1871, 0f3789CA3C, 0f38B1E96A, %p1078; + selp.f32 %f1872, 0fB9F560B9, 0fBA574D20, %p1078; + fma.rn.f32 %f1873, %f1871, %f1870, %f1872; + selp.f32 %f1874, 0f3BAC840B, 0f3BAAD5EA, %p1078; + fma.rn.f32 %f1875, %f1873, %f1870, %f1874; + selp.f32 %f1876, 0fBD0C8162, 0fBCDC1BE7, %p1078; + fma.rn.f32 %f1877, %f1875, %f1870, %f1876; + selp.f32 %f1878, 0f3E1CF906, 0f3DE718AF, %p1078; + fma.rn.f32 %f1879, %f1877, %f1870, %f1878; + selp.f32 %f1880, 0f3F6A937E, 0fBEC093AC, %p1078; + fma.rn.f32 %f1881, %f1879, %f1870, %f1880; + selp.f32 %f1882, 0f3F20D842, 0f3E0375D3, %p1078; + fma.rn.f32 %f1883, %f1881, %f1870, %f1882; + neg.f32 %f1884, %f1868; + selp.f32 %f1885, %f1884, %f375, %p1078; + fma.rn.f32 %f3330, %f1883, %f1885, %f1885; + @%p1077 bra $L__BB6_702; + + ex2.approx.ftz.f32 %f1886, %f3330; + sub.f32 %f1888, %f1835, %f1886; + mov.b32 %r1146, %f1888; + mov.b32 %r1147, %f375; + and.b32 %r1148, %r1147, -2147483648; + or.b32 %r1149, %r1148, %r1146; + mov.b32 %f3330, %r1149; + +$L__BB6_702: + sub.f32 %f1889, %f3329, %f3330; + mul.f32 %f379, %f1889, 0f3F000000; + div.rn.f32 %f1891, %f1844, %f369; + div.rn.f32 %f1892, %f1891, %f369; + cvt.rn.f32.s32 %f380, %r1377; + sub.f32 %f381, %f380, %f3294; + add.f32 %f1893, %f381, 0f3F000000; + sqrt.rn.f32 %f382, %f1892; + mul.f32 %f383, %f382, %f1893; + abs.f32 %f1894, %f383; + setp.ltu.f32 %p1079, %f1894, 0f3F8060FE; + setp.ge.f32 %p1080, %f1894, 0f3F8060FE; + mul.f32 %f1895, %f383, %f383; + selp.f32 %f1896, %f1894, %f1895, %p1080; + selp.f32 %f1897, 0f3789CA3C, 0f38B1E96A, %p1080; + selp.f32 %f1898, 0fB9F560B9, 0fBA574D20, %p1080; + fma.rn.f32 %f1899, %f1897, %f1896, %f1898; + selp.f32 %f1900, 0f3BAC840B, 0f3BAAD5EA, %p1080; + fma.rn.f32 %f1901, %f1899, %f1896, %f1900; + selp.f32 %f1902, 0fBD0C8162, 0fBCDC1BE7, %p1080; + fma.rn.f32 %f1903, %f1901, %f1896, %f1902; + selp.f32 %f1904, 0f3E1CF906, 0f3DE718AF, %p1080; + fma.rn.f32 %f1905, %f1903, %f1896, %f1904; + selp.f32 %f1906, 0f3F6A937E, 0fBEC093AC, %p1080; + fma.rn.f32 %f1907, %f1905, %f1896, %f1906; + selp.f32 %f1908, 0f3F20D842, 0f3E0375D3, %p1080; + fma.rn.f32 %f1909, %f1907, %f1896, %f1908; + neg.f32 %f1910, %f1894; + selp.f32 %f1911, %f1910, %f383, %p1080; + fma.rn.f32 %f3331, %f1909, %f1911, %f1911; + @%p1079 bra $L__BB6_704; + + ex2.approx.ftz.f32 %f1912, %f3331; + sub.f32 %f1914, %f1835, %f1912; + mov.b32 %r1150, %f1914; + mov.b32 %r1151, %f383; + and.b32 %r1152, %r1151, -2147483648; + or.b32 %r1153, %r1152, %r1150; + mov.b32 %f3331, %r1153; + +$L__BB6_704: + add.f32 %f387, %f381, 0fBF000000; + mul.f32 %f388, %f382, %f387; + abs.f32 %f1915, %f388; + setp.ltu.f32 %p1081, %f1915, 0f3F8060FE; + setp.ge.f32 %p1082, %f1915, 0f3F8060FE; + mul.f32 %f1916, %f388, %f388; + selp.f32 %f1917, %f1915, %f1916, %p1082; + selp.f32 %f1918, 0f3789CA3C, 0f38B1E96A, %p1082; + selp.f32 %f1919, 0fB9F560B9, 0fBA574D20, %p1082; + fma.rn.f32 %f1920, %f1918, %f1917, %f1919; + selp.f32 %f1921, 0f3BAC840B, 0f3BAAD5EA, %p1082; + fma.rn.f32 %f1922, %f1920, %f1917, %f1921; + selp.f32 %f1923, 0fBD0C8162, 0fBCDC1BE7, %p1082; + fma.rn.f32 %f1924, %f1922, %f1917, %f1923; + selp.f32 %f1925, 0f3E1CF906, 0f3DE718AF, %p1082; + fma.rn.f32 %f1926, %f1924, %f1917, %f1925; + selp.f32 %f1927, 0f3F6A937E, 0fBEC093AC, %p1082; + fma.rn.f32 %f1928, %f1926, %f1917, %f1927; + selp.f32 %f1929, 0f3F20D842, 0f3E0375D3, %p1082; + fma.rn.f32 %f1930, %f1928, %f1917, %f1929; + neg.f32 %f1931, %f1915; + selp.f32 %f1932, %f1931, %f388, %p1082; + fma.rn.f32 %f3332, %f1930, %f1932, %f1932; + @%p1081 bra $L__BB6_706; + + ex2.approx.ftz.f32 %f1933, %f3332; + sub.f32 %f1935, %f1835, %f1933; + mov.b32 %r1154, %f1935; + mov.b32 %r1155, %f388; + and.b32 %r1156, %r1155, -2147483648; + or.b32 %r1157, %r1156, %r1154; + mov.b32 %f3332, %r1157; + +$L__BB6_706: + sub.f32 %f1937, %f3331, %f3332; + mul.f32 %f392, %f1937, 0f3F000000; + div.rn.f32 %f393, %f347, %f367; + abs.f32 %f394, %f393; + setp.lt.f32 %p1083, %f394, 0f00800000; + mul.f32 %f1938, %f394, 0f4B800000; + selp.f32 %f1939, %f1938, %f394, %p1083; + selp.f32 %f1940, 0fC3170000, 0fC2FE0000, %p1083; + mov.b32 %r1158, %f1939; + and.b32 %r1159, %r1158, 8388607; + or.b32 %r1160, %r1159, 1065353216; + mov.b32 %f1941, %r1160; + shr.u32 %r1161, %r1158, 23; + cvt.rn.f32.u32 %f1942, %r1161; + add.f32 %f1943, %f1940, %f1942; + setp.gt.f32 %p1084, %f1941, 0f3FB504F3; + mul.f32 %f1944, %f1941, 0f3F000000; + add.f32 %f1945, %f1943, 0f3F800000; + selp.f32 %f1946, %f1945, %f1943, %p1084; + selp.f32 %f1947, %f1944, %f1941, %p1084; + add.f32 %f1948, %f1947, 0fBF800000; + add.f32 %f1949, %f1947, 0f3F800000; + rcp.approx.ftz.f32 %f1950, %f1949; + add.f32 %f1951, %f1948, %f1948; + mul.f32 %f1953, %f1951, %f1950; + mul.f32 %f1954, %f1953, %f1953; + mov.f32 %f1955, 0f3C4CAF63; + mov.f32 %f1956, 0f3B18F0FE; + fma.rn.f32 %f1957, %f1956, %f1954, %f1955; + mov.f32 %f1958, 0f3DAAAABD; + fma.rn.f32 %f1959, %f1957, %f1954, %f1958; + mul.rn.f32 %f1960, %f1959, %f1954; + mul.rn.f32 %f1961, %f1960, %f1953; + sub.f32 %f1962, %f1948, %f1953; + add.f32 %f1963, %f1962, %f1962; + neg.f32 %f1964, %f1953; + fma.rn.f32 %f1965, %f1964, %f1948, %f1963; + mul.rn.f32 %f1966, %f1950, %f1965; + add.f32 %f1967, %f1961, %f1953; + sub.f32 %f1968, %f1953, %f1967; + add.f32 %f1969, %f1961, %f1968; + add.f32 %f1970, %f1966, %f1969; + add.f32 %f1971, %f1967, %f1970; + sub.f32 %f1972, %f1967, %f1971; + add.f32 %f1973, %f1970, %f1972; + mov.f32 %f1974, 0f3F317200; + mul.rn.f32 %f1975, %f1946, %f1974; + mov.f32 %f1976, 0f35BFBE8E; + mul.rn.f32 %f1977, %f1946, %f1976; + add.f32 %f1978, %f1975, %f1971; + sub.f32 %f1979, %f1975, %f1978; + add.f32 %f1980, %f1971, %f1979; + add.f32 %f1981, %f1973, %f1980; + add.f32 %f1982, %f1977, %f1981; + add.f32 %f1983, %f1978, %f1982; + sub.f32 %f1984, %f1978, %f1983; + add.f32 %f1985, %f1982, %f1984; + mul.rn.f32 %f1986, %f1825, %f1983; + neg.f32 %f1987, %f1986; + fma.rn.f32 %f1988, %f1825, %f1983, %f1987; + fma.rn.f32 %f1989, %f1825, %f1985, %f1988; + mov.f32 %f1990, 0f00000000; + fma.rn.f32 %f1991, %f1990, %f1983, %f1989; + add.rn.f32 %f1992, %f1986, %f1991; + neg.f32 %f1993, %f1992; + add.rn.f32 %f1994, %f1986, %f1993; + add.rn.f32 %f1995, %f1994, %f1991; + mov.b32 %r1162, %f1992; + setp.eq.s32 %p1085, %r1162, 1118925336; + add.s32 %r1163, %r1162, -1; + mov.b32 %f1996, %r1163; + add.f32 %f1997, %f1995, 0f37000000; + selp.f32 %f395, %f1997, %f1995, %p1085; + selp.f32 %f1998, %f1996, %f1992, %p1085; + mov.f32 %f1999, 0f3FB8AA3B; + mul.rn.f32 %f2000, %f1998, %f1999; + cvt.rzi.f32.f32 %f2001, %f2000; + abs.f32 %f2002, %f2001; + setp.gt.f32 %p1086, %f2002, 0f42FC0000; + mov.b32 %r1164, %f2001; + and.b32 %r1165, %r1164, -2147483648; + or.b32 %r1166, %r1165, 1123811328; + mov.b32 %f2003, %r1166; + selp.f32 %f2004, %f2003, %f2001, %p1086; + mov.f32 %f2005, 0fBF317218; + fma.rn.f32 %f2006, %f2004, %f2005, %f1998; + mov.f32 %f2007, 0f3102E308; + fma.rn.f32 %f2008, %f2004, %f2007, %f2006; + mul.f32 %f2009, %f2008, 0f3FB8AA3B; + add.f32 %f2010, %f2004, 0f4B40007F; + mov.b32 %r1167, %f2010; + shl.b32 %r1168, %r1167, 23; + mov.b32 %f2011, %r1168; + ex2.approx.ftz.f32 %f2012, %f2009; + mul.f32 %f396, %f2012, %f2011; + setp.eq.f32 %p1087, %f396, 0f7F800000; + mov.f32 %f3333, 0f7F800000; + @%p1087 bra $L__BB6_708; + + fma.rn.f32 %f3333, %f396, %f395, %f396; + +$L__BB6_708: + setp.lt.f32 %p1088, %f393, 0f00000000; + setp.eq.f32 %p1089, %f327, 0f3F800000; + and.pred %p71, %p1088, %p1089; + setp.eq.f32 %p1090, %f393, 0f00000000; + @%p1090 bra $L__BB6_712; + bra.uni $L__BB6_709; + +$L__BB6_712: + add.f32 %f2017, %f393, %f393; + selp.f32 %f3335, %f2017, 0f00000000, %p1089; + bra.uni $L__BB6_713; + +$L__BB6_709: + mov.b32 %r1169, %f3333; + xor.b32 %r1170, %r1169, -2147483648; + mov.b32 %f2013, %r1170; + selp.f32 %f3335, %f2013, %f3333, %p71; + setp.geu.f32 %p1091, %f393, 0f00000000; + @%p1091 bra $L__BB6_713; + + cvt.rzi.f32.f32 %f2015, %f1825; + setp.eq.f32 %p1092, %f2015, 0f40000000; + @%p1092 bra $L__BB6_713; + + mov.f32 %f3335, 0f7FFFFFFF; + +$L__BB6_713: + add.f32 %f2018, %f394, 0f40000000; + mov.b32 %r1171, %f2018; + setp.lt.s32 %p1094, %r1171, 2139095040; + @%p1094 bra $L__BB6_718; + + setp.gtu.f32 %p1095, %f394, 0f7F800000; + @%p1095 bra $L__BB6_717; + bra.uni $L__BB6_715; + +$L__BB6_717: + add.f32 %f3335, %f393, 0f40000000; + bra.uni $L__BB6_718; + +$L__BB6_715: + setp.neu.f32 %p1096, %f394, 0f7F800000; + @%p1096 bra $L__BB6_718; + + selp.f32 %f3335, 0fFF800000, 0f7F800000, %p71; + +$L__BB6_718: + mul.f32 %f2020, %f3335, 0fBF000000; + setp.eq.f32 %p1097, %f393, 0f3F800000; + selp.f32 %f2021, 0fBF000000, %f2020, %p1097; + mov.f32 %f2023, 0f3BBB989D; + fma.rn.f32 %f2024, %f2021, %f2023, %f1844; + mov.f32 %f2026, 0f437C0000; + cvt.sat.f32.f32 %f2027, %f2024; + mov.f32 %f2028, 0f4B400001; + fma.rm.f32 %f2029, %f2027, %f2026, %f2028; + add.f32 %f2030, %f2029, 0fCB40007F; + neg.f32 %f2031, %f2030; + fma.rn.f32 %f2032, %f2021, %f1999, %f2031; + mov.f32 %f2033, 0f32A57060; + fma.rn.f32 %f2034, %f2021, %f2033, %f2032; + mov.b32 %r1172, %f2029; + shl.b32 %r1173, %r1172, 23; + mov.b32 %f2035, %r1173; + ex2.approx.ftz.f32 %f2036, %f2034; + mul.f32 %f405, %f2036, %f2035; + div.rn.f32 %f406, %f346, %f367; + abs.f32 %f407, %f406; + setp.lt.f32 %p1098, %f407, 0f00800000; + mul.f32 %f2037, %f407, 0f4B800000; + selp.f32 %f2038, %f2037, %f407, %p1098; + selp.f32 %f2039, 0fC3170000, 0fC2FE0000, %p1098; + mov.b32 %r1174, %f2038; + and.b32 %r1175, %r1174, 8388607; + or.b32 %r1176, %r1175, 1065353216; + mov.b32 %f2040, %r1176; + shr.u32 %r1177, %r1174, 23; + cvt.rn.f32.u32 %f2041, %r1177; + add.f32 %f2042, %f2039, %f2041; + setp.gt.f32 %p1099, %f2040, 0f3FB504F3; + mul.f32 %f2043, %f2040, 0f3F000000; + add.f32 %f2044, %f2042, 0f3F800000; + selp.f32 %f2045, %f2044, %f2042, %p1099; + selp.f32 %f2046, %f2043, %f2040, %p1099; + add.f32 %f2047, %f2046, 0fBF800000; + add.f32 %f2048, %f2046, 0f3F800000; + rcp.approx.ftz.f32 %f2049, %f2048; + add.f32 %f2050, %f2047, %f2047; + mul.f32 %f2052, %f2050, %f2049; + mul.f32 %f2053, %f2052, %f2052; + fma.rn.f32 %f2056, %f1956, %f2053, %f1955; + fma.rn.f32 %f2058, %f2056, %f2053, %f1958; + mul.rn.f32 %f2059, %f2058, %f2053; + mul.rn.f32 %f2060, %f2059, %f2052; + sub.f32 %f2061, %f2047, %f2052; + add.f32 %f2062, %f2061, %f2061; + neg.f32 %f2063, %f2052; + fma.rn.f32 %f2064, %f2063, %f2047, %f2062; + mul.rn.f32 %f2065, %f2049, %f2064; + add.f32 %f2066, %f2060, %f2052; + sub.f32 %f2067, %f2052, %f2066; + add.f32 %f2068, %f2060, %f2067; + add.f32 %f2069, %f2065, %f2068; + add.f32 %f2070, %f2066, %f2069; + sub.f32 %f2071, %f2066, %f2070; + add.f32 %f2072, %f2069, %f2071; + mul.rn.f32 %f2074, %f2045, %f1974; + mul.rn.f32 %f2076, %f2045, %f1976; + add.f32 %f2077, %f2074, %f2070; + sub.f32 %f2078, %f2074, %f2077; + add.f32 %f2079, %f2070, %f2078; + add.f32 %f2080, %f2072, %f2079; + add.f32 %f2081, %f2076, %f2080; + add.f32 %f2082, %f2077, %f2081; + sub.f32 %f2083, %f2077, %f2082; + add.f32 %f2084, %f2081, %f2083; + mul.rn.f32 %f2085, %f1825, %f2082; + neg.f32 %f2086, %f2085; + fma.rn.f32 %f2087, %f1825, %f2082, %f2086; + fma.rn.f32 %f2088, %f1825, %f2084, %f2087; + fma.rn.f32 %f2090, %f1990, %f2082, %f2088; + add.rn.f32 %f2091, %f2085, %f2090; + neg.f32 %f2092, %f2091; + add.rn.f32 %f2093, %f2085, %f2092; + add.rn.f32 %f2094, %f2093, %f2090; + mov.b32 %r1178, %f2091; + setp.eq.s32 %p1100, %r1178, 1118925336; + add.s32 %r1179, %r1178, -1; + mov.b32 %f2095, %r1179; + add.f32 %f2096, %f2094, 0f37000000; + selp.f32 %f408, %f2096, %f2094, %p1100; + selp.f32 %f2097, %f2095, %f2091, %p1100; + mul.rn.f32 %f2098, %f2097, %f1999; + cvt.rzi.f32.f32 %f2099, %f2098; + abs.f32 %f2100, %f2099; + setp.gt.f32 %p1101, %f2100, 0f42FC0000; + mov.b32 %r1180, %f2099; + and.b32 %r1181, %r1180, -2147483648; + or.b32 %r1182, %r1181, 1123811328; + mov.b32 %f2101, %r1182; + selp.f32 %f2102, %f2101, %f2099, %p1101; + fma.rn.f32 %f2104, %f2102, %f2005, %f2097; + fma.rn.f32 %f2106, %f2102, %f2007, %f2104; + mul.f32 %f2107, %f2106, 0f3FB8AA3B; + add.f32 %f2108, %f2102, 0f4B40007F; + mov.b32 %r1183, %f2108; + shl.b32 %r1184, %r1183, 23; + mov.b32 %f2109, %r1184; + ex2.approx.ftz.f32 %f2110, %f2107; + mul.f32 %f409, %f2110, %f2109; + setp.eq.f32 %p1102, %f409, 0f7F800000; + mov.f32 %f3336, 0f7F800000; + @%p1102 bra $L__BB6_720; + + fma.rn.f32 %f3336, %f409, %f408, %f409; + +$L__BB6_720: + setp.lt.f32 %p1103, %f406, 0f00000000; + and.pred %p72, %p1103, %p1089; + setp.eq.f32 %p1105, %f406, 0f00000000; + @%p1105 bra $L__BB6_724; + bra.uni $L__BB6_721; + +$L__BB6_724: + add.f32 %f2115, %f406, %f406; + selp.f32 %f3338, %f2115, 0f00000000, %p1089; + bra.uni $L__BB6_725; + +$L__BB6_721: + mov.b32 %r1185, %f3336; + xor.b32 %r1186, %r1185, -2147483648; + mov.b32 %f2111, %r1186; + selp.f32 %f3338, %f2111, %f3336, %p72; + setp.geu.f32 %p1106, %f406, 0f00000000; + @%p1106 bra $L__BB6_725; + + cvt.rzi.f32.f32 %f2113, %f1825; + setp.eq.f32 %p1107, %f2113, 0f40000000; + @%p1107 bra $L__BB6_725; + + mov.f32 %f3338, 0f7FFFFFFF; + +$L__BB6_725: + add.f32 %f2116, %f407, 0f40000000; + mov.b32 %r1187, %f2116; + setp.lt.s32 %p1109, %r1187, 2139095040; + @%p1109 bra $L__BB6_730; + + setp.gtu.f32 %p1110, %f407, 0f7F800000; + @%p1110 bra $L__BB6_729; + bra.uni $L__BB6_727; + +$L__BB6_729: + add.f32 %f3338, %f406, 0f40000000; + bra.uni $L__BB6_730; + +$L__BB6_727: + setp.neu.f32 %p1111, %f407, 0f7F800000; + @%p1111 bra $L__BB6_730; + + selp.f32 %f3338, 0fFF800000, 0f7F800000, %p72; + +$L__BB6_730: + mul.f32 %f2118, %f3338, 0fBF000000; + setp.eq.f32 %p1112, %f406, 0f3F800000; + selp.f32 %f2119, 0fBF000000, %f2118, %p1112; + fma.rn.f32 %f2122, %f2119, %f2023, %f1844; + cvt.sat.f32.f32 %f2125, %f2122; + fma.rm.f32 %f2127, %f2125, %f2026, %f2028; + add.f32 %f2128, %f2127, 0fCB40007F; + neg.f32 %f2129, %f2128; + fma.rn.f32 %f2130, %f2119, %f1999, %f2129; + fma.rn.f32 %f2132, %f2119, %f2033, %f2130; + mov.b32 %r1188, %f2127; + shl.b32 %r1189, %r1188, 23; + mov.b32 %f2133, %r1189; + ex2.approx.ftz.f32 %f2134, %f2132; + mul.f32 %f2135, %f2134, %f2133; + sub.f32 %f2136, %f405, %f2135; + div.rn.f32 %f418, %f324, %f367; + mul.f32 %f2137, %f418, %f2136; + mul.f32 %f419, %f392, %f2137; + add.f32 %f2138, %f380, 0f3F000000; + sub.f32 %f2139, %f2138, %f3294; + div.rn.f32 %f420, %f2139, %f369; + abs.f32 %f421, %f420; + setp.lt.f32 %p1113, %f421, 0f00800000; + mul.f32 %f2140, %f421, 0f4B800000; + selp.f32 %f2141, %f2140, %f421, %p1113; + selp.f32 %f2142, 0fC3170000, 0fC2FE0000, %p1113; + mov.b32 %r1190, %f2141; + and.b32 %r1191, %r1190, 8388607; + or.b32 %r1192, %r1191, 1065353216; + mov.b32 %f2143, %r1192; + shr.u32 %r1193, %r1190, 23; + cvt.rn.f32.u32 %f2144, %r1193; + add.f32 %f2145, %f2142, %f2144; + setp.gt.f32 %p1114, %f2143, 0f3FB504F3; + mul.f32 %f2146, %f2143, 0f3F000000; + add.f32 %f2147, %f2145, 0f3F800000; + selp.f32 %f2148, %f2147, %f2145, %p1114; + selp.f32 %f2149, %f2146, %f2143, %p1114; + add.f32 %f2150, %f2149, 0fBF800000; + add.f32 %f2151, %f2149, 0f3F800000; + rcp.approx.ftz.f32 %f2152, %f2151; + add.f32 %f2153, %f2150, %f2150; + mul.f32 %f2155, %f2153, %f2152; + mul.f32 %f2156, %f2155, %f2155; + fma.rn.f32 %f2159, %f1956, %f2156, %f1955; + fma.rn.f32 %f2161, %f2159, %f2156, %f1958; + mul.rn.f32 %f2162, %f2161, %f2156; + mul.rn.f32 %f2163, %f2162, %f2155; + sub.f32 %f2164, %f2150, %f2155; + add.f32 %f2165, %f2164, %f2164; + neg.f32 %f2166, %f2155; + fma.rn.f32 %f2167, %f2166, %f2150, %f2165; + mul.rn.f32 %f2168, %f2152, %f2167; + add.f32 %f2169, %f2163, %f2155; + sub.f32 %f2170, %f2155, %f2169; + add.f32 %f2171, %f2163, %f2170; + add.f32 %f2172, %f2168, %f2171; + add.f32 %f2173, %f2169, %f2172; + sub.f32 %f2174, %f2169, %f2173; + add.f32 %f2175, %f2172, %f2174; + mul.rn.f32 %f2177, %f2148, %f1974; + mul.rn.f32 %f2179, %f2148, %f1976; + add.f32 %f2180, %f2177, %f2173; + sub.f32 %f2181, %f2177, %f2180; + add.f32 %f2182, %f2173, %f2181; + add.f32 %f2183, %f2175, %f2182; + add.f32 %f2184, %f2179, %f2183; + add.f32 %f2185, %f2180, %f2184; + sub.f32 %f2186, %f2180, %f2185; + add.f32 %f2187, %f2184, %f2186; + mul.rn.f32 %f2188, %f1825, %f2185; + neg.f32 %f2189, %f2188; + fma.rn.f32 %f2190, %f1825, %f2185, %f2189; + fma.rn.f32 %f2191, %f1825, %f2187, %f2190; + fma.rn.f32 %f2193, %f1990, %f2185, %f2191; + add.rn.f32 %f2194, %f2188, %f2193; + neg.f32 %f2195, %f2194; + add.rn.f32 %f2196, %f2188, %f2195; + add.rn.f32 %f2197, %f2196, %f2193; + mov.b32 %r1194, %f2194; + setp.eq.s32 %p1115, %r1194, 1118925336; + add.s32 %r1195, %r1194, -1; + mov.b32 %f2198, %r1195; + add.f32 %f2199, %f2197, 0f37000000; + selp.f32 %f422, %f2199, %f2197, %p1115; + selp.f32 %f2200, %f2198, %f2194, %p1115; + mul.rn.f32 %f2201, %f2200, %f1999; + cvt.rzi.f32.f32 %f2202, %f2201; + abs.f32 %f2203, %f2202; + setp.gt.f32 %p1116, %f2203, 0f42FC0000; + mov.b32 %r1196, %f2202; + and.b32 %r1197, %r1196, -2147483648; + or.b32 %r1198, %r1197, 1123811328; + mov.b32 %f2204, %r1198; + selp.f32 %f2205, %f2204, %f2202, %p1116; + fma.rn.f32 %f2207, %f2205, %f2005, %f2200; + fma.rn.f32 %f2209, %f2205, %f2007, %f2207; + mul.f32 %f2210, %f2209, 0f3FB8AA3B; + add.f32 %f2211, %f2205, 0f4B40007F; + mov.b32 %r1199, %f2211; + shl.b32 %r1200, %r1199, 23; + mov.b32 %f2212, %r1200; + ex2.approx.ftz.f32 %f2213, %f2210; + mul.f32 %f423, %f2213, %f2212; + setp.eq.f32 %p1117, %f423, 0f7F800000; + mov.f32 %f3339, 0f7F800000; + @%p1117 bra $L__BB6_732; + + fma.rn.f32 %f3339, %f423, %f422, %f423; + +$L__BB6_732: + setp.lt.f32 %p1118, %f420, 0f00000000; + and.pred %p73, %p1118, %p1089; + setp.eq.f32 %p1120, %f420, 0f00000000; + @%p1120 bra $L__BB6_736; + bra.uni $L__BB6_733; + +$L__BB6_736: + add.f32 %f2218, %f420, %f420; + selp.f32 %f3341, %f2218, 0f00000000, %p1089; + bra.uni $L__BB6_737; + +$L__BB6_733: + mov.b32 %r1201, %f3339; + xor.b32 %r1202, %r1201, -2147483648; + mov.b32 %f2214, %r1202; + selp.f32 %f3341, %f2214, %f3339, %p73; + setp.geu.f32 %p1121, %f420, 0f00000000; + @%p1121 bra $L__BB6_737; + + cvt.rzi.f32.f32 %f2216, %f1825; + setp.eq.f32 %p1122, %f2216, 0f40000000; + @%p1122 bra $L__BB6_737; + + mov.f32 %f3341, 0f7FFFFFFF; + +$L__BB6_737: + add.f32 %f2219, %f421, 0f40000000; + mov.b32 %r1203, %f2219; + setp.lt.s32 %p1124, %r1203, 2139095040; + @%p1124 bra $L__BB6_742; + + setp.gtu.f32 %p1125, %f421, 0f7F800000; + @%p1125 bra $L__BB6_741; + bra.uni $L__BB6_739; + +$L__BB6_741: + add.f32 %f3341, %f420, 0f40000000; + bra.uni $L__BB6_742; + +$L__BB6_739: + setp.neu.f32 %p1126, %f421, 0f7F800000; + @%p1126 bra $L__BB6_742; + + selp.f32 %f3341, 0fFF800000, 0f7F800000, %p73; + +$L__BB6_742: + mul.f32 %f2221, %f3341, 0fBF000000; + setp.eq.f32 %p1127, %f420, 0f3F800000; + selp.f32 %f2222, 0fBF000000, %f2221, %p1127; + fma.rn.f32 %f2225, %f2222, %f2023, %f1844; + cvt.sat.f32.f32 %f2228, %f2225; + fma.rm.f32 %f2230, %f2228, %f2026, %f2028; + add.f32 %f2231, %f2230, 0fCB40007F; + neg.f32 %f2232, %f2231; + fma.rn.f32 %f2233, %f2222, %f1999, %f2232; + fma.rn.f32 %f2235, %f2222, %f2033, %f2233; + mov.b32 %r1204, %f2230; + shl.b32 %r1205, %r1204, 23; + mov.b32 %f2236, %r1205; + ex2.approx.ftz.f32 %f2237, %f2235; + mul.f32 %f432, %f2237, %f2236; + div.rn.f32 %f433, %f387, %f369; + abs.f32 %f434, %f433; + setp.lt.f32 %p1128, %f434, 0f00800000; + mul.f32 %f2238, %f434, 0f4B800000; + selp.f32 %f2239, %f2238, %f434, %p1128; + selp.f32 %f2240, 0fC3170000, 0fC2FE0000, %p1128; + mov.b32 %r1206, %f2239; + and.b32 %r1207, %r1206, 8388607; + or.b32 %r1208, %r1207, 1065353216; + mov.b32 %f2241, %r1208; + shr.u32 %r1209, %r1206, 23; + cvt.rn.f32.u32 %f2242, %r1209; + add.f32 %f2243, %f2240, %f2242; + setp.gt.f32 %p1129, %f2241, 0f3FB504F3; + mul.f32 %f2244, %f2241, 0f3F000000; + add.f32 %f2245, %f2243, 0f3F800000; + selp.f32 %f2246, %f2245, %f2243, %p1129; + selp.f32 %f2247, %f2244, %f2241, %p1129; + add.f32 %f2248, %f2247, 0fBF800000; + add.f32 %f2249, %f2247, 0f3F800000; + rcp.approx.ftz.f32 %f2250, %f2249; + add.f32 %f2251, %f2248, %f2248; + mul.f32 %f2253, %f2251, %f2250; + mul.f32 %f2254, %f2253, %f2253; + fma.rn.f32 %f2257, %f1956, %f2254, %f1955; + fma.rn.f32 %f2259, %f2257, %f2254, %f1958; + mul.rn.f32 %f2260, %f2259, %f2254; + mul.rn.f32 %f2261, %f2260, %f2253; + sub.f32 %f2262, %f2248, %f2253; + add.f32 %f2263, %f2262, %f2262; + neg.f32 %f2264, %f2253; + fma.rn.f32 %f2265, %f2264, %f2248, %f2263; + mul.rn.f32 %f2266, %f2250, %f2265; + add.f32 %f2267, %f2261, %f2253; + sub.f32 %f2268, %f2253, %f2267; + add.f32 %f2269, %f2261, %f2268; + add.f32 %f2270, %f2266, %f2269; + add.f32 %f2271, %f2267, %f2270; + sub.f32 %f2272, %f2267, %f2271; + add.f32 %f2273, %f2270, %f2272; + mul.rn.f32 %f2275, %f2246, %f1974; + mul.rn.f32 %f2277, %f2246, %f1976; + add.f32 %f2278, %f2275, %f2271; + sub.f32 %f2279, %f2275, %f2278; + add.f32 %f2280, %f2271, %f2279; + add.f32 %f2281, %f2273, %f2280; + add.f32 %f2282, %f2277, %f2281; + add.f32 %f2283, %f2278, %f2282; + sub.f32 %f2284, %f2278, %f2283; + add.f32 %f2285, %f2282, %f2284; + mul.rn.f32 %f2286, %f1825, %f2283; + neg.f32 %f2287, %f2286; + fma.rn.f32 %f2288, %f1825, %f2283, %f2287; + fma.rn.f32 %f2289, %f1825, %f2285, %f2288; + fma.rn.f32 %f2291, %f1990, %f2283, %f2289; + add.rn.f32 %f2292, %f2286, %f2291; + neg.f32 %f2293, %f2292; + add.rn.f32 %f2294, %f2286, %f2293; + add.rn.f32 %f2295, %f2294, %f2291; + mov.b32 %r1210, %f2292; + setp.eq.s32 %p1130, %r1210, 1118925336; + add.s32 %r1211, %r1210, -1; + mov.b32 %f2296, %r1211; + add.f32 %f2297, %f2295, 0f37000000; + selp.f32 %f435, %f2297, %f2295, %p1130; + selp.f32 %f2298, %f2296, %f2292, %p1130; + mul.rn.f32 %f2299, %f2298, %f1999; + cvt.rzi.f32.f32 %f2300, %f2299; + abs.f32 %f2301, %f2300; + setp.gt.f32 %p1131, %f2301, 0f42FC0000; + mov.b32 %r1212, %f2300; + and.b32 %r1213, %r1212, -2147483648; + or.b32 %r1214, %r1213, 1123811328; + mov.b32 %f2302, %r1214; + selp.f32 %f2303, %f2302, %f2300, %p1131; + fma.rn.f32 %f2305, %f2303, %f2005, %f2298; + fma.rn.f32 %f2307, %f2303, %f2007, %f2305; + mul.f32 %f2308, %f2307, 0f3FB8AA3B; + add.f32 %f2309, %f2303, 0f4B40007F; + mov.b32 %r1215, %f2309; + shl.b32 %r1216, %r1215, 23; + mov.b32 %f2310, %r1216; + ex2.approx.ftz.f32 %f2311, %f2308; + mul.f32 %f436, %f2311, %f2310; + setp.eq.f32 %p1132, %f436, 0f7F800000; + mov.f32 %f3342, 0f7F800000; + @%p1132 bra $L__BB6_744; + + fma.rn.f32 %f3342, %f436, %f435, %f436; + +$L__BB6_744: + setp.lt.f32 %p1133, %f433, 0f00000000; + and.pred %p74, %p1133, %p1089; + setp.eq.f32 %p1135, %f433, 0f00000000; + @%p1135 bra $L__BB6_748; + bra.uni $L__BB6_745; + +$L__BB6_748: + add.f32 %f2316, %f433, %f433; + selp.f32 %f3344, %f2316, 0f00000000, %p1089; + bra.uni $L__BB6_749; + +$L__BB6_745: + mov.b32 %r1217, %f3342; + xor.b32 %r1218, %r1217, -2147483648; + mov.b32 %f2312, %r1218; + selp.f32 %f3344, %f2312, %f3342, %p74; + setp.geu.f32 %p1136, %f433, 0f00000000; + @%p1136 bra $L__BB6_749; + + cvt.rzi.f32.f32 %f2314, %f1825; + setp.eq.f32 %p1137, %f2314, 0f40000000; + @%p1137 bra $L__BB6_749; + + mov.f32 %f3344, 0f7FFFFFFF; + +$L__BB6_749: + add.f32 %f2317, %f434, 0f40000000; + mov.b32 %r1219, %f2317; + setp.lt.s32 %p1139, %r1219, 2139095040; + @%p1139 bra $L__BB6_754; + + setp.gtu.f32 %p1140, %f434, 0f7F800000; + @%p1140 bra $L__BB6_753; + bra.uni $L__BB6_751; + +$L__BB6_753: + add.f32 %f3344, %f433, 0f40000000; + bra.uni $L__BB6_754; + +$L__BB6_751: + setp.neu.f32 %p1141, %f434, 0f7F800000; + @%p1141 bra $L__BB6_754; + + selp.f32 %f3344, 0fFF800000, 0f7F800000, %p74; + +$L__BB6_754: + mul.f32 %f2319, %f3344, 0fBF000000; + setp.eq.f32 %p1142, %f433, 0f3F800000; + selp.f32 %f2320, 0fBF000000, %f2319, %p1142; + fma.rn.f32 %f2323, %f2320, %f2023, %f1844; + cvt.sat.f32.f32 %f2326, %f2323; + fma.rm.f32 %f2328, %f2326, %f2026, %f2028; + add.f32 %f2329, %f2328, 0fCB40007F; + neg.f32 %f2330, %f2329; + fma.rn.f32 %f2331, %f2320, %f1999, %f2330; + fma.rn.f32 %f2333, %f2320, %f2033, %f2331; + mov.b32 %r1220, %f2328; + shl.b32 %r1221, %r1220, 23; + mov.b32 %f2334, %r1221; + ex2.approx.ftz.f32 %f2335, %f2333; + mul.f32 %f2336, %f2335, %f2334; + sub.f32 %f2337, %f432, %f2336; + div.rn.f32 %f445, %f324, %f369; + mul.f32 %f2338, %f445, %f2337; + mul.f32 %f446, %f379, %f2338; + div.rn.f32 %f447, %f348, %f367; + abs.f32 %f448, %f447; + setp.lt.f32 %p1143, %f448, 0f00800000; + mul.f32 %f2339, %f448, 0f4B800000; + selp.f32 %f2340, %f2339, %f448, %p1143; + selp.f32 %f2341, 0fC3170000, 0fC2FE0000, %p1143; + mov.b32 %r1222, %f2340; + and.b32 %r1223, %r1222, 8388607; + or.b32 %r1224, %r1223, 1065353216; + mov.b32 %f2342, %r1224; + shr.u32 %r1225, %r1222, 23; + cvt.rn.f32.u32 %f2343, %r1225; + add.f32 %f2344, %f2341, %f2343; + setp.gt.f32 %p1144, %f2342, 0f3FB504F3; + mul.f32 %f2345, %f2342, 0f3F000000; + add.f32 %f2346, %f2344, 0f3F800000; + selp.f32 %f2347, %f2346, %f2344, %p1144; + selp.f32 %f2348, %f2345, %f2342, %p1144; + add.f32 %f2349, %f2348, 0fBF800000; + add.f32 %f2350, %f2348, 0f3F800000; + rcp.approx.ftz.f32 %f2351, %f2350; + add.f32 %f2352, %f2349, %f2349; + mul.f32 %f2354, %f2352, %f2351; + mul.f32 %f2355, %f2354, %f2354; + fma.rn.f32 %f2358, %f1956, %f2355, %f1955; + fma.rn.f32 %f2360, %f2358, %f2355, %f1958; + mul.rn.f32 %f2361, %f2360, %f2355; + mul.rn.f32 %f2362, %f2361, %f2354; + sub.f32 %f2363, %f2349, %f2354; + add.f32 %f2364, %f2363, %f2363; + neg.f32 %f2365, %f2354; + fma.rn.f32 %f2366, %f2365, %f2349, %f2364; + mul.rn.f32 %f2367, %f2351, %f2366; + add.f32 %f2368, %f2362, %f2354; + sub.f32 %f2369, %f2354, %f2368; + add.f32 %f2370, %f2362, %f2369; + add.f32 %f2371, %f2367, %f2370; + add.f32 %f2372, %f2368, %f2371; + sub.f32 %f2373, %f2368, %f2372; + add.f32 %f2374, %f2371, %f2373; + mul.rn.f32 %f2376, %f2347, %f1974; + mul.rn.f32 %f2378, %f2347, %f1976; + add.f32 %f2379, %f2376, %f2372; + sub.f32 %f2380, %f2376, %f2379; + add.f32 %f2381, %f2372, %f2380; + add.f32 %f2382, %f2374, %f2381; + add.f32 %f2383, %f2378, %f2382; + add.f32 %f2384, %f2379, %f2383; + sub.f32 %f2385, %f2379, %f2384; + add.f32 %f2386, %f2383, %f2385; + mul.rn.f32 %f2387, %f1825, %f2384; + neg.f32 %f2388, %f2387; + fma.rn.f32 %f2389, %f1825, %f2384, %f2388; + fma.rn.f32 %f2390, %f1825, %f2386, %f2389; + fma.rn.f32 %f2392, %f1990, %f2384, %f2390; + add.rn.f32 %f2393, %f2387, %f2392; + neg.f32 %f2394, %f2393; + add.rn.f32 %f2395, %f2387, %f2394; + add.rn.f32 %f2396, %f2395, %f2392; + mov.b32 %r1226, %f2393; + setp.eq.s32 %p1145, %r1226, 1118925336; + add.s32 %r1227, %r1226, -1; + mov.b32 %f2397, %r1227; + add.f32 %f2398, %f2396, 0f37000000; + selp.f32 %f449, %f2398, %f2396, %p1145; + selp.f32 %f2399, %f2397, %f2393, %p1145; + mul.rn.f32 %f2400, %f2399, %f1999; + cvt.rzi.f32.f32 %f2401, %f2400; + abs.f32 %f2402, %f2401; + setp.gt.f32 %p1146, %f2402, 0f42FC0000; + mov.b32 %r1228, %f2401; + and.b32 %r1229, %r1228, -2147483648; + or.b32 %r1230, %r1229, 1123811328; + mov.b32 %f2403, %r1230; + selp.f32 %f2404, %f2403, %f2401, %p1146; + fma.rn.f32 %f2406, %f2404, %f2005, %f2399; + fma.rn.f32 %f2408, %f2404, %f2007, %f2406; + mul.f32 %f2409, %f2408, 0f3FB8AA3B; + add.f32 %f2410, %f2404, 0f4B40007F; + mov.b32 %r1231, %f2410; + shl.b32 %r1232, %r1231, 23; + mov.b32 %f2411, %r1232; + ex2.approx.ftz.f32 %f2412, %f2409; + mul.f32 %f450, %f2412, %f2411; + setp.eq.f32 %p1147, %f450, 0f7F800000; + mov.f32 %f3345, 0f7F800000; + @%p1147 bra $L__BB6_756; + + fma.rn.f32 %f3345, %f450, %f449, %f450; + +$L__BB6_756: + setp.lt.f32 %p1148, %f447, 0f00000000; + and.pred %p75, %p1148, %p1089; + setp.eq.f32 %p1150, %f447, 0f00000000; + @%p1150 bra $L__BB6_760; + bra.uni $L__BB6_757; + +$L__BB6_760: + add.f32 %f2417, %f447, %f447; + selp.f32 %f3347, %f2417, 0f00000000, %p1089; + bra.uni $L__BB6_761; + +$L__BB6_757: + mov.b32 %r1233, %f3345; + xor.b32 %r1234, %r1233, -2147483648; + mov.b32 %f2413, %r1234; + selp.f32 %f3347, %f2413, %f3345, %p75; + setp.geu.f32 %p1151, %f447, 0f00000000; + @%p1151 bra $L__BB6_761; + + cvt.rzi.f32.f32 %f2415, %f1825; + setp.eq.f32 %p1152, %f2415, 0f40000000; + @%p1152 bra $L__BB6_761; + + mov.f32 %f3347, 0f7FFFFFFF; + +$L__BB6_761: + add.f32 %f2418, %f448, 0f40000000; + mov.b32 %r1235, %f2418; + setp.lt.s32 %p1154, %r1235, 2139095040; + @%p1154 bra $L__BB6_766; + + setp.gtu.f32 %p1155, %f448, 0f7F800000; + @%p1155 bra $L__BB6_765; + bra.uni $L__BB6_763; + +$L__BB6_765: + add.f32 %f3347, %f447, 0f40000000; + bra.uni $L__BB6_766; + +$L__BB6_763: + setp.neu.f32 %p1156, %f448, 0f7F800000; + @%p1156 bra $L__BB6_766; + + selp.f32 %f3347, 0fFF800000, 0f7F800000, %p75; + +$L__BB6_766: + mul.f32 %f2420, %f3347, 0fBF000000; + setp.eq.f32 %p1157, %f447, 0f3F800000; + selp.f32 %f2421, 0fBF000000, %f2420, %p1157; + fma.rn.f32 %f2424, %f2421, %f2023, %f1844; + cvt.sat.f32.f32 %f2427, %f2424; + fma.rm.f32 %f2429, %f2427, %f2026, %f2028; + add.f32 %f2430, %f2429, 0fCB40007F; neg.f32 %f2431, %f2430; - fma.rn.f32 %f2432, %f2079, %f2426, %f2431; - fma.rn.f32 %f2433, %f2079, %f2428, %f2432; - fma.rn.f32 %f2435, %f2265, %f2426, %f2433; - add.rn.f32 %f2436, %f2430, %f2435; - neg.f32 %f2437, %f2436; - add.rn.f32 %f2438, %f2430, %f2437; - add.rn.f32 %f2439, %f2438, %f2435; - mov.b32 %r251, %f2436; - setp.eq.s32 %p260, %r251, 1118925336; - add.s32 %r252, %r251, -1; - mov.b32 %f2440, %r252; - add.f32 %f2441, %f2439, 0f37000000; - selp.f32 %f2442, %f2440, %f2436, %p260; - selp.f32 %f515, %f2441, %f2439, %p260; - mul.f32 %f2443, %f2442, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2444, %f2443; - fma.rn.f32 %f2445, %f2444, %f2276, %f2442; - fma.rn.f32 %f2446, %f2444, %f2278, %f2445; - mul.f32 %f2447, %f2446, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2448, %f2447; - add.f32 %f2449, %f2444, 0f00000000; - ex2.approx.f32 %f2450, %f2449; - mul.f32 %f2451, %f2448, %f2450; - setp.lt.f32 %p261, %f2442, 0fC2D20000; - selp.f32 %f2452, 0f00000000, %f2451, %p261; - setp.gt.f32 %p262, %f2442, 0f42D20000; - selp.f32 %f3397, 0f7F800000, %f2452, %p262; - setp.eq.f32 %p263, %f3397, 0f7F800000; - @%p263 bra BB6_153; - - fma.rn.f32 %f3397, %f3397, %f515, %f3397; - -BB6_153: - setp.lt.f32 %p264, %f508, 0f00000000; - and.pred %p15, %p264, %p219; - mov.b32 %r253, %f3397; - xor.b32 %r254, %r253, -2147483648; - mov.b32 %f2453, %r254; - selp.f32 %f3399, %f2453, %f3397, %p15; - setp.eq.f32 %p266, %f508, 0f00000000; - @%p266 bra BB6_156; - bra.uni BB6_154; - -BB6_156: - add.f32 %f2456, %f508, %f508; - selp.f32 %f3399, %f2456, 0f00000000, %p219; - bra.uni BB6_157; - -BB6_154: - setp.geu.f32 %p267, %f508, 0f00000000; - @%p267 bra BB6_157; - - cvt.rzi.f32.f32 %f2455, %f2079; - setp.neu.f32 %p268, %f2455, 0f40000000; - selp.f32 %f3399, 0f7FFFFFFF, %f3399, %p268; - -BB6_157: - add.f32 %f2457, %f509, 0f40000000; - mov.b32 %r47, %f2457; - setp.lt.s32 %p270, %r47, 2139095040; - @%p270 bra BB6_162; - - setp.gtu.f32 %p271, %f509, 0f7F800000; - @%p271 bra BB6_161; - bra.uni BB6_159; - -BB6_161: - add.f32 %f3399, %f508, 0f40000000; - bra.uni BB6_162; - -BB6_159: - setp.neu.f32 %p272, %f509, 0f7F800000; - @%p272 bra BB6_162; - - selp.f32 %f3399, 0fFF800000, 0f7F800000, %p15; - -BB6_162: - mul.f32 %f2460, %f3399, 0fBF000000; - setp.eq.f32 %p273, %f508, 0f3F800000; - selp.f32 %f2461, 0fBF000000, %f2460, %p273; - mul.f32 %f2462, %f2461, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2463, %f2462; - fma.rn.f32 %f2465, %f2463, %f2276, %f2461; - fma.rn.f32 %f2467, %f2463, %f2278, %f2465; - mul.f32 %f2468, %f2467, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2469, %f2468; - add.f32 %f2470, %f2463, 0f00000000; - ex2.approx.f32 %f2471, %f2470; - mul.f32 %f2472, %f2469, %f2471; - setp.lt.f32 %p274, %f2461, 0fC2D20000; - selp.f32 %f2473, 0f00000000, %f2472, %p274; - setp.gt.f32 %p275, %f2461, 0f42D20000; - selp.f32 %f526, 0f7F800000, %f2473, %p275; - div.rn.f32 %f527, %f477, %f401; - abs.f32 %f528, %f527; - setp.lt.f32 %p276, %f528, 0f00800000; - mul.f32 %f2474, %f528, 0f4B800000; - selp.f32 %f2475, 0fC3170000, 0fC2FE0000, %p276; - selp.f32 %f2476, %f2474, %f528, %p276; - mov.b32 %r255, %f2476; - and.b32 %r256, %r255, 8388607; - or.b32 %r257, %r256, 1065353216; - mov.b32 %f2477, %r257; - shr.u32 %r258, %r255, 23; - cvt.rn.f32.u32 %f2478, %r258; - add.f32 %f2479, %f2475, %f2478; - setp.gt.f32 %p277, %f2477, 0f3FB504F3; - mul.f32 %f2480, %f2477, 0f3F000000; - add.f32 %f2481, %f2479, 0f3F800000; - selp.f32 %f2482, %f2480, %f2477, %p277; - selp.f32 %f2483, %f2481, %f2479, %p277; - add.f32 %f529, %f2482, 0fBF800000; - add.f32 %f2459, %f2482, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f2458,%f2459; - // inline asm - add.f32 %f531, %f529, %f529; - mul.f32 %f2484, %f2458, %f531; - mul.f32 %f2485, %f2484, %f2484; - fma.rn.f32 %f2488, %f2234, %f2485, %f2233; - fma.rn.f32 %f2490, %f2488, %f2485, %f2236; - mul.rn.f32 %f2491, %f2490, %f2485; - mul.rn.f32 %f2492, %f2491, %f2484; - sub.f32 %f2493, %f529, %f2484; - neg.f32 %f2494, %f2484; - add.f32 %f2495, %f2493, %f2493; - fma.rn.f32 %f2496, %f2494, %f529, %f2495; - mul.rn.f32 %f2497, %f2458, %f2496; - add.f32 %f2498, %f2492, %f2484; - sub.f32 %f2499, %f2484, %f2498; - add.f32 %f2500, %f2492, %f2499; - add.f32 %f2501, %f2497, %f2500; - add.f32 %f2502, %f2498, %f2501; - sub.f32 %f2503, %f2498, %f2502; - add.f32 %f2504, %f2501, %f2503; - mul.rn.f32 %f532, %f2483, %f2091; - mul.rn.f32 %f533, %f2483, %f2092; - add.f32 %f2507, %f532, %f2502; - sub.f32 %f2508, %f532, %f2507; - add.f32 %f2509, %f2502, %f2508; - add.f32 %f2510, %f2504, %f2509; - add.f32 %f2511, %f533, %f2510; - add.f32 %f2512, %f2507, %f2511; - sub.f32 %f2513, %f2507, %f2512; - add.f32 %f2514, %f2511, %f2513; - mul.rn.f32 %f2516, %f2079, %f2512; - neg.f32 %f2517, %f2516; - fma.rn.f32 %f2518, %f2079, %f2512, %f2517; - fma.rn.f32 %f2519, %f2079, %f2514, %f2518; - fma.rn.f32 %f2521, %f2265, %f2512, %f2519; - add.rn.f32 %f2522, %f2516, %f2521; - neg.f32 %f2523, %f2522; - add.rn.f32 %f2524, %f2516, %f2523; - add.rn.f32 %f2525, %f2524, %f2521; - mov.b32 %r259, %f2522; - setp.eq.s32 %p278, %r259, 1118925336; - add.s32 %r260, %r259, -1; - mov.b32 %f2526, %r260; - add.f32 %f2527, %f2525, 0f37000000; - selp.f32 %f2528, %f2526, %f2522, %p278; - selp.f32 %f534, %f2527, %f2525, %p278; - mul.f32 %f2529, %f2528, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2530, %f2529; - fma.rn.f32 %f2531, %f2530, %f2276, %f2528; - fma.rn.f32 %f2532, %f2530, %f2278, %f2531; - mul.f32 %f2533, %f2532, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2534, %f2533; - add.f32 %f2535, %f2530, 0f00000000; - ex2.approx.f32 %f2536, %f2535; - mul.f32 %f2537, %f2534, %f2536; - setp.lt.f32 %p279, %f2528, 0fC2D20000; - selp.f32 %f2538, 0f00000000, %f2537, %p279; - setp.gt.f32 %p280, %f2528, 0f42D20000; - selp.f32 %f3400, 0f7F800000, %f2538, %p280; - setp.eq.f32 %p281, %f3400, 0f7F800000; - @%p281 bra BB6_164; - - fma.rn.f32 %f3400, %f3400, %f534, %f3400; - -BB6_164: - setp.lt.f32 %p282, %f527, 0f00000000; - and.pred %p16, %p282, %p219; - mov.b32 %r261, %f3400; - xor.b32 %r262, %r261, -2147483648; - mov.b32 %f2539, %r262; - selp.f32 %f3402, %f2539, %f3400, %p16; - setp.eq.f32 %p284, %f527, 0f00000000; - @%p284 bra BB6_167; - bra.uni BB6_165; - -BB6_167: - add.f32 %f2542, %f527, %f527; - selp.f32 %f3402, %f2542, 0f00000000, %p219; - bra.uni BB6_168; - -BB6_165: - setp.geu.f32 %p285, %f527, 0f00000000; - @%p285 bra BB6_168; - - cvt.rzi.f32.f32 %f2541, %f2079; - setp.neu.f32 %p286, %f2541, 0f40000000; - selp.f32 %f3402, 0f7FFFFFFF, %f3402, %p286; - -BB6_168: - add.f32 %f2543, %f528, 0f40000000; - mov.b32 %r48, %f2543; - setp.lt.s32 %p288, %r48, 2139095040; - @%p288 bra BB6_173; - - setp.gtu.f32 %p289, %f528, 0f7F800000; - @%p289 bra BB6_172; - bra.uni BB6_170; - -BB6_172: - add.f32 %f3402, %f527, 0f40000000; - bra.uni BB6_173; - -BB6_170: - setp.neu.f32 %p290, %f528, 0f7F800000; - @%p290 bra BB6_173; - - selp.f32 %f3402, 0fFF800000, 0f7F800000, %p16; - -BB6_173: - mul.f32 %f2546, %f3402, 0fBF000000; - setp.eq.f32 %p291, %f527, 0f3F800000; - selp.f32 %f2547, 0fBF000000, %f2546, %p291; - mul.f32 %f2548, %f2547, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2549, %f2548; - fma.rn.f32 %f2551, %f2549, %f2276, %f2547; - fma.rn.f32 %f2553, %f2549, %f2278, %f2551; - mul.f32 %f2554, %f2553, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2555, %f2554; - add.f32 %f2556, %f2549, 0f00000000; - ex2.approx.f32 %f2557, %f2556; - mul.f32 %f2558, %f2555, %f2557; - setp.lt.f32 %p292, %f2547, 0fC2D20000; - selp.f32 %f2559, 0f00000000, %f2558, %p292; - setp.gt.f32 %p293, %f2547, 0f42D20000; - selp.f32 %f2560, 0f7F800000, %f2559, %p293; - sub.f32 %f2561, %f526, %f2560; - mul.f32 %f2562, %f433, %f2561; - mul.f32 %f545, %f475, %f2562; - // inline asm - rcp.approx.ftz.f32 %f2544,%f415; - // inline asm - mul.f32 %f2563, %f2544, %f416; - mul.f32 %f2564, %f2563, %f2563; - fma.rn.f32 %f2567, %f2234, %f2564, %f2233; - fma.rn.f32 %f2569, %f2567, %f2564, %f2236; - mul.rn.f32 %f2570, %f2569, %f2564; - mul.rn.f32 %f2571, %f2570, %f2563; - sub.f32 %f2572, %f414, %f2563; - neg.f32 %f2573, %f2563; - add.f32 %f2574, %f2572, %f2572; - fma.rn.f32 %f2575, %f2573, %f414, %f2574; - mul.rn.f32 %f2576, %f2544, %f2575; - add.f32 %f2577, %f2571, %f2563; - sub.f32 %f2578, %f2563, %f2577; - add.f32 %f2579, %f2571, %f2578; - add.f32 %f2580, %f2576, %f2579; - add.f32 %f2581, %f2577, %f2580; - sub.f32 %f2582, %f2577, %f2581; - add.f32 %f2583, %f2580, %f2582; - add.f32 %f2584, %f417, %f2581; - sub.f32 %f2585, %f417, %f2584; - add.f32 %f2586, %f2581, %f2585; - add.f32 %f2587, %f2583, %f2586; - add.f32 %f2588, %f418, %f2587; - add.f32 %f2589, %f2584, %f2588; - sub.f32 %f2590, %f2584, %f2589; - add.f32 %f2591, %f2588, %f2590; - mul.rn.f32 %f2593, %f2079, %f2589; - neg.f32 %f2594, %f2593; - fma.rn.f32 %f2595, %f2079, %f2589, %f2594; - fma.rn.f32 %f2596, %f2079, %f2591, %f2595; - fma.rn.f32 %f2598, %f2265, %f2589, %f2596; - add.rn.f32 %f2599, %f2593, %f2598; - neg.f32 %f2600, %f2599; - add.rn.f32 %f2601, %f2593, %f2600; - add.rn.f32 %f2602, %f2601, %f2598; - mov.b32 %r263, %f2599; - setp.eq.s32 %p294, %r263, 1118925336; - add.s32 %r264, %r263, -1; - mov.b32 %f2603, %r264; - add.f32 %f2604, %f2602, 0f37000000; - selp.f32 %f2605, %f2603, %f2599, %p294; - selp.f32 %f546, %f2604, %f2602, %p294; - mul.f32 %f2606, %f2605, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2607, %f2606; - fma.rn.f32 %f2608, %f2607, %f2276, %f2605; - fma.rn.f32 %f2609, %f2607, %f2278, %f2608; - mul.f32 %f2610, %f2609, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2611, %f2610; - add.f32 %f2612, %f2607, 0f00000000; - ex2.approx.f32 %f2613, %f2612; - mul.f32 %f2614, %f2611, %f2613; - setp.lt.f32 %p295, %f2605, 0fC2D20000; - selp.f32 %f2615, 0f00000000, %f2614, %p295; - setp.gt.f32 %p296, %f2605, 0f42D20000; - selp.f32 %f3403, 0f7F800000, %f2615, %p296; - setp.eq.f32 %p297, %f3403, 0f7F800000; - @%p297 bra BB6_175; - - fma.rn.f32 %f3403, %f3403, %f546, %f3403; - -BB6_175: - mov.b32 %r265, %f3403; - xor.b32 %r266, %r265, -2147483648; - mov.b32 %f2616, %r266; - selp.f32 %f550, %f2616, %f3403, %p11; - selp.f32 %f3404, %f419, %f550, %p238; - @%p13 bra BB6_177; - - cvt.rzi.f32.f32 %f2618, %f2079; - setp.neu.f32 %p299, %f2618, 0f40000000; - selp.f32 %f3404, 0f7FFFFFFF, %f550, %p299; - -BB6_177: - selp.f32 %f2621, %f420, %f3404, %p240; - selp.f32 %f2622, %f2621, %f421, %p241; - selp.f32 %f2623, %f2622, %f3404, %p242; - mul.f32 %f2624, %f2623, 0fBF000000; - selp.f32 %f2625, 0fBF000000, %f2624, %p243; - mul.f32 %f2626, %f2625, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2627, %f2626; - fma.rn.f32 %f2629, %f2627, %f2276, %f2625; - fma.rn.f32 %f2631, %f2627, %f2278, %f2629; - mul.f32 %f2632, %f2631, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2633, %f2632; - add.f32 %f2634, %f2627, 0f00000000; - ex2.approx.f32 %f2635, %f2634; - mul.f32 %f2636, %f2633, %f2635; - setp.lt.f32 %p304, %f2625, 0fC2D20000; - selp.f32 %f2637, 0f00000000, %f2636, %p304; - setp.gt.f32 %p305, %f2625, 0f42D20000; - selp.f32 %f554, 0f7F800000, %f2637, %p305; - // inline asm - rcp.approx.ftz.f32 %f2619,%f425; - // inline asm - mul.f32 %f2638, %f2619, %f426; - mul.f32 %f2639, %f2638, %f2638; - fma.rn.f32 %f2642, %f2234, %f2639, %f2233; - fma.rn.f32 %f2644, %f2642, %f2639, %f2236; - mul.rn.f32 %f2645, %f2644, %f2639; - mul.rn.f32 %f2646, %f2645, %f2638; - sub.f32 %f2647, %f424, %f2638; - neg.f32 %f2648, %f2638; - add.f32 %f2649, %f2647, %f2647; - fma.rn.f32 %f2650, %f2648, %f424, %f2649; - mul.rn.f32 %f2651, %f2619, %f2650; - add.f32 %f2652, %f2646, %f2638; - sub.f32 %f2653, %f2638, %f2652; - add.f32 %f2654, %f2646, %f2653; - add.f32 %f2655, %f2651, %f2654; - add.f32 %f2656, %f2652, %f2655; - sub.f32 %f2657, %f2652, %f2656; - add.f32 %f2658, %f2655, %f2657; - add.f32 %f2659, %f427, %f2656; - sub.f32 %f2660, %f427, %f2659; - add.f32 %f2661, %f2656, %f2660; - add.f32 %f2662, %f2658, %f2661; - add.f32 %f2663, %f428, %f2662; - add.f32 %f2664, %f2659, %f2663; - sub.f32 %f2665, %f2659, %f2664; - add.f32 %f2666, %f2663, %f2665; - mul.rn.f32 %f2668, %f2079, %f2664; - neg.f32 %f2669, %f2668; - fma.rn.f32 %f2670, %f2079, %f2664, %f2669; - fma.rn.f32 %f2671, %f2079, %f2666, %f2670; - fma.rn.f32 %f2673, %f2265, %f2664, %f2671; - add.rn.f32 %f2674, %f2668, %f2673; - neg.f32 %f2675, %f2674; - add.rn.f32 %f2676, %f2668, %f2675; - add.rn.f32 %f2677, %f2676, %f2673; - mov.b32 %r267, %f2674; - setp.eq.s32 %p306, %r267, 1118925336; - add.s32 %r268, %r267, -1; - mov.b32 %f2678, %r268; - add.f32 %f2679, %f2677, 0f37000000; - selp.f32 %f2680, %f2678, %f2674, %p306; - selp.f32 %f555, %f2679, %f2677, %p306; - mul.f32 %f2681, %f2680, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2682, %f2681; - fma.rn.f32 %f2683, %f2682, %f2276, %f2680; - fma.rn.f32 %f2684, %f2682, %f2278, %f2683; - mul.f32 %f2685, %f2684, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2686, %f2685; - add.f32 %f2687, %f2682, 0f00000000; - ex2.approx.f32 %f2688, %f2687; - mul.f32 %f2689, %f2686, %f2688; - setp.lt.f32 %p307, %f2680, 0fC2D20000; - selp.f32 %f2690, 0f00000000, %f2689, %p307; - setp.gt.f32 %p308, %f2680, 0f42D20000; - selp.f32 %f3405, 0f7F800000, %f2690, %p308; - setp.eq.f32 %p309, %f3405, 0f7F800000; - @%p309 bra BB6_179; - - fma.rn.f32 %f3405, %f3405, %f555, %f3405; - -BB6_179: - mov.b32 %r269, %f3405; - xor.b32 %r270, %r269, -2147483648; - mov.b32 %f2691, %r270; - selp.f32 %f559, %f2691, %f3405, %p12; - selp.f32 %f3406, %f429, %f559, %p250; - @%p14 bra BB6_181; - - cvt.rzi.f32.f32 %f2693, %f2079; - setp.neu.f32 %p311, %f2693, 0f40000000; - selp.f32 %f3406, 0f7FFFFFFF, %f559, %p311; - -BB6_181: - selp.f32 %f2696, %f430, %f3406, %p252; - selp.f32 %f2697, %f2696, %f431, %p253; - selp.f32 %f2698, %f2697, %f3406, %p254; - mul.f32 %f2699, %f2698, 0fBF000000; - selp.f32 %f2700, 0fBF000000, %f2699, %p255; - mul.f32 %f2701, %f2700, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2702, %f2701; - fma.rn.f32 %f2704, %f2702, %f2276, %f2700; - fma.rn.f32 %f2706, %f2702, %f2278, %f2704; - mul.f32 %f2707, %f2706, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2708, %f2707; - add.f32 %f2709, %f2702, 0f00000000; - ex2.approx.f32 %f2710, %f2709; - mul.f32 %f2711, %f2708, %f2710; - setp.lt.f32 %p316, %f2700, 0fC2D20000; - selp.f32 %f2712, 0f00000000, %f2711, %p316; - setp.gt.f32 %p317, %f2700, 0f42D20000; - selp.f32 %f2713, 0f7F800000, %f2712, %p317; - mul.f32 %f2714, %f402, %f2713; - mul.f32 %f2715, %f403, %f554; - sub.f32 %f2716, %f2715, %f2714; - mul.f32 %f2717, %f434, %f2716; - mul.f32 %f563, %f489, %f2717; - // inline asm - rcp.approx.ftz.f32 %f2694,%f2365; - // inline asm - mul.f32 %f2718, %f2694, %f512; - mul.f32 %f2719, %f2718, %f2718; - fma.rn.f32 %f2722, %f2234, %f2719, %f2233; - fma.rn.f32 %f2724, %f2722, %f2719, %f2236; - mul.rn.f32 %f2725, %f2724, %f2719; - mul.rn.f32 %f2726, %f2725, %f2718; - sub.f32 %f2727, %f510, %f2718; - neg.f32 %f2728, %f2718; - add.f32 %f2729, %f2727, %f2727; - fma.rn.f32 %f2730, %f2728, %f510, %f2729; - mul.rn.f32 %f2731, %f2694, %f2730; - add.f32 %f2732, %f2726, %f2718; - sub.f32 %f2733, %f2718, %f2732; - add.f32 %f2734, %f2726, %f2733; - add.f32 %f2735, %f2731, %f2734; - add.f32 %f2736, %f2732, %f2735; - sub.f32 %f2737, %f2732, %f2736; - add.f32 %f2738, %f2735, %f2737; - add.f32 %f2739, %f513, %f2736; - sub.f32 %f2740, %f513, %f2739; - add.f32 %f2741, %f2736, %f2740; - add.f32 %f2742, %f2738, %f2741; - add.f32 %f2743, %f514, %f2742; - add.f32 %f2744, %f2739, %f2743; - sub.f32 %f2745, %f2739, %f2744; - add.f32 %f2746, %f2743, %f2745; - mul.rn.f32 %f2748, %f2079, %f2744; - neg.f32 %f2749, %f2748; - fma.rn.f32 %f2750, %f2079, %f2744, %f2749; - fma.rn.f32 %f2751, %f2079, %f2746, %f2750; - fma.rn.f32 %f2753, %f2265, %f2744, %f2751; - add.rn.f32 %f2754, %f2748, %f2753; - neg.f32 %f2755, %f2754; - add.rn.f32 %f2756, %f2748, %f2755; - add.rn.f32 %f2757, %f2756, %f2753; - mov.b32 %r271, %f2754; - setp.eq.s32 %p318, %r271, 1118925336; - add.s32 %r272, %r271, -1; - mov.b32 %f2758, %r272; - add.f32 %f2759, %f2757, 0f37000000; - selp.f32 %f2760, %f2758, %f2754, %p318; - selp.f32 %f564, %f2759, %f2757, %p318; - mul.f32 %f2761, %f2760, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2762, %f2761; - fma.rn.f32 %f2763, %f2762, %f2276, %f2760; - fma.rn.f32 %f2764, %f2762, %f2278, %f2763; - mul.f32 %f2765, %f2764, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2766, %f2765; - add.f32 %f2767, %f2762, 0f00000000; - ex2.approx.f32 %f2768, %f2767; - mul.f32 %f2769, %f2766, %f2768; - setp.lt.f32 %p319, %f2760, 0fC2D20000; - selp.f32 %f2770, 0f00000000, %f2769, %p319; - setp.gt.f32 %p320, %f2760, 0f42D20000; - selp.f32 %f3407, 0f7F800000, %f2770, %p320; - setp.eq.f32 %p321, %f3407, 0f7F800000; - @%p321 bra BB6_183; - - fma.rn.f32 %f3407, %f3407, %f564, %f3407; - -BB6_183: - mov.b32 %r273, %f3407; - xor.b32 %r274, %r273, -2147483648; - mov.b32 %f2771, %r274; - selp.f32 %f3409, %f2771, %f3407, %p15; - @%p266 bra BB6_186; - bra.uni BB6_184; - -BB6_186: - add.f32 %f2774, %f508, %f508; - selp.f32 %f3409, %f2774, 0f00000000, %p219; - bra.uni BB6_187; - -BB6_184: - setp.geu.f32 %p323, %f508, 0f00000000; - @%p323 bra BB6_187; - - cvt.rzi.f32.f32 %f2773, %f2079; - setp.neu.f32 %p324, %f2773, 0f40000000; - selp.f32 %f3409, 0f7FFFFFFF, %f3409, %p324; - -BB6_187: - @%p270 bra BB6_192; - - setp.gtu.f32 %p327, %f509, 0f7F800000; - @%p327 bra BB6_191; - bra.uni BB6_189; - -BB6_191: - add.f32 %f3409, %f508, 0f40000000; - bra.uni BB6_192; - -BB6_189: - setp.neu.f32 %p328, %f509, 0f7F800000; - @%p328 bra BB6_192; - - selp.f32 %f3409, 0fFF800000, 0f7F800000, %p15; - -BB6_192: - mul.f32 %f2777, %f3409, 0fBF000000; - selp.f32 %f2778, 0fBF000000, %f2777, %p273; - mul.f32 %f2779, %f2778, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2780, %f2779; - fma.rn.f32 %f2782, %f2780, %f2276, %f2778; - fma.rn.f32 %f2784, %f2780, %f2278, %f2782; - mul.f32 %f2785, %f2784, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2786, %f2785; - add.f32 %f2787, %f2780, 0f00000000; - ex2.approx.f32 %f2788, %f2787; - mul.f32 %f2789, %f2786, %f2788; - setp.lt.f32 %p330, %f2778, 0fC2D20000; - selp.f32 %f2790, 0f00000000, %f2789, %p330; - setp.gt.f32 %p331, %f2778, 0f42D20000; - selp.f32 %f575, 0f7F800000, %f2790, %p331; - // inline asm - rcp.approx.ftz.f32 %f2775,%f2459; - // inline asm - mul.f32 %f2791, %f2775, %f531; - mul.f32 %f2792, %f2791, %f2791; - fma.rn.f32 %f2795, %f2234, %f2792, %f2233; - fma.rn.f32 %f2797, %f2795, %f2792, %f2236; - mul.rn.f32 %f2798, %f2797, %f2792; - mul.rn.f32 %f2799, %f2798, %f2791; - sub.f32 %f2800, %f529, %f2791; - neg.f32 %f2801, %f2791; - add.f32 %f2802, %f2800, %f2800; - fma.rn.f32 %f2803, %f2801, %f529, %f2802; - mul.rn.f32 %f2804, %f2775, %f2803; - add.f32 %f2805, %f2799, %f2791; - sub.f32 %f2806, %f2791, %f2805; - add.f32 %f2807, %f2799, %f2806; - add.f32 %f2808, %f2804, %f2807; - add.f32 %f2809, %f2805, %f2808; - sub.f32 %f2810, %f2805, %f2809; - add.f32 %f2811, %f2808, %f2810; - add.f32 %f2812, %f532, %f2809; - sub.f32 %f2813, %f532, %f2812; - add.f32 %f2814, %f2809, %f2813; - add.f32 %f2815, %f2811, %f2814; - add.f32 %f2816, %f533, %f2815; - add.f32 %f2817, %f2812, %f2816; - sub.f32 %f2818, %f2812, %f2817; - add.f32 %f2819, %f2816, %f2818; - mul.rn.f32 %f2821, %f2079, %f2817; - neg.f32 %f2822, %f2821; - fma.rn.f32 %f2823, %f2079, %f2817, %f2822; - fma.rn.f32 %f2824, %f2079, %f2819, %f2823; - fma.rn.f32 %f2826, %f2265, %f2817, %f2824; - add.rn.f32 %f2827, %f2821, %f2826; - neg.f32 %f2828, %f2827; - add.rn.f32 %f2829, %f2821, %f2828; - add.rn.f32 %f2830, %f2829, %f2826; - mov.b32 %r275, %f2827; - setp.eq.s32 %p332, %r275, 1118925336; - add.s32 %r276, %r275, -1; - mov.b32 %f2831, %r276; - add.f32 %f2832, %f2830, 0f37000000; - selp.f32 %f2833, %f2831, %f2827, %p332; - selp.f32 %f576, %f2832, %f2830, %p332; - mul.f32 %f2834, %f2833, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2835, %f2834; - fma.rn.f32 %f2836, %f2835, %f2276, %f2833; - fma.rn.f32 %f2837, %f2835, %f2278, %f2836; - mul.f32 %f2838, %f2837, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2839, %f2838; - add.f32 %f2840, %f2835, 0f00000000; - ex2.approx.f32 %f2841, %f2840; - mul.f32 %f2842, %f2839, %f2841; - setp.lt.f32 %p333, %f2833, 0fC2D20000; - selp.f32 %f2843, 0f00000000, %f2842, %p333; - setp.gt.f32 %p334, %f2833, 0f42D20000; - selp.f32 %f3410, 0f7F800000, %f2843, %p334; - setp.eq.f32 %p335, %f3410, 0f7F800000; - @%p335 bra BB6_194; - - fma.rn.f32 %f3410, %f3410, %f576, %f3410; - -BB6_194: - mov.b32 %r277, %f3410; - xor.b32 %r278, %r277, -2147483648; - mov.b32 %f2844, %r278; - selp.f32 %f3412, %f2844, %f3410, %p16; - @%p284 bra BB6_197; - bra.uni BB6_195; - -BB6_197: - add.f32 %f2847, %f527, %f527; - selp.f32 %f3412, %f2847, 0f00000000, %p219; - bra.uni BB6_198; - -BB6_195: - setp.geu.f32 %p337, %f527, 0f00000000; - @%p337 bra BB6_198; - - cvt.rzi.f32.f32 %f2846, %f2079; - setp.neu.f32 %p338, %f2846, 0f40000000; - selp.f32 %f3412, 0f7FFFFFFF, %f3412, %p338; - -BB6_198: - @%p288 bra BB6_203; - - setp.gtu.f32 %p341, %f528, 0f7F800000; - @%p341 bra BB6_202; - bra.uni BB6_200; - -BB6_202: - add.f32 %f3412, %f527, 0f40000000; - bra.uni BB6_203; - -BB6_200: - setp.neu.f32 %p342, %f528, 0f7F800000; - @%p342 bra BB6_203; - - selp.f32 %f3412, 0fFF800000, 0f7F800000, %p16; - -BB6_203: - mul.f32 %f2848, %f3412, 0fBF000000; - selp.f32 %f2849, 0fBF000000, %f2848, %p291; - mul.f32 %f2850, %f2849, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2851, %f2850; - fma.rn.f32 %f2853, %f2851, %f2276, %f2849; - fma.rn.f32 %f2855, %f2851, %f2278, %f2853; - mul.f32 %f2856, %f2855, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2857, %f2856; - add.f32 %f2858, %f2851, 0f00000000; - ex2.approx.f32 %f2859, %f2858; - mul.f32 %f2860, %f2857, %f2859; - setp.lt.f32 %p344, %f2849, 0fC2D20000; - selp.f32 %f2861, 0f00000000, %f2860, %p344; - setp.gt.f32 %p345, %f2849, 0f42D20000; - selp.f32 %f2862, 0f7F800000, %f2861, %p345; - mul.f32 %f2863, %f477, %f2862; - mul.f32 %f2864, %f478, %f575; - sub.f32 %f2865, %f2864, %f2863; - mul.f32 %f2866, %f435, %f2865; - mul.f32 %f2867, %f475, %f2866; - mul.f32 %f2868, %f2867, %f437; - fma.rn.f32 %f2869, %f563, %f436, %f2868; - mul.f32 %f2870, %f475, %f3368; - fma.rn.f32 %f587, %f489, %f2870, %f3277; - mad.lo.s32 %r279, %r325, %r63, %r324; - add.s32 %r280, %r279, %r4; - mul.wide.s32 %rd85, %r280, 4; - add.s64 %rd86, %rd1, %rd85; - ld.global.f32 %f588, [%rd86]; - mul.f32 %f2871, %f507, %f507; - div.rn.f32 %f2872, %f2871, %f587; - add.f32 %f3387, %f2872, %f3387; - mul.f32 %f2873, %f545, %f507; - div.rn.f32 %f2874, %f2873, %f587; - add.f32 %f3386, %f2874, %f3386; - mul.f32 %f2875, %f475, %f489; - mul.f32 %f2876, %f2875, %f507; - div.rn.f32 %f2877, %f2876, %f587; - add.f32 %f3385, %f2877, %f3385; - div.rn.f32 %f2878, %f507, %f587; - add.f32 %f3384, %f2878, %f3384; - mul.f32 %f2879, %f2869, %f507; - div.rn.f32 %f2880, %f2879, %f587; - add.f32 %f3383, %f2880, %f3383; - mul.f32 %f2881, %f545, %f545; - div.rn.f32 %f2882, %f2881, %f587; - add.f32 %f3382, %f2882, %f3382; - mul.f32 %f2883, %f2875, %f545; - div.rn.f32 %f2884, %f2883, %f587; - add.f32 %f3381, %f2884, %f3381; - div.rn.f32 %f2885, %f545, %f587; - add.f32 %f3380, %f2885, %f3380; - mul.f32 %f2886, %f2869, %f545; - div.rn.f32 %f2887, %f2886, %f587; - add.f32 %f3379, %f2887, %f3379; - mul.f32 %f2888, %f2875, %f2875; - div.rn.f32 %f2889, %f2888, %f587; - add.f32 %f3378, %f2889, %f3378; - div.rn.f32 %f2890, %f2875, %f587; - add.f32 %f3377, %f2890, %f3377; - mul.f32 %f2891, %f2869, %f2875; - div.rn.f32 %f2892, %f2891, %f587; - add.f32 %f3376, %f2892, %f3376; - rcp.rn.f32 %f2893, %f587; - add.f32 %f3375, %f2893, %f3375; - div.rn.f32 %f2894, %f2869, %f587; - add.f32 %f3374, %f2894, %f3374; - mul.f32 %f2895, %f2869, %f2869; - div.rn.f32 %f2896, %f2895, %f587; - add.f32 %f3373, %f2896, %f3373; - add.f32 %f604, %f3365, %f587; - setp.leu.f32 %p346, %f604, 0f00000000; - @%p346 bra BB6_212; - - add.f32 %f605, %f3365, %f588; - setp.gt.f32 %p347, %f605, 0f00000000; - @%p347 bra BB6_206; - bra.uni BB6_205; - -BB6_206: - setp.lt.f32 %p348, %f604, 0f00800000; - mul.f32 %f2898, %f604, 0f4B000000; - selp.f32 %f607, %f2898, %f604, %p348; - selp.f32 %f2899, 0fC1B80000, 0f00000000, %p348; - mov.b32 %r281, %f607; - add.s32 %r282, %r281, -1059760811; - and.b32 %r283, %r282, -8388608; - sub.s32 %r284, %r281, %r283; - mov.b32 %f2900, %r284; - cvt.rn.f32.s32 %f2901, %r283; - mov.f32 %f2902, 0f34000000; - fma.rn.f32 %f2903, %f2901, %f2902, %f2899; - add.f32 %f2904, %f2900, 0fBF800000; - mov.f32 %f2905, 0f3E1039F6; - mov.f32 %f2906, 0fBE055027; - fma.rn.f32 %f2907, %f2906, %f2904, %f2905; - mov.f32 %f2908, 0fBDF8CDCC; - fma.rn.f32 %f2909, %f2907, %f2904, %f2908; - mov.f32 %f2910, 0f3E0F2955; - fma.rn.f32 %f2911, %f2909, %f2904, %f2910; - mov.f32 %f2912, 0fBE2AD8B9; - fma.rn.f32 %f2913, %f2911, %f2904, %f2912; - mov.f32 %f2914, 0f3E4CED0B; - fma.rn.f32 %f2915, %f2913, %f2904, %f2914; - mov.f32 %f2916, 0fBE7FFF22; - fma.rn.f32 %f2917, %f2915, %f2904, %f2916; - mov.f32 %f2918, 0f3EAAAA78; - fma.rn.f32 %f2919, %f2917, %f2904, %f2918; - mov.f32 %f2920, 0fBF000000; - fma.rn.f32 %f2921, %f2919, %f2904, %f2920; - mul.f32 %f2922, %f2904, %f2921; - fma.rn.f32 %f2923, %f2922, %f2904, %f2904; - mov.f32 %f2924, 0f3F317218; - fma.rn.f32 %f3413, %f2903, %f2924, %f2923; - setp.lt.u32 %p349, %r281, 2139095040; - @%p349 bra BB6_208; - - mov.f32 %f2925, 0f7F800000; - fma.rn.f32 %f3413, %f607, %f2925, %f2925; - -BB6_208: - setp.eq.f32 %p350, %f607, 0f00000000; - selp.f32 %f2926, 0fFF800000, %f3413, %p350; - mul.f32 %f2927, %f605, %f2926; - sub.f32 %f611, %f2927, %f587; - mul.f32 %f2928, %f605, 0f4B000000; - setp.lt.f32 %p351, %f605, 0f00800000; - selp.f32 %f612, %f2928, %f605, %p351; - selp.f32 %f2929, 0fC1B80000, 0f00000000, %p351; - mov.b32 %r285, %f612; - add.s32 %r286, %r285, -1059760811; - and.b32 %r287, %r286, -8388608; - sub.s32 %r288, %r285, %r287; - mov.b32 %f2930, %r288; - cvt.rn.f32.s32 %f2931, %r287; - fma.rn.f32 %f2933, %f2931, %f2902, %f2929; - add.f32 %f2934, %f2930, 0fBF800000; - fma.rn.f32 %f2937, %f2906, %f2934, %f2905; - fma.rn.f32 %f2939, %f2937, %f2934, %f2908; - fma.rn.f32 %f2941, %f2939, %f2934, %f2910; - fma.rn.f32 %f2943, %f2941, %f2934, %f2912; - fma.rn.f32 %f2945, %f2943, %f2934, %f2914; - fma.rn.f32 %f2947, %f2945, %f2934, %f2916; - fma.rn.f32 %f2949, %f2947, %f2934, %f2918; - fma.rn.f32 %f2951, %f2949, %f2934, %f2920; - mul.f32 %f2952, %f2934, %f2951; - fma.rn.f32 %f2953, %f2952, %f2934, %f2934; - fma.rn.f32 %f3414, %f2933, %f2924, %f2953; - setp.lt.u32 %p352, %r285, 2139095040; - @%p352 bra BB6_210; - - mov.f32 %f2955, 0f7F800000; - fma.rn.f32 %f3414, %f612, %f2955, %f2955; - -BB6_210: - setp.eq.f32 %p353, %f612, 0f00000000; - selp.f32 %f2956, 0fFF800000, %f3414, %p353; - mul.f32 %f2957, %f605, %f2956; - sub.f32 %f2958, %f611, %f2957; - add.f32 %f3415, %f588, %f2958; - bra.uni BB6_211; - -BB6_205: - neg.f32 %f2897, %f587; - sub.f32 %f3415, %f2897, %f3365; - -BB6_211: - add.f32 %f3388, %f3388, %f3415; - -BB6_212: - add.s32 %r325, %r325, 1; - setp.lt.s32 %p354, %r325, %r63; - @%p354 bra BB6_131; - - st.local.f32 [%rd2], %f3387; - st.local.f32 [%rd2+4], %f3386; - st.local.f32 [%rd2+20], %f3386; - st.local.f32 [%rd2+8], %f3385; - st.local.f32 [%rd2+40], %f3385; - st.local.f32 [%rd2+12], %f3384; - st.local.f32 [%rd2+60], %f3384; - st.local.f32 [%rd2+16], %f3383; - st.local.f32 [%rd2+80], %f3383; - st.local.f32 [%rd2+24], %f3382; - st.local.f32 [%rd2+28], %f3381; - st.local.f32 [%rd2+44], %f3381; - st.local.f32 [%rd2+32], %f3380; - st.local.f32 [%rd2+64], %f3380; - st.local.f32 [%rd2+36], %f3379; - st.local.f32 [%rd2+84], %f3379; - st.local.f32 [%rd2+48], %f3378; - st.local.f32 [%rd2+52], %f3377; - st.local.f32 [%rd2+68], %f3377; - st.local.f32 [%rd2+56], %f3376; - st.local.f32 [%rd2+88], %f3376; - st.local.f32 [%rd2+72], %f3375; - st.local.f32 [%rd2+76], %f3374; - st.local.f32 [%rd2+92], %f3374; - st.local.f32 [%rd2+96], %f3373; - add.s32 %r324, %r324, 1; - setp.lt.s32 %p355, %r324, %r63; - @%p355 bra BB6_130; - -BB6_214: - ld.local.f32 %f2960, [%rd2]; - rcp.rn.f32 %f621, %f2960; - ld.local.f32 %f2961, [%rd2+4]; - mul.f32 %f622, %f621, %f2961; - ld.local.f32 %f2962, [%rd2+8]; - ld.local.f32 %f2963, [%rd2+12]; - ld.local.f32 %f2964, [%rd2+16]; - ld.local.f32 %f2965, [%rd2+20]; - ld.local.f32 %f2966, [%rd2+24]; - ld.local.f32 %f2967, [%rd2+28]; - ld.local.f32 %f2968, [%rd2+32]; - ld.local.f32 %f2969, [%rd2+36]; - ld.local.f32 %f2970, [%rd2+40]; - ld.local.f32 %f2971, [%rd2+44]; - st.local.f32 [%rd2+4], %f622; - mul.f32 %f623, %f621, %f2962; - st.local.f32 [%rd2+8], %f623; - mul.f32 %f624, %f621, %f2963; - st.local.f32 [%rd2+12], %f624; - mul.f32 %f625, %f621, %f2964; - st.local.f32 [%rd2+16], %f625; - ld.local.f32 %f2972, [%rd2+4]; - fma.rn.f32 %f2973, %f2972, %f2965, 0f00000000; - sub.f32 %f2974, %f2966, %f2973; - ld.local.f32 %f626, [%rd2+20]; - st.local.f32 [%rd2+24], %f2974; - fma.rn.f32 %f2975, %f623, %f626, 0f00000000; - rcp.rn.f32 %f627, %f2974; - sub.f32 %f2976, %f2967, %f2975; - mul.f32 %f628, %f627, %f2976; - st.local.f32 [%rd2+28], %f628; - fma.rn.f32 %f2977, %f624, %f626, 0f00000000; - sub.f32 %f2978, %f2968, %f2977; - mul.f32 %f629, %f627, %f2978; - st.local.f32 [%rd2+32], %f629; - fma.rn.f32 %f2979, %f625, %f626, 0f00000000; - sub.f32 %f2980, %f2969, %f2979; - mul.f32 %f630, %f627, %f2980; - st.local.f32 [%rd2+36], %f630; - ld.local.f32 %f2981, [%rd2+4]; - fma.rn.f32 %f2982, %f2981, %f2970, 0f00000000; - sub.f32 %f631, %f2971, %f2982; - st.local.f32 [%rd2+44], %f631; - add.s64 %rd109, %rd2, 40; - add.s64 %rd108, %rd2, 8; - mov.u32 %r326, -1; - -BB6_215: - ld.local.f32 %f2983, [%rd109]; - ld.local.f32 %f2984, [%rd108]; - fma.rn.f32 %f3418, %f2984, %f2983, %f3418; - add.s64 %rd109, %rd109, 4; - add.s64 %rd108, %rd108, 20; - add.s32 %r326, %r326, 1; - setp.lt.s32 %p356, %r326, 1; - @%p356 bra BB6_215; - - ld.local.f32 %f2986, [%rd2+48]; - sub.f32 %f2987, %f2986, %f3418; - ld.local.f32 %f634, [%rd2+40]; - ld.local.f32 %f2988, [%rd2+52]; - ld.local.f32 %f2989, [%rd2+56]; - ld.local.f32 %f2990, [%rd2+60]; - ld.local.f32 %f2991, [%rd2+4]; - ld.local.f32 %f2992, [%rd2+64]; - st.local.f32 [%rd2+48], %f2987; - fma.rn.f32 %f2993, %f624, %f634, 0f00000000; - fma.rn.f32 %f2994, %f629, %f631, %f2993; - rcp.rn.f32 %f635, %f2987; - sub.f32 %f2995, %f2988, %f2994; - mul.f32 %f636, %f635, %f2995; - st.local.f32 [%rd2+52], %f636; - fma.rn.f32 %f2996, %f625, %f634, 0f00000000; - fma.rn.f32 %f2997, %f630, %f631, %f2996; - sub.f32 %f2998, %f2989, %f2997; - mul.f32 %f637, %f635, %f2998; - st.local.f32 [%rd2+56], %f637; - fma.rn.f32 %f2999, %f2991, %f2990, 0f00000000; - sub.f32 %f638, %f2992, %f2999; - st.local.f32 [%rd2+64], %f638; - add.s64 %rd111, %rd2, 60; - add.s64 %rd110, %rd2, 8; - mov.f32 %f3419, 0f00000000; - mov.u32 %r327, -1; - -BB6_217: - ld.local.f32 %f3000, [%rd111]; - ld.local.f32 %f3001, [%rd110]; - fma.rn.f32 %f3419, %f3001, %f3000, %f3419; - add.s64 %rd111, %rd111, 4; - add.s64 %rd110, %rd110, 20; - add.s32 %r327, %r327, 1; - setp.lt.s32 %p357, %r327, 1; - @%p357 bra BB6_217; - - ld.local.f32 %f3003, [%rd2+68]; - sub.f32 %f641, %f3003, %f3419; - st.local.f32 [%rd2+68], %f641; - add.s64 %rd113, %rd2, 60; - add.s64 %rd112, %rd2, 12; - mov.f32 %f3420, 0f00000000; - mov.u32 %r328, -1; - -BB6_219: - ld.local.f32 %f3004, [%rd113]; - ld.local.f32 %f3005, [%rd112]; - fma.rn.f32 %f3420, %f3005, %f3004, %f3420; - add.s64 %rd113, %rd113, 4; - add.s64 %rd112, %rd112, 20; - add.s32 %r328, %r328, 1; - setp.lt.s32 %p358, %r328, 2; - @%p358 bra BB6_219; - - ld.local.f32 %f3007, [%rd2+72]; - sub.f32 %f3008, %f3007, %f3420; - ld.local.f32 %f644, [%rd2+60]; - ld.local.f32 %f3009, [%rd2+76]; - ld.local.f32 %f3010, [%rd2+80]; - ld.local.f32 %f3011, [%rd2+4]; - ld.local.f32 %f3012, [%rd2+84]; - st.local.f32 [%rd2+72], %f3008; - fma.rn.f32 %f3013, %f625, %f644, 0f00000000; - fma.rn.f32 %f3014, %f630, %f638, %f3013; - fma.rn.f32 %f3015, %f637, %f641, %f3014; - rcp.rn.f32 %f645, %f3008; - sub.f32 %f3016, %f3009, %f3015; - mul.f32 %f646, %f645, %f3016; - st.local.f32 [%rd2+76], %f646; - fma.rn.f32 %f3017, %f3011, %f3010, 0f00000000; - sub.f32 %f647, %f3012, %f3017; - st.local.f32 [%rd2+84], %f647; - add.s64 %rd115, %rd2, 80; - add.s64 %rd114, %rd2, 8; - mov.f32 %f3421, 0f00000000; - mov.u32 %r329, -1; - -BB6_221: - ld.local.f32 %f3018, [%rd115]; - ld.local.f32 %f3019, [%rd114]; - fma.rn.f32 %f3421, %f3019, %f3018, %f3421; - add.s64 %rd115, %rd115, 4; - add.s64 %rd114, %rd114, 20; - add.s32 %r329, %r329, 1; - setp.lt.s32 %p359, %r329, 1; - @%p359 bra BB6_221; - - ld.local.f32 %f3021, [%rd2+88]; - sub.f32 %f650, %f3021, %f3421; - st.local.f32 [%rd2+88], %f650; - add.s64 %rd117, %rd2, 80; - add.s64 %rd116, %rd2, 12; - mov.f32 %f3422, 0f00000000; - mov.u32 %r330, -1; - -BB6_223: - ld.local.f32 %f3022, [%rd117]; - ld.local.f32 %f3023, [%rd116]; - fma.rn.f32 %f3422, %f3023, %f3022, %f3422; - add.s64 %rd117, %rd117, 4; - add.s64 %rd116, %rd116, 20; - add.s32 %r330, %r330, 1; - setp.lt.s32 %p360, %r330, 2; - @%p360 bra BB6_223; - - ld.local.f32 %f3025, [%rd2+92]; - sub.f32 %f653, %f3025, %f3422; - st.local.f32 [%rd2+92], %f653; - add.s64 %rd119, %rd2, 80; - add.s64 %rd118, %rd2, 16; - mov.f32 %f3423, 0f00000000; - mov.u32 %r331, -1; - -BB6_225: - ld.local.f32 %f3026, [%rd119]; - ld.local.f32 %f3027, [%rd118]; - fma.rn.f32 %f3423, %f3027, %f3026, %f3423; - add.s64 %rd119, %rd119, 4; - add.s64 %rd118, %rd118, 20; - add.s32 %r331, %r331, 1; - setp.lt.s32 %p361, %r331, 3; - @%p361 bra BB6_225; - - ld.param.u64 %rd106, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_17]; - ld.param.u64 %rd105, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_16]; - ld.param.u32 %r303, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_18]; - mul.wide.s32 %rd104, %r1, 4; - ld.param.u64 %rd103, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_15]; - ld.local.f32 %f3028, [%rd2+96]; - sub.f32 %f3029, %f3028, %f3423; - ld.local.f32 %f3030, [%rd2+80]; - st.local.f32 [%rd2+96], %f3029; - add.f32 %f3031, %f622, 0f00000000; - mov.f32 %f3032, 0f00000000; - sub.f32 %f3033, %f3032, %f3031; - add.f32 %f3034, %f623, 0f00000000; - fma.rn.f32 %f3035, %f628, %f3033, %f3034; - sub.f32 %f3036, %f3032, %f3035; - add.f32 %f3037, %f624, 0f00000000; - fma.rn.f32 %f3038, %f629, %f3033, %f3037; - fma.rn.f32 %f3039, %f636, %f3036, %f3038; - sub.f32 %f3040, %f3032, %f3039; - add.f32 %f3041, %f625, 0f00000000; - fma.rn.f32 %f3042, %f630, %f3033, %f3041; - fma.rn.f32 %f3043, %f637, %f3036, %f3042; - fma.rn.f32 %f3044, %f646, %f3040, %f3043; - sub.f32 %f3045, %f3032, %f3044; - div.rn.f32 %f3046, %f3045, %f3029; - fma.rn.f32 %f3047, %f653, %f3046, 0f00000000; - sub.f32 %f3048, %f3040, %f3047; - mul.f32 %f3049, %f645, %f3048; - fma.rn.f32 %f3050, %f641, %f3049, 0f00000000; - fma.rn.f32 %f3051, %f650, %f3046, %f3050; - sub.f32 %f3052, %f3036, %f3051; - mul.f32 %f3053, %f635, %f3052; - fma.rn.f32 %f3054, %f631, %f3053, 0f00000000; - fma.rn.f32 %f3055, %f638, %f3049, %f3054; - fma.rn.f32 %f3056, %f647, %f3046, %f3055; - sub.f32 %f3057, %f3033, %f3056; - mul.f32 %f3058, %f627, %f3057; - fma.rn.f32 %f3059, %f626, %f3058, 0f00000000; - fma.rn.f32 %f3060, %f634, %f3053, %f3059; - fma.rn.f32 %f3061, %f644, %f3049, %f3060; - fma.rn.f32 %f3062, %f3030, %f3046, %f3061; - mov.f32 %f3063, 0f3F800000; - sub.f32 %f3064, %f3063, %f3062; - mul.f32 %f3065, %f621, %f3064; - fma.rn.f32 %f3066, %f622, 0f00000000, 0f00000000; - sub.f32 %f3067, %f3063, %f3066; - fma.rn.f32 %f3068, %f623, 0f00000000, 0f00000000; - fma.rn.f32 %f3069, %f628, %f3067, %f3068; - sub.f32 %f3070, %f3032, %f3069; - fma.rn.f32 %f3071, %f624, 0f00000000, 0f00000000; - fma.rn.f32 %f3072, %f629, %f3067, %f3071; - fma.rn.f32 %f3073, %f636, %f3070, %f3072; - sub.f32 %f3074, %f3032, %f3073; - fma.rn.f32 %f3075, %f625, 0f00000000, 0f00000000; - fma.rn.f32 %f3076, %f630, %f3067, %f3075; - fma.rn.f32 %f3077, %f637, %f3070, %f3076; - fma.rn.f32 %f3078, %f646, %f3074, %f3077; - sub.f32 %f3079, %f3032, %f3078; - div.rn.f32 %f3080, %f3079, %f3029; - fma.rn.f32 %f3081, %f653, %f3080, 0f00000000; - sub.f32 %f3082, %f3074, %f3081; - mul.f32 %f3083, %f645, %f3082; - fma.rn.f32 %f3084, %f641, %f3083, 0f00000000; - fma.rn.f32 %f3085, %f650, %f3080, %f3084; - sub.f32 %f3086, %f3070, %f3085; - mul.f32 %f3087, %f635, %f3086; - fma.rn.f32 %f3088, %f631, %f3087, 0f00000000; - fma.rn.f32 %f3089, %f638, %f3083, %f3088; - fma.rn.f32 %f3090, %f647, %f3080, %f3089; - sub.f32 %f3091, %f3067, %f3090; - mul.f32 %f3092, %f627, %f3091; - sub.f32 %f3093, %f3032, %f3066; - fma.rn.f32 %f3094, %f628, %f3093, %f3068; - sub.f32 %f3095, %f3063, %f3094; - fma.rn.f32 %f3096, %f629, %f3093, %f3071; - fma.rn.f32 %f3097, %f636, %f3095, %f3096; - sub.f32 %f3098, %f3032, %f3097; - fma.rn.f32 %f3099, %f630, %f3093, %f3075; - fma.rn.f32 %f3100, %f637, %f3095, %f3099; - fma.rn.f32 %f3101, %f646, %f3098, %f3100; - sub.f32 %f3102, %f3032, %f3101; - div.rn.f32 %f3103, %f3102, %f3029; - fma.rn.f32 %f3104, %f653, %f3103, 0f00000000; - sub.f32 %f3105, %f3098, %f3104; - mul.f32 %f3106, %f645, %f3105; - fma.rn.f32 %f3107, %f641, %f3106, 0f00000000; - fma.rn.f32 %f3108, %f650, %f3103, %f3107; - sub.f32 %f3109, %f3095, %f3108; - mul.f32 %f3110, %f635, %f3109; - sub.f32 %f3111, %f3032, %f3094; - fma.rn.f32 %f3112, %f636, %f3111, %f3096; - sub.f32 %f3113, %f3063, %f3112; - fma.rn.f32 %f3114, %f637, %f3111, %f3099; - fma.rn.f32 %f3115, %f646, %f3113, %f3114; - sub.f32 %f3116, %f3032, %f3115; - div.rn.f32 %f3117, %f3116, %f3029; - fma.rn.f32 %f3118, %f653, %f3117, 0f00000000; - sub.f32 %f3119, %f3113, %f3118; - mul.f32 %f3120, %f645, %f3119; - sub.f32 %f3121, %f3032, %f3112; - fma.rn.f32 %f3122, %f646, %f3121, %f3114; - sub.f32 %f3123, %f3063, %f3122; - div.rn.f32 %f3124, %f3123, %f3029; - cvta.to.global.u64 %rd87, %rd103; - add.s64 %rd89, %rd87, %rd104; - st.global.f32 [%rd89], %f3370; - shl.b32 %r299, %r303, 2; - cvt.s64.s32 %rd90, %r299; - add.s64 %rd91, %rd89, %rd90; - st.global.f32 [%rd91], %f3369; - add.s64 %rd92, %rd91, %rd90; - st.global.f32 [%rd92], %f3368; - add.s64 %rd93, %rd92, %rd90; - st.global.f32 [%rd93], %f3277; - add.s64 %rd94, %rd93, %rd90; - st.global.f32 [%rd94], %f3366; - cvta.to.global.u64 %rd95, %rd105; - add.s64 %rd96, %rd95, %rd104; - st.global.f32 [%rd96], %f3065; - add.s64 %rd97, %rd96, %rd90; - st.global.f32 [%rd97], %f3092; - add.s64 %rd98, %rd97, %rd90; - st.global.f32 [%rd98], %f3110; - add.s64 %rd99, %rd98, %rd90; - st.global.f32 [%rd99], %f3120; - add.s64 %rd100, %rd99, %rd90; - st.global.f32 [%rd100], %f3124; - cvta.to.global.u64 %rd101, %rd106; - add.s64 %rd102, %rd101, %rd104; - st.global.f32 [%rd102], %f3388; - -BB6_227: + fma.rn.f32 %f2432, %f2421, %f1999, %f2431; + fma.rn.f32 %f2434, %f2421, %f2033, %f2432; + mov.b32 %r1236, %f2429; + shl.b32 %r1237, %r1236, 23; + mov.b32 %f2435, %r1237; + ex2.approx.ftz.f32 %f2436, %f2434; + mul.f32 %f459, %f2436, %f2435; + div.rn.f32 %f460, %f344, %f367; + abs.f32 %f461, %f460; + setp.lt.f32 %p1158, %f461, 0f00800000; + mul.f32 %f2437, %f461, 0f4B800000; + selp.f32 %f2438, %f2437, %f461, %p1158; + selp.f32 %f2439, 0fC3170000, 0fC2FE0000, %p1158; + mov.b32 %r1238, %f2438; + and.b32 %r1239, %r1238, 8388607; + or.b32 %r1240, %r1239, 1065353216; + mov.b32 %f2440, %r1240; + shr.u32 %r1241, %r1238, 23; + cvt.rn.f32.u32 %f2441, %r1241; + add.f32 %f2442, %f2439, %f2441; + setp.gt.f32 %p1159, %f2440, 0f3FB504F3; + mul.f32 %f2443, %f2440, 0f3F000000; + add.f32 %f2444, %f2442, 0f3F800000; + selp.f32 %f2445, %f2444, %f2442, %p1159; + selp.f32 %f2446, %f2443, %f2440, %p1159; + add.f32 %f2447, %f2446, 0fBF800000; + add.f32 %f2448, %f2446, 0f3F800000; + rcp.approx.ftz.f32 %f2449, %f2448; + add.f32 %f2450, %f2447, %f2447; + mul.f32 %f2452, %f2450, %f2449; + mul.f32 %f2453, %f2452, %f2452; + fma.rn.f32 %f2456, %f1956, %f2453, %f1955; + fma.rn.f32 %f2458, %f2456, %f2453, %f1958; + mul.rn.f32 %f2459, %f2458, %f2453; + mul.rn.f32 %f2460, %f2459, %f2452; + sub.f32 %f2461, %f2447, %f2452; + add.f32 %f2462, %f2461, %f2461; + neg.f32 %f2463, %f2452; + fma.rn.f32 %f2464, %f2463, %f2447, %f2462; + mul.rn.f32 %f2465, %f2449, %f2464; + add.f32 %f2466, %f2460, %f2452; + sub.f32 %f2467, %f2452, %f2466; + add.f32 %f2468, %f2460, %f2467; + add.f32 %f2469, %f2465, %f2468; + add.f32 %f2470, %f2466, %f2469; + sub.f32 %f2471, %f2466, %f2470; + add.f32 %f2472, %f2469, %f2471; + mul.rn.f32 %f2474, %f2445, %f1974; + mul.rn.f32 %f2476, %f2445, %f1976; + add.f32 %f2477, %f2474, %f2470; + sub.f32 %f2478, %f2474, %f2477; + add.f32 %f2479, %f2470, %f2478; + add.f32 %f2480, %f2472, %f2479; + add.f32 %f2481, %f2476, %f2480; + add.f32 %f2482, %f2477, %f2481; + sub.f32 %f2483, %f2477, %f2482; + add.f32 %f2484, %f2481, %f2483; + mul.rn.f32 %f2485, %f1825, %f2482; + neg.f32 %f2486, %f2485; + fma.rn.f32 %f2487, %f1825, %f2482, %f2486; + fma.rn.f32 %f2488, %f1825, %f2484, %f2487; + fma.rn.f32 %f2490, %f1990, %f2482, %f2488; + add.rn.f32 %f2491, %f2485, %f2490; + neg.f32 %f2492, %f2491; + add.rn.f32 %f2493, %f2485, %f2492; + add.rn.f32 %f2494, %f2493, %f2490; + mov.b32 %r1242, %f2491; + setp.eq.s32 %p1160, %r1242, 1118925336; + add.s32 %r1243, %r1242, -1; + mov.b32 %f2495, %r1243; + add.f32 %f2496, %f2494, 0f37000000; + selp.f32 %f462, %f2496, %f2494, %p1160; + selp.f32 %f2497, %f2495, %f2491, %p1160; + mul.rn.f32 %f2498, %f2497, %f1999; + cvt.rzi.f32.f32 %f2499, %f2498; + abs.f32 %f2500, %f2499; + setp.gt.f32 %p1161, %f2500, 0f42FC0000; + mov.b32 %r1244, %f2499; + and.b32 %r1245, %r1244, -2147483648; + or.b32 %r1246, %r1245, 1123811328; + mov.b32 %f2501, %r1246; + selp.f32 %f2502, %f2501, %f2499, %p1161; + fma.rn.f32 %f2504, %f2502, %f2005, %f2497; + fma.rn.f32 %f2506, %f2502, %f2007, %f2504; + mul.f32 %f2507, %f2506, 0f3FB8AA3B; + add.f32 %f2508, %f2502, 0f4B40007F; + mov.b32 %r1247, %f2508; + shl.b32 %r1248, %r1247, 23; + mov.b32 %f2509, %r1248; + ex2.approx.ftz.f32 %f2510, %f2507; + mul.f32 %f463, %f2510, %f2509; + setp.eq.f32 %p1162, %f463, 0f7F800000; + mov.f32 %f3348, 0f7F800000; + @%p1162 bra $L__BB6_768; + + fma.rn.f32 %f3348, %f463, %f462, %f463; + +$L__BB6_768: + setp.lt.f32 %p1163, %f460, 0f00000000; + and.pred %p76, %p1163, %p1089; + setp.eq.f32 %p1165, %f460, 0f00000000; + @%p1165 bra $L__BB6_772; + bra.uni $L__BB6_769; + +$L__BB6_772: + add.f32 %f2515, %f460, %f460; + selp.f32 %f3350, %f2515, 0f00000000, %p1089; + bra.uni $L__BB6_773; + +$L__BB6_769: + mov.b32 %r1249, %f3348; + xor.b32 %r1250, %r1249, -2147483648; + mov.b32 %f2511, %r1250; + selp.f32 %f3350, %f2511, %f3348, %p76; + setp.geu.f32 %p1166, %f460, 0f00000000; + @%p1166 bra $L__BB6_773; + + cvt.rzi.f32.f32 %f2513, %f1825; + setp.eq.f32 %p1167, %f2513, 0f40000000; + @%p1167 bra $L__BB6_773; + + mov.f32 %f3350, 0f7FFFFFFF; + +$L__BB6_773: + add.f32 %f2516, %f461, 0f40000000; + mov.b32 %r1251, %f2516; + setp.lt.s32 %p1169, %r1251, 2139095040; + @%p1169 bra $L__BB6_778; + + setp.gtu.f32 %p1170, %f461, 0f7F800000; + @%p1170 bra $L__BB6_777; + bra.uni $L__BB6_775; + +$L__BB6_777: + add.f32 %f3350, %f460, 0f40000000; + bra.uni $L__BB6_778; + +$L__BB6_775: + setp.neu.f32 %p1171, %f461, 0f7F800000; + @%p1171 bra $L__BB6_778; + + selp.f32 %f3350, 0fFF800000, 0f7F800000, %p76; + +$L__BB6_778: + mul.f32 %f2518, %f3350, 0fBF000000; + setp.eq.f32 %p1172, %f460, 0f3F800000; + selp.f32 %f2519, 0fBF000000, %f2518, %p1172; + fma.rn.f32 %f2522, %f2519, %f2023, %f1844; + cvt.sat.f32.f32 %f2525, %f2522; + fma.rm.f32 %f2527, %f2525, %f2026, %f2028; + add.f32 %f2528, %f2527, 0fCB40007F; + neg.f32 %f2529, %f2528; + fma.rn.f32 %f2530, %f2519, %f1999, %f2529; + fma.rn.f32 %f2532, %f2519, %f2033, %f2530; + mov.b32 %r1252, %f2527; + shl.b32 %r1253, %r1252, 23; + mov.b32 %f2533, %r1253; + ex2.approx.ftz.f32 %f2534, %f2532; + mul.f32 %f2535, %f2534, %f2533; + mul.f32 %f2536, %f344, %f2535; + mul.f32 %f2537, %f349, %f459; + sub.f32 %f2538, %f2537, %f2536; + div.rn.f32 %f2539, %f418, %f367; + mul.f32 %f2540, %f2539, %f2538; + mul.f32 %f472, %f392, %f2540; + add.f32 %f2541, %f380, 0f3F800000; + sub.f32 %f2542, %f2541, %f3294; + div.rn.f32 %f473, %f2542, %f369; + abs.f32 %f474, %f473; + setp.lt.f32 %p1173, %f474, 0f00800000; + mul.f32 %f2543, %f474, 0f4B800000; + selp.f32 %f2544, %f2543, %f474, %p1173; + selp.f32 %f2545, 0fC3170000, 0fC2FE0000, %p1173; + mov.b32 %r1254, %f2544; + and.b32 %r1255, %r1254, 8388607; + or.b32 %r1256, %r1255, 1065353216; + mov.b32 %f2546, %r1256; + shr.u32 %r1257, %r1254, 23; + cvt.rn.f32.u32 %f2547, %r1257; + add.f32 %f2548, %f2545, %f2547; + setp.gt.f32 %p1174, %f2546, 0f3FB504F3; + mul.f32 %f2549, %f2546, 0f3F000000; + add.f32 %f2550, %f2548, 0f3F800000; + selp.f32 %f2551, %f2550, %f2548, %p1174; + selp.f32 %f2552, %f2549, %f2546, %p1174; + add.f32 %f2553, %f2552, 0fBF800000; + add.f32 %f2554, %f2552, 0f3F800000; + rcp.approx.ftz.f32 %f2555, %f2554; + add.f32 %f2556, %f2553, %f2553; + mul.f32 %f2558, %f2556, %f2555; + mul.f32 %f2559, %f2558, %f2558; + fma.rn.f32 %f2562, %f1956, %f2559, %f1955; + fma.rn.f32 %f2564, %f2562, %f2559, %f1958; + mul.rn.f32 %f2565, %f2564, %f2559; + mul.rn.f32 %f2566, %f2565, %f2558; + sub.f32 %f2567, %f2553, %f2558; + add.f32 %f2568, %f2567, %f2567; + neg.f32 %f2569, %f2558; + fma.rn.f32 %f2570, %f2569, %f2553, %f2568; + mul.rn.f32 %f2571, %f2555, %f2570; + add.f32 %f2572, %f2566, %f2558; + sub.f32 %f2573, %f2558, %f2572; + add.f32 %f2574, %f2566, %f2573; + add.f32 %f2575, %f2571, %f2574; + add.f32 %f2576, %f2572, %f2575; + sub.f32 %f2577, %f2572, %f2576; + add.f32 %f2578, %f2575, %f2577; + mul.rn.f32 %f2580, %f2551, %f1974; + mul.rn.f32 %f2582, %f2551, %f1976; + add.f32 %f2583, %f2580, %f2576; + sub.f32 %f2584, %f2580, %f2583; + add.f32 %f2585, %f2576, %f2584; + add.f32 %f2586, %f2578, %f2585; + add.f32 %f2587, %f2582, %f2586; + add.f32 %f2588, %f2583, %f2587; + sub.f32 %f2589, %f2583, %f2588; + add.f32 %f2590, %f2587, %f2589; + mul.rn.f32 %f2591, %f1825, %f2588; + neg.f32 %f2592, %f2591; + fma.rn.f32 %f2593, %f1825, %f2588, %f2592; + fma.rn.f32 %f2594, %f1825, %f2590, %f2593; + fma.rn.f32 %f2596, %f1990, %f2588, %f2594; + add.rn.f32 %f2597, %f2591, %f2596; + neg.f32 %f2598, %f2597; + add.rn.f32 %f2599, %f2591, %f2598; + add.rn.f32 %f2600, %f2599, %f2596; + mov.b32 %r1258, %f2597; + setp.eq.s32 %p1175, %r1258, 1118925336; + add.s32 %r1259, %r1258, -1; + mov.b32 %f2601, %r1259; + add.f32 %f2602, %f2600, 0f37000000; + selp.f32 %f475, %f2602, %f2600, %p1175; + selp.f32 %f2603, %f2601, %f2597, %p1175; + mul.rn.f32 %f2604, %f2603, %f1999; + cvt.rzi.f32.f32 %f2605, %f2604; + abs.f32 %f2606, %f2605; + setp.gt.f32 %p1176, %f2606, 0f42FC0000; + mov.b32 %r1260, %f2605; + and.b32 %r1261, %r1260, -2147483648; + or.b32 %r1262, %r1261, 1123811328; + mov.b32 %f2607, %r1262; + selp.f32 %f2608, %f2607, %f2605, %p1176; + fma.rn.f32 %f2610, %f2608, %f2005, %f2603; + fma.rn.f32 %f2612, %f2608, %f2007, %f2610; + mul.f32 %f2613, %f2612, 0f3FB8AA3B; + add.f32 %f2614, %f2608, 0f4B40007F; + mov.b32 %r1263, %f2614; + shl.b32 %r1264, %r1263, 23; + mov.b32 %f2615, %r1264; + ex2.approx.ftz.f32 %f2616, %f2613; + mul.f32 %f476, %f2616, %f2615; + setp.eq.f32 %p1177, %f476, 0f7F800000; + mov.f32 %f3351, 0f7F800000; + @%p1177 bra $L__BB6_780; + + fma.rn.f32 %f3351, %f476, %f475, %f476; + +$L__BB6_780: + setp.lt.f32 %p1178, %f473, 0f00000000; + and.pred %p77, %p1178, %p1089; + setp.eq.f32 %p1180, %f473, 0f00000000; + @%p1180 bra $L__BB6_784; + bra.uni $L__BB6_781; + +$L__BB6_784: + add.f32 %f2621, %f473, %f473; + selp.f32 %f3353, %f2621, 0f00000000, %p1089; + bra.uni $L__BB6_785; + +$L__BB6_781: + mov.b32 %r1265, %f3351; + xor.b32 %r1266, %r1265, -2147483648; + mov.b32 %f2617, %r1266; + selp.f32 %f3353, %f2617, %f3351, %p77; + setp.geu.f32 %p1181, %f473, 0f00000000; + @%p1181 bra $L__BB6_785; + + cvt.rzi.f32.f32 %f2619, %f1825; + setp.eq.f32 %p1182, %f2619, 0f40000000; + @%p1182 bra $L__BB6_785; + + mov.f32 %f3353, 0f7FFFFFFF; + +$L__BB6_785: + add.f32 %f2622, %f474, 0f40000000; + mov.b32 %r1267, %f2622; + setp.lt.s32 %p1184, %r1267, 2139095040; + @%p1184 bra $L__BB6_790; + + setp.gtu.f32 %p1185, %f474, 0f7F800000; + @%p1185 bra $L__BB6_789; + bra.uni $L__BB6_787; + +$L__BB6_789: + add.f32 %f3353, %f473, 0f40000000; + bra.uni $L__BB6_790; + +$L__BB6_787: + setp.neu.f32 %p1186, %f474, 0f7F800000; + @%p1186 bra $L__BB6_790; + + selp.f32 %f3353, 0fFF800000, 0f7F800000, %p77; + +$L__BB6_790: + mul.f32 %f2624, %f3353, 0fBF000000; + setp.eq.f32 %p1187, %f473, 0f3F800000; + selp.f32 %f2625, 0fBF000000, %f2624, %p1187; + fma.rn.f32 %f2628, %f2625, %f2023, %f1844; + cvt.sat.f32.f32 %f2631, %f2628; + fma.rm.f32 %f2633, %f2631, %f2026, %f2028; + add.f32 %f2634, %f2633, 0fCB40007F; + neg.f32 %f2635, %f2634; + fma.rn.f32 %f2636, %f2625, %f1999, %f2635; + fma.rn.f32 %f2638, %f2625, %f2033, %f2636; + mov.b32 %r1268, %f2633; + shl.b32 %r1269, %r1268, 23; + mov.b32 %f2639, %r1269; + ex2.approx.ftz.f32 %f2640, %f2638; + mul.f32 %f485, %f2640, %f2639; + div.rn.f32 %f486, %f381, %f369; + abs.f32 %f487, %f486; + setp.lt.f32 %p1188, %f487, 0f00800000; + mul.f32 %f2641, %f487, 0f4B800000; + selp.f32 %f2642, %f2641, %f487, %p1188; + selp.f32 %f2643, 0fC3170000, 0fC2FE0000, %p1188; + mov.b32 %r1270, %f2642; + and.b32 %r1271, %r1270, 8388607; + or.b32 %r1272, %r1271, 1065353216; + mov.b32 %f2644, %r1272; + shr.u32 %r1273, %r1270, 23; + cvt.rn.f32.u32 %f2645, %r1273; + add.f32 %f2646, %f2643, %f2645; + setp.gt.f32 %p1189, %f2644, 0f3FB504F3; + mul.f32 %f2647, %f2644, 0f3F000000; + add.f32 %f2648, %f2646, 0f3F800000; + selp.f32 %f2649, %f2648, %f2646, %p1189; + selp.f32 %f2650, %f2647, %f2644, %p1189; + add.f32 %f2651, %f2650, 0fBF800000; + add.f32 %f2652, %f2650, 0f3F800000; + rcp.approx.ftz.f32 %f2653, %f2652; + add.f32 %f2654, %f2651, %f2651; + mul.f32 %f2656, %f2654, %f2653; + mul.f32 %f2657, %f2656, %f2656; + fma.rn.f32 %f2660, %f1956, %f2657, %f1955; + fma.rn.f32 %f2662, %f2660, %f2657, %f1958; + mul.rn.f32 %f2663, %f2662, %f2657; + mul.rn.f32 %f2664, %f2663, %f2656; + sub.f32 %f2665, %f2651, %f2656; + add.f32 %f2666, %f2665, %f2665; + neg.f32 %f2667, %f2656; + fma.rn.f32 %f2668, %f2667, %f2651, %f2666; + mul.rn.f32 %f2669, %f2653, %f2668; + add.f32 %f2670, %f2664, %f2656; + sub.f32 %f2671, %f2656, %f2670; + add.f32 %f2672, %f2664, %f2671; + add.f32 %f2673, %f2669, %f2672; + add.f32 %f2674, %f2670, %f2673; + sub.f32 %f2675, %f2670, %f2674; + add.f32 %f2676, %f2673, %f2675; + mul.rn.f32 %f2678, %f2649, %f1974; + mul.rn.f32 %f2680, %f2649, %f1976; + add.f32 %f2681, %f2678, %f2674; + sub.f32 %f2682, %f2678, %f2681; + add.f32 %f2683, %f2674, %f2682; + add.f32 %f2684, %f2676, %f2683; + add.f32 %f2685, %f2680, %f2684; + add.f32 %f2686, %f2681, %f2685; + sub.f32 %f2687, %f2681, %f2686; + add.f32 %f2688, %f2685, %f2687; + mul.rn.f32 %f2689, %f1825, %f2686; + neg.f32 %f2690, %f2689; + fma.rn.f32 %f2691, %f1825, %f2686, %f2690; + fma.rn.f32 %f2692, %f1825, %f2688, %f2691; + fma.rn.f32 %f2694, %f1990, %f2686, %f2692; + add.rn.f32 %f2695, %f2689, %f2694; + neg.f32 %f2696, %f2695; + add.rn.f32 %f2697, %f2689, %f2696; + add.rn.f32 %f2698, %f2697, %f2694; + mov.b32 %r1274, %f2695; + setp.eq.s32 %p1190, %r1274, 1118925336; + add.s32 %r1275, %r1274, -1; + mov.b32 %f2699, %r1275; + add.f32 %f2700, %f2698, 0f37000000; + selp.f32 %f488, %f2700, %f2698, %p1190; + selp.f32 %f2701, %f2699, %f2695, %p1190; + mul.rn.f32 %f2702, %f2701, %f1999; + cvt.rzi.f32.f32 %f2703, %f2702; + abs.f32 %f2704, %f2703; + setp.gt.f32 %p1191, %f2704, 0f42FC0000; + mov.b32 %r1276, %f2703; + and.b32 %r1277, %r1276, -2147483648; + or.b32 %r1278, %r1277, 1123811328; + mov.b32 %f2705, %r1278; + selp.f32 %f2706, %f2705, %f2703, %p1191; + fma.rn.f32 %f2708, %f2706, %f2005, %f2701; + fma.rn.f32 %f2710, %f2706, %f2007, %f2708; + mul.f32 %f2711, %f2710, 0f3FB8AA3B; + add.f32 %f2712, %f2706, 0f4B40007F; + mov.b32 %r1279, %f2712; + shl.b32 %r1280, %r1279, 23; + mov.b32 %f2713, %r1280; + ex2.approx.ftz.f32 %f2714, %f2711; + mul.f32 %f489, %f2714, %f2713; + setp.eq.f32 %p1192, %f489, 0f7F800000; + mov.f32 %f3354, 0f7F800000; + @%p1192 bra $L__BB6_792; + + fma.rn.f32 %f3354, %f489, %f488, %f489; + +$L__BB6_792: + setp.lt.f32 %p1193, %f486, 0f00000000; + and.pred %p78, %p1193, %p1089; + setp.eq.f32 %p1195, %f486, 0f00000000; + @%p1195 bra $L__BB6_796; + bra.uni $L__BB6_793; + +$L__BB6_796: + add.f32 %f2719, %f486, %f486; + selp.f32 %f3356, %f2719, 0f00000000, %p1089; + bra.uni $L__BB6_797; + +$L__BB6_793: + mov.b32 %r1281, %f3354; + xor.b32 %r1282, %r1281, -2147483648; + mov.b32 %f2715, %r1282; + selp.f32 %f3356, %f2715, %f3354, %p78; + setp.geu.f32 %p1196, %f486, 0f00000000; + @%p1196 bra $L__BB6_797; + + cvt.rzi.f32.f32 %f2717, %f1825; + setp.eq.f32 %p1197, %f2717, 0f40000000; + @%p1197 bra $L__BB6_797; + + mov.f32 %f3356, 0f7FFFFFFF; + +$L__BB6_797: + add.f32 %f2720, %f487, 0f40000000; + mov.b32 %r1283, %f2720; + setp.lt.s32 %p1199, %r1283, 2139095040; + @%p1199 bra $L__BB6_802; + + setp.gtu.f32 %p1200, %f487, 0f7F800000; + @%p1200 bra $L__BB6_801; + bra.uni $L__BB6_799; + +$L__BB6_801: + add.f32 %f3356, %f486, 0f40000000; + bra.uni $L__BB6_802; + +$L__BB6_799: + setp.neu.f32 %p1201, %f487, 0f7F800000; + @%p1201 bra $L__BB6_802; + + selp.f32 %f3356, 0fFF800000, 0f7F800000, %p78; + +$L__BB6_802: + mul.f32 %f2721, %f3356, 0fBF000000; + setp.eq.f32 %p1202, %f486, 0f3F800000; + selp.f32 %f2722, 0fBF000000, %f2721, %p1202; + fma.rn.f32 %f2725, %f2722, %f2023, %f1844; + cvt.sat.f32.f32 %f2728, %f2725; + fma.rm.f32 %f2730, %f2728, %f2026, %f2028; + add.f32 %f2731, %f2730, 0fCB40007F; + neg.f32 %f2732, %f2731; + fma.rn.f32 %f2733, %f2722, %f1999, %f2732; + fma.rn.f32 %f2735, %f2722, %f2033, %f2733; + mov.b32 %r1284, %f2730; + shl.b32 %r1285, %r1284, 23; + mov.b32 %f2736, %r1285; + ex2.approx.ftz.f32 %f2737, %f2735; + mul.f32 %f2738, %f2737, %f2736; + add.f32 %f2739, %f381, 0f3F800000; + mul.f32 %f2740, %f2739, %f485; + mul.f32 %f2741, %f381, %f2738; + sub.f32 %f2742, %f2740, %f2741; + div.rn.f32 %f2743, %f445, %f369; + mul.f32 %f498, %f2743, %f2742; + not.pred %p1203, %p55; + mov.f64 %fd1203, %fd449; + @%p1203 bra $L__BB6_804; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1286}, %fd449; + } + xor.b32 %r1287, %r1286, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1288, %temp}, %fd449; + } + mov.b64 %fd1203, {%r1288, %r1287}; + +$L__BB6_804: + setp.eq.f32 %p1204, %f320, 0f00000000; + @%p1204 bra $L__BB6_808; + bra.uni $L__BB6_805; + +$L__BB6_808: + mov.u32 %r1289, 0; + mov.b64 %fd1203, {%r1289, %r153}; + bra.uni $L__BB6_809; + +$L__BB6_805: + setp.gt.s32 %p1205, %r152, -1; + @%p1205 bra $L__BB6_809; + + cvt.rzi.f64.f64 %fd1002, %fd960; + setp.eq.f64 %p1206, %fd1002, 0d4000000000000000; + @%p1206 bra $L__BB6_809; + + mov.f64 %fd1203, 0dFFF8000000000000; + +$L__BB6_809: + selp.f64 %fd1204, %fd1203, %fd431, %p986; + @%p66 bra $L__BB6_814; + + setp.eq.s32 %p1208, %r124, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1290, %temp}, %fd960; + } + setp.eq.s32 %p1209, %r1290, 0; + and.pred %p1210, %p1208, %p1209; + @%p1210 bra $L__BB6_813; + bra.uni $L__BB6_811; + +$L__BB6_813: + mov.u32 %r1293, 0; + mov.b64 %fd1204, {%r1293, %r155}; + bra.uni $L__BB6_814; + +$L__BB6_811: + setp.ne.s32 %p1211, %r156, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1291, %temp}, %fd430; + } + setp.ne.s32 %p1212, %r1291, 0; + or.pred %p1213, %p1211, %p1212; + mov.f64 %fd1204, %fd1203; + @%p1213 bra $L__BB6_814; + + mov.u32 %r1292, 0; + mov.b64 %fd1204, {%r1292, %r159}; + +$L__BB6_814: + not.pred %p1214, %p56; + mov.f64 %fd1206, %fd450; + @%p1214 bra $L__BB6_816; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1294}, %fd450; + } + xor.b32 %r1295, %r1294, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1296, %temp}, %fd450; + } + mov.b64 %fd1206, {%r1296, %r1295}; + +$L__BB6_816: + @%p1204 bra $L__BB6_820; + bra.uni $L__BB6_817; + +$L__BB6_820: + mov.u32 %r1297, 0; + mov.b64 %fd1206, {%r1297, %r157}; + bra.uni $L__BB6_821; + +$L__BB6_817: + setp.gt.s32 %p1216, %r152, -1; + @%p1216 bra $L__BB6_821; + + cvt.rzi.f64.f64 %fd1006, %fd961; + setp.eq.f64 %p1217, %fd1006, 0d4008000000000000; + @%p1217 bra $L__BB6_821; + + mov.f64 %fd1206, 0dFFF8000000000000; + +$L__BB6_821: + selp.f64 %fd1207, %fd1206, %fd434, %p989; + @%p67 bra $L__BB6_826; + + setp.eq.s32 %p1219, %r130, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1298, %temp}, %fd961; + } + setp.eq.s32 %p1220, %r1298, 0; + and.pred %p1221, %p1219, %p1220; + @%p1221 bra $L__BB6_825; + bra.uni $L__BB6_823; + +$L__BB6_825: + mov.u32 %r1301, 0; + mov.b64 %fd1207, {%r1301, %r161}; + bra.uni $L__BB6_826; + +$L__BB6_823: + setp.ne.s32 %p1222, %r156, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1299, %temp}, %fd430; + } + setp.ne.s32 %p1223, %r1299, 0; + or.pred %p1224, %p1222, %p1223; + mov.f64 %fd1207, %fd1206; + @%p1224 bra $L__BB6_826; + + mov.u32 %r1300, 0; + mov.b64 %fd1207, {%r1300, %r164}; + +$L__BB6_826: + not.pred %p1225, %p57; + mov.f64 %fd1209, %fd451; + @%p1225 bra $L__BB6_828; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1302}, %fd451; + } + xor.b32 %r1303, %r1302, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1304, %temp}, %fd451; + } + mov.b64 %fd1209, {%r1304, %r1303}; + +$L__BB6_828: + setp.eq.f32 %p1226, %f553, 0f00000000; + @%p1226 bra $L__BB6_832; + bra.uni $L__BB6_829; + +$L__BB6_832: + mov.u32 %r1305, 0; + mov.b64 %fd1209, {%r1305, %r162}; + bra.uni $L__BB6_833; + +$L__BB6_829: + setp.gt.s32 %p1227, %r160, -1; + @%p1227 bra $L__BB6_833; + + cvt.rzi.f64.f64 %fd1010, %fd962; + setp.eq.f64 %p1228, %fd1010, 0d4010000000000000; + @%p1228 bra $L__BB6_833; + + mov.f64 %fd1209, 0dFFF8000000000000; + +$L__BB6_833: + selp.f64 %fd1210, %fd1209, %fd436, %p992; + @%p68 bra $L__BB6_838; + + setp.eq.s32 %p1230, %r136, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1306, %temp}, %fd962; + } + setp.eq.s32 %p1231, %r1306, 0; + and.pred %p1232, %p1230, %p1231; + @%p1232 bra $L__BB6_837; + bra.uni $L__BB6_835; + +$L__BB6_837: + mov.u32 %r1309, 0; + mov.b64 %fd1210, {%r1309, %r166}; + bra.uni $L__BB6_838; + +$L__BB6_835: + setp.ne.s32 %p1233, %r167, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1307, %temp}, %fd435; + } + setp.ne.s32 %p1234, %r1307, 0; + or.pred %p1235, %p1233, %p1234; + mov.f64 %fd1210, %fd1209; + @%p1235 bra $L__BB6_838; + + mov.u32 %r1308, 0; + mov.b64 %fd1210, {%r1308, %r170}; + +$L__BB6_838: + setp.eq.f32 %p1236, %f553, 0f3F800000; + selp.f64 %fd1013, 0d3FF0000000000000, %fd1210, %p1236; + setp.eq.f32 %p1237, %f320, 0f3F800000; + selp.f64 %fd1014, 0d3FF0000000000000, %fd1207, %p1237; + mul.f64 %fd1015, %fd1014, %fd433; + div.rn.f64 %fd1016, %fd1015, %fd1013; + selp.f64 %fd1017, 0d3FF0000000000000, %fd1204, %p1237; + mul.f64 %fd1018, %fd1017, %fd429; + div.rn.f64 %fd1019, %fd1018, %fd432; + add.f64 %fd1020, %fd1019, %fd428; + add.f64 %fd1021, %fd1020, %fd1016; + cvt.rn.f32.f64 %f2744, %fd1021; + div.rn.f32 %f2745, %f325, %f366; + mul.f32 %f499, %f2745, %f2744; + not.pred %p1238, %p58; + mov.f64 %fd1212, %fd452; + @%p1238 bra $L__BB6_840; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1310}, %fd452; + } + xor.b32 %r1311, %r1310, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1312, %temp}, %fd452; + } + mov.b64 %fd1212, {%r1312, %r1311}; + +$L__BB6_840: + setp.eq.f32 %p1239, %f322, 0f00000000; + @%p1239 bra $L__BB6_844; + bra.uni $L__BB6_841; + +$L__BB6_844: + mov.u32 %r1313, 0; + mov.b64 %fd1212, {%r1313, %r168}; + bra.uni $L__BB6_845; + +$L__BB6_841: + setp.gt.s32 %p1240, %r165, -1; + @%p1240 bra $L__BB6_845; + + cvt.rzi.f64.f64 %fd1023, %fd960; + setp.eq.f64 %p1241, %fd1023, 0d4000000000000000; + @%p1241 bra $L__BB6_845; + + mov.f64 %fd1212, 0dFFF8000000000000; + +$L__BB6_845: + selp.f64 %fd1213, %fd1212, %fd440, %p998; + @%p69 bra $L__BB6_850; + + setp.eq.s32 %p1243, %r124, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1314, %temp}, %fd960; + } + setp.eq.s32 %p1244, %r1314, 0; + and.pred %p1245, %p1243, %p1244; + @%p1245 bra $L__BB6_849; + bra.uni $L__BB6_847; + +$L__BB6_849: + mov.u32 %r1317, 0; + mov.b64 %fd1213, {%r1317, %r171}; + bra.uni $L__BB6_850; + +$L__BB6_847: + setp.ne.s32 %p1246, %r172, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1315, %temp}, %fd439; + } + setp.ne.s32 %p1247, %r1315, 0; + or.pred %p1248, %p1246, %p1247; + mov.f64 %fd1213, %fd1212; + @%p1248 bra $L__BB6_850; + + mov.u32 %r1316, 0; + mov.b64 %fd1213, {%r1316, %r175}; + +$L__BB6_850: + not.pred %p1249, %p59; + mov.f64 %fd1215, %fd453; + @%p1249 bra $L__BB6_852; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1318}, %fd453; + } + xor.b32 %r1319, %r1318, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1320, %temp}, %fd453; + } + mov.b64 %fd1215, {%r1320, %r1319}; + +$L__BB6_852: + @%p1239 bra $L__BB6_856; + bra.uni $L__BB6_853; + +$L__BB6_856: + mov.u32 %r1321, 0; + mov.b64 %fd1215, {%r1321, %r173}; + bra.uni $L__BB6_857; + +$L__BB6_853: + setp.gt.s32 %p1251, %r165, -1; + @%p1251 bra $L__BB6_857; + + cvt.rzi.f64.f64 %fd1027, %fd961; + setp.eq.f64 %p1252, %fd1027, 0d4008000000000000; + @%p1252 bra $L__BB6_857; + + mov.f64 %fd1215, 0dFFF8000000000000; + +$L__BB6_857: + selp.f64 %fd1216, %fd1215, %fd442, %p1002; + @%p70 bra $L__BB6_862; + + setp.eq.s32 %p1254, %r130, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1322, %temp}, %fd961; + } + setp.eq.s32 %p1255, %r1322, 0; + and.pred %p1256, %p1254, %p1255; + @%p1256 bra $L__BB6_861; + bra.uni $L__BB6_859; + +$L__BB6_861: + mov.u32 %r1325, 0; + mov.b64 %fd1216, {%r1325, %r176}; + bra.uni $L__BB6_862; + +$L__BB6_859: + setp.ne.s32 %p1257, %r172, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1323, %temp}, %fd439; + } + setp.ne.s32 %p1258, %r1323, 0; + or.pred %p1259, %p1257, %p1258; + mov.f64 %fd1216, %fd1215; + @%p1259 bra $L__BB6_862; + + mov.u32 %r1324, 0; + mov.b64 %fd1216, {%r1324, %r177}; + +$L__BB6_862: + mov.f64 %fd1218, %fd451; + @%p1225 bra $L__BB6_864; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r1326}, %fd451; + } + xor.b32 %r1327, %r1326, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r1328, %temp}, %fd451; + } + mov.b64 %fd1218, {%r1328, %r1327}; + +$L__BB6_864: + @%p1226 bra $L__BB6_868; + bra.uni $L__BB6_865; + +$L__BB6_868: + mov.u32 %r1329, 0; + mov.b64 %fd1218, {%r1329, %r162}; + bra.uni $L__BB6_869; + +$L__BB6_865: + setp.gt.s32 %p1262, %r160, -1; + @%p1262 bra $L__BB6_869; + + cvt.rzi.f64.f64 %fd1031, %fd962; + setp.eq.f64 %p1263, %fd1031, 0d4010000000000000; + @%p1263 bra $L__BB6_869; + + mov.f64 %fd1218, 0dFFF8000000000000; + +$L__BB6_869: + selp.f64 %fd1219, %fd1218, %fd436, %p992; + @%p68 bra $L__BB6_874; + + setp.eq.s32 %p1265, %r136, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1330, %temp}, %fd962; + } + setp.eq.s32 %p1266, %r1330, 0; + and.pred %p1267, %p1265, %p1266; + @%p1267 bra $L__BB6_873; + bra.uni $L__BB6_871; + +$L__BB6_873: + mov.u32 %r1333, 0; + mov.b64 %fd1219, {%r1333, %r166}; + bra.uni $L__BB6_874; + +$L__BB6_871: + setp.ne.s32 %p1268, %r167, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r1331, %temp}, %fd435; + } + setp.ne.s32 %p1269, %r1331, 0; + or.pred %p1270, %p1268, %p1269; + mov.f64 %fd1219, %fd1218; + @%p1270 bra $L__BB6_874; + + mov.u32 %r1332, 0; + mov.b64 %fd1219, {%r1332, %r170}; + +$L__BB6_874: + selp.f64 %fd1034, 0d3FF0000000000000, %fd1219, %p1236; + setp.eq.f32 %p1272, %f322, 0f3F800000; + selp.f64 %fd1035, 0d3FF0000000000000, %fd1216, %p1272; + mul.f64 %fd1036, %fd1035, %fd441; + div.rn.f64 %fd1037, %fd1036, %fd1034; + selp.f64 %fd1038, 0d3FF0000000000000, %fd1213, %p1272; + mul.f64 %fd1039, %fd1038, %fd438; + div.rn.f64 %fd1040, %fd1039, %fd432; + add.f64 %fd1041, %fd1040, %fd437; + add.f64 %fd1042, %fd1041, %fd1037; + cvt.rn.f32.f64 %f2746, %fd1042; + div.rn.f32 %f2747, %f326, %f368; + mul.f32 %f2748, %f2747, %f2746; + mul.f32 %f2749, %f379, %f498; + mul.f32 %f2750, %f2749, %f2748; + fma.rn.f32 %f2751, %f472, %f499, %f2750; + mul.f32 %f2752, %f379, %f3293; + fma.rn.f32 %f500, %f392, %f2752, %f3292; + mad.lo.s32 %r1334, %r1377, %r182, %r1376; + add.s32 %r1335, %r1334, %r2; + mul.wide.s32 %rd36, %r1335, 4; + add.s64 %rd37, %rd1, %rd36; + ld.global.f32 %f501, [%rd37]; + mul.f32 %f2753, %f419, %f419; + div.rn.f32 %f2754, %f2753, %f500; + add.f32 %f3324, %f3324, %f2754; + mul.f32 %f2755, %f446, %f419; + div.rn.f32 %f2756, %f2755, %f500; + add.f32 %f3323, %f3323, %f2756; + mul.f32 %f2757, %f379, %f392; + mul.f32 %f2758, %f2757, %f419; + div.rn.f32 %f2759, %f2758, %f500; + add.f32 %f3322, %f3322, %f2759; + div.rn.f32 %f2760, %f419, %f500; + add.f32 %f3321, %f3321, %f2760; + mul.f32 %f2761, %f2751, %f419; + div.rn.f32 %f2762, %f2761, %f500; + add.f32 %f3320, %f3320, %f2762; + mul.f32 %f2763, %f446, %f446; + div.rn.f32 %f2764, %f2763, %f500; + add.f32 %f3319, %f3319, %f2764; + mul.f32 %f2765, %f2757, %f446; + div.rn.f32 %f2766, %f2765, %f500; + add.f32 %f3318, %f3318, %f2766; + div.rn.f32 %f2767, %f446, %f500; + add.f32 %f3317, %f3317, %f2767; + mul.f32 %f2768, %f2751, %f446; + div.rn.f32 %f2769, %f2768, %f500; + add.f32 %f3316, %f3316, %f2769; + mul.f32 %f2770, %f2757, %f2757; + div.rn.f32 %f2771, %f2770, %f500; + add.f32 %f3315, %f3315, %f2771; + div.rn.f32 %f2772, %f2757, %f500; + add.f32 %f3314, %f3314, %f2772; + mul.f32 %f2773, %f2751, %f2757; + div.rn.f32 %f2774, %f2773, %f500; + add.f32 %f3313, %f3313, %f2774; + rcp.rn.f32 %f2775, %f500; + add.f32 %f3325, %f3325, %f2775; + div.rn.f32 %f2776, %f2751, %f500; + add.f32 %f3326, %f3326, %f2776; + mul.f32 %f2777, %f2751, %f2751; + div.rn.f32 %f2778, %f2777, %f500; + add.f32 %f3327, %f3327, %f2778; + add.f32 %f517, %f3290, %f500; + setp.leu.f32 %p1273, %f517, 0f00000000; + @%p1273 bra $L__BB6_882; + + add.f32 %f518, %f3290, %f501; + setp.gt.f32 %p1274, %f518, 0f00000000; + @%p1274 bra $L__BB6_877; + bra.uni $L__BB6_876; + +$L__BB6_877: + setp.lt.f32 %p1275, %f517, 0f00800000; + mul.f32 %f2781, %f517, 0f4B000000; + selp.f32 %f520, %f2781, %f517, %p1275; + selp.f32 %f2782, 0fC1B80000, 0f00000000, %p1275; + mov.b32 %r1336, %f520; + add.s32 %r1337, %r1336, -1059760811; + and.b32 %r1338, %r1337, -8388608; + sub.s32 %r1339, %r1336, %r1338; + mov.b32 %f2783, %r1339; + cvt.rn.f32.s32 %f2784, %r1338; + mov.f32 %f2785, 0f34000000; + fma.rn.f32 %f2786, %f2784, %f2785, %f2782; + add.f32 %f2787, %f2783, 0fBF800000; + mov.f32 %f2788, 0f3E1039F6; + mov.f32 %f2789, 0fBE055027; + fma.rn.f32 %f2790, %f2789, %f2787, %f2788; + mov.f32 %f2791, 0fBDF8CDCC; + fma.rn.f32 %f2792, %f2790, %f2787, %f2791; + mov.f32 %f2793, 0f3E0F2955; + fma.rn.f32 %f2794, %f2792, %f2787, %f2793; + mov.f32 %f2795, 0fBE2AD8B9; + fma.rn.f32 %f2796, %f2794, %f2787, %f2795; + mov.f32 %f2797, 0f3E4CED0B; + fma.rn.f32 %f2798, %f2796, %f2787, %f2797; + mov.f32 %f2799, 0fBE7FFF22; + fma.rn.f32 %f2800, %f2798, %f2787, %f2799; + mov.f32 %f2801, 0f3EAAAA78; + fma.rn.f32 %f2802, %f2800, %f2787, %f2801; + mov.f32 %f2803, 0fBF000000; + fma.rn.f32 %f2804, %f2802, %f2787, %f2803; + mul.f32 %f2805, %f2787, %f2804; + fma.rn.f32 %f2806, %f2805, %f2787, %f2787; + mov.f32 %f2807, 0f3F317218; + fma.rn.f32 %f3357, %f2786, %f2807, %f2806; + setp.lt.u32 %p1276, %r1336, 2139095040; + @%p1276 bra $L__BB6_879; + + mov.f32 %f2808, 0f7F800000; + fma.rn.f32 %f3357, %f520, %f2808, %f2808; + +$L__BB6_879: + setp.eq.f32 %p1277, %f520, 0f00000000; + selp.f32 %f2809, 0fFF800000, %f3357, %p1277; + mul.f32 %f2810, %f518, %f2809; + sub.f32 %f524, %f2810, %f500; + mul.f32 %f2811, %f518, 0f4B000000; + setp.lt.f32 %p1278, %f518, 0f00800000; + selp.f32 %f525, %f2811, %f518, %p1278; + selp.f32 %f2812, 0fC1B80000, 0f00000000, %p1278; + mov.b32 %r1340, %f525; + add.s32 %r1341, %r1340, -1059760811; + and.b32 %r1342, %r1341, -8388608; + sub.s32 %r1343, %r1340, %r1342; + mov.b32 %f2813, %r1343; + cvt.rn.f32.s32 %f2814, %r1342; + fma.rn.f32 %f2816, %f2814, %f2785, %f2812; + add.f32 %f2817, %f2813, 0fBF800000; + fma.rn.f32 %f2820, %f2789, %f2817, %f2788; + fma.rn.f32 %f2822, %f2820, %f2817, %f2791; + fma.rn.f32 %f2824, %f2822, %f2817, %f2793; + fma.rn.f32 %f2826, %f2824, %f2817, %f2795; + fma.rn.f32 %f2828, %f2826, %f2817, %f2797; + fma.rn.f32 %f2830, %f2828, %f2817, %f2799; + fma.rn.f32 %f2832, %f2830, %f2817, %f2801; + fma.rn.f32 %f2834, %f2832, %f2817, %f2803; + mul.f32 %f2835, %f2817, %f2834; + fma.rn.f32 %f2836, %f2835, %f2817, %f2817; + fma.rn.f32 %f3358, %f2816, %f2807, %f2836; + setp.lt.u32 %p1279, %r1340, 2139095040; + @%p1279 bra $L__BB6_881; + + mov.f32 %f2838, 0f7F800000; + fma.rn.f32 %f3358, %f525, %f2838, %f2838; + +$L__BB6_881: + setp.eq.f32 %p1280, %f525, 0f00000000; + selp.f32 %f2839, 0fFF800000, %f3358, %p1280; + mul.f32 %f2840, %f518, %f2839; + sub.f32 %f2841, %f524, %f2840; + add.f32 %f2842, %f501, %f2841; + add.f32 %f3359, %f3359, %f2842; + bra.uni $L__BB6_882; + +$L__BB6_876: + neg.f32 %f2779, %f500; + sub.f32 %f2780, %f2779, %f3290; + add.f32 %f3359, %f3359, %f2780; + +$L__BB6_882: + add.s32 %r1377, %r1377, 1; + setp.lt.s32 %p1281, %r1377, %r182; + @%p1281 bra $L__BB6_626; + + add.s32 %r1376, %r1376, 1; + setp.lt.s32 %p1282, %r1376, %r182; + @%p1282 bra $L__BB6_625; + +$L__BB6_884: + ld.param.u64 %rd59, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_17]; + ld.param.u64 %rd58, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_16]; + ld.param.u32 %r1363, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_18]; + mul.wide.s32 %rd57, %r1, 4; + ld.param.u64 %rd56, [_Z25kernel_MLEFit_SCMOSXYNBZ_PKfS0_S0_S0_ffffffffiiiPfS1_S1_i_param_15]; + rcp.rn.f32 %f2843, %f3324; + mov.f32 %f2844, 0f3F800000; + mul.f32 %f2845, %f2843, %f3323; + mul.f32 %f2846, %f2843, %f3322; + mul.f32 %f2847, %f2843, %f3321; + mul.f32 %f2848, %f2843, %f3320; + fma.rn.f32 %f2849, %f2845, %f3323, 0f00000000; + sub.f32 %f2851, %f3319, %f2849; + fma.rn.f32 %f2852, %f2846, %f3323, 0f00000000; + rcp.rn.f32 %f2853, %f2851; + sub.f32 %f2854, %f3318, %f2852; + mul.f32 %f2855, %f2853, %f2854; + fma.rn.f32 %f2856, %f2847, %f3323, 0f00000000; + sub.f32 %f2857, %f3317, %f2856; + mul.f32 %f2858, %f2853, %f2857; + fma.rn.f32 %f2859, %f2848, %f3323, 0f00000000; + sub.f32 %f2860, %f3316, %f2859; + mul.f32 %f2861, %f2853, %f2860; + fma.rn.f32 %f2862, %f2845, %f3322, 0f00000000; + sub.f32 %f2863, %f3318, %f2862; + fma.rn.f32 %f2864, %f2846, %f3322, 0f00000000; + fma.rn.f32 %f2865, %f2855, %f2863, %f2864; + sub.f32 %f2866, %f3315, %f2865; + fma.rn.f32 %f2867, %f2847, %f3322, 0f00000000; + fma.rn.f32 %f2868, %f2858, %f2863, %f2867; + rcp.rn.f32 %f2869, %f2866; + sub.f32 %f2870, %f3314, %f2868; + mul.f32 %f2871, %f2869, %f2870; + fma.rn.f32 %f2872, %f2848, %f3322, 0f00000000; + fma.rn.f32 %f2873, %f2861, %f2863, %f2872; + sub.f32 %f2874, %f3313, %f2873; + mul.f32 %f2875, %f2869, %f2874; + fma.rn.f32 %f2876, %f2845, %f3321, 0f00000000; + sub.f32 %f2877, %f3317, %f2876; + fma.rn.f32 %f2878, %f2846, %f3321, 0f00000000; + fma.rn.f32 %f2879, %f2855, %f2877, %f2878; + sub.f32 %f2880, %f3314, %f2879; + fma.rn.f32 %f2881, %f2847, %f3321, 0f00000000; + fma.rn.f32 %f2882, %f2858, %f2877, %f2881; + fma.rn.f32 %f2883, %f2871, %f2880, %f2882; + sub.f32 %f2884, %f3325, %f2883; + fma.rn.f32 %f2885, %f2848, %f3321, 0f00000000; + fma.rn.f32 %f2886, %f2861, %f2877, %f2885; + fma.rn.f32 %f2887, %f2875, %f2880, %f2886; + rcp.rn.f32 %f2888, %f2884; + sub.f32 %f2889, %f3326, %f2887; + mul.f32 %f2890, %f2888, %f2889; + fma.rn.f32 %f2891, %f2845, %f3320, 0f00000000; + sub.f32 %f2892, %f3316, %f2891; + fma.rn.f32 %f2893, %f2846, %f3320, 0f00000000; + fma.rn.f32 %f2894, %f2855, %f2892, %f2893; + sub.f32 %f2895, %f3313, %f2894; + fma.rn.f32 %f2896, %f2847, %f3320, 0f00000000; + fma.rn.f32 %f2897, %f2858, %f2892, %f2896; + fma.rn.f32 %f2898, %f2871, %f2895, %f2897; + sub.f32 %f2899, %f3326, %f2898; + fma.rn.f32 %f2900, %f2848, %f3320, 0f00000000; + fma.rn.f32 %f2901, %f2861, %f2892, %f2900; + fma.rn.f32 %f2902, %f2875, %f2895, %f2901; + fma.rn.f32 %f2903, %f2890, %f2899, %f2902; + sub.f32 %f2904, %f3327, %f2903; + add.f32 %f2905, %f2845, 0f00000000; + sub.f32 %f2906, %f1807, %f2905; + add.f32 %f2907, %f2846, 0f00000000; + fma.rn.f32 %f2908, %f2855, %f2906, %f2907; + sub.f32 %f2909, %f1807, %f2908; + add.f32 %f2910, %f2847, 0f00000000; + fma.rn.f32 %f2911, %f2858, %f2906, %f2910; + fma.rn.f32 %f2912, %f2871, %f2909, %f2911; + sub.f32 %f2913, %f1807, %f2912; + add.f32 %f2914, %f2848, 0f00000000; + fma.rn.f32 %f2915, %f2861, %f2906, %f2914; + fma.rn.f32 %f2916, %f2875, %f2909, %f2915; + fma.rn.f32 %f2917, %f2890, %f2913, %f2916; + sub.f32 %f2918, %f1807, %f2917; + div.rn.f32 %f2919, %f2918, %f2904; + fma.rn.f32 %f2920, %f2899, %f2919, 0f00000000; + sub.f32 %f2921, %f2913, %f2920; + mul.f32 %f2922, %f2888, %f2921; + fma.rn.f32 %f2923, %f2880, %f2922, 0f00000000; + fma.rn.f32 %f2924, %f2895, %f2919, %f2923; + sub.f32 %f2925, %f2909, %f2924; + mul.f32 %f2926, %f2869, %f2925; + fma.rn.f32 %f2927, %f2863, %f2926, 0f00000000; + fma.rn.f32 %f2928, %f2877, %f2922, %f2927; + fma.rn.f32 %f2929, %f2892, %f2919, %f2928; + sub.f32 %f2930, %f2906, %f2929; + mul.f32 %f2931, %f2853, %f2930; + fma.rn.f32 %f2932, %f3323, %f2931, 0f00000000; + fma.rn.f32 %f2933, %f3322, %f2926, %f2932; + fma.rn.f32 %f2934, %f3321, %f2922, %f2933; + fma.rn.f32 %f2935, %f3320, %f2919, %f2934; + sub.f32 %f2936, %f2844, %f2935; + mul.f32 %f2937, %f2843, %f2936; + fma.rn.f32 %f2938, %f2845, 0f00000000, 0f00000000; + sub.f32 %f2939, %f2844, %f2938; + fma.rn.f32 %f2940, %f2846, 0f00000000, 0f00000000; + fma.rn.f32 %f2941, %f2855, %f2939, %f2940; + sub.f32 %f2942, %f1807, %f2941; + fma.rn.f32 %f2943, %f2847, 0f00000000, 0f00000000; + fma.rn.f32 %f2944, %f2858, %f2939, %f2943; + fma.rn.f32 %f2945, %f2871, %f2942, %f2944; + sub.f32 %f2946, %f1807, %f2945; + fma.rn.f32 %f2947, %f2848, 0f00000000, 0f00000000; + fma.rn.f32 %f2948, %f2861, %f2939, %f2947; + fma.rn.f32 %f2949, %f2875, %f2942, %f2948; + fma.rn.f32 %f2950, %f2890, %f2946, %f2949; + sub.f32 %f2951, %f1807, %f2950; + div.rn.f32 %f2952, %f2951, %f2904; + fma.rn.f32 %f2953, %f2899, %f2952, 0f00000000; + sub.f32 %f2954, %f2946, %f2953; + mul.f32 %f2955, %f2888, %f2954; + fma.rn.f32 %f2956, %f2880, %f2955, 0f00000000; + fma.rn.f32 %f2957, %f2895, %f2952, %f2956; + sub.f32 %f2958, %f2942, %f2957; + mul.f32 %f2959, %f2869, %f2958; + fma.rn.f32 %f2960, %f2863, %f2959, 0f00000000; + fma.rn.f32 %f2961, %f2877, %f2955, %f2960; + fma.rn.f32 %f2962, %f2892, %f2952, %f2961; + sub.f32 %f2963, %f2939, %f2962; + mul.f32 %f2964, %f2853, %f2963; + sub.f32 %f2965, %f1807, %f2938; + fma.rn.f32 %f2966, %f2855, %f2965, %f2940; + sub.f32 %f2967, %f2844, %f2966; + fma.rn.f32 %f2968, %f2858, %f2965, %f2943; + fma.rn.f32 %f2969, %f2871, %f2967, %f2968; + sub.f32 %f2970, %f1807, %f2969; + fma.rn.f32 %f2971, %f2861, %f2965, %f2947; + fma.rn.f32 %f2972, %f2875, %f2967, %f2971; + fma.rn.f32 %f2973, %f2890, %f2970, %f2972; + sub.f32 %f2974, %f1807, %f2973; + div.rn.f32 %f2975, %f2974, %f2904; + fma.rn.f32 %f2976, %f2899, %f2975, 0f00000000; + sub.f32 %f2977, %f2970, %f2976; + mul.f32 %f2978, %f2888, %f2977; + fma.rn.f32 %f2979, %f2880, %f2978, 0f00000000; + fma.rn.f32 %f2980, %f2895, %f2975, %f2979; + sub.f32 %f2981, %f2967, %f2980; + mul.f32 %f2982, %f2869, %f2981; + sub.f32 %f2983, %f1807, %f2966; + fma.rn.f32 %f2984, %f2871, %f2983, %f2968; + sub.f32 %f2985, %f2844, %f2984; + fma.rn.f32 %f2986, %f2875, %f2983, %f2971; + fma.rn.f32 %f2987, %f2890, %f2985, %f2986; + sub.f32 %f2988, %f1807, %f2987; + div.rn.f32 %f2989, %f2988, %f2904; + fma.rn.f32 %f2990, %f2899, %f2989, 0f00000000; + sub.f32 %f2991, %f2985, %f2990; + mul.f32 %f2992, %f2888, %f2991; + sub.f32 %f2993, %f1807, %f2984; + fma.rn.f32 %f2994, %f2890, %f2993, %f2986; + sub.f32 %f2995, %f2844, %f2994; + div.rn.f32 %f2996, %f2995, %f2904; + cvta.to.global.u64 %rd38, %rd56; + add.s64 %rd40, %rd38, %rd57; + st.global.f32 [%rd40], %f3295; + add.s32 %r1348, %r1, %r1363; + mul.wide.s32 %rd41, %r1363, 4; + add.s64 %rd42, %rd40, %rd41; + st.global.f32 [%rd42], %f3294; + add.s32 %r1349, %r1348, %r1363; + shl.b32 %r1350, %r1363, 3; + cvt.s64.s32 %rd43, %r1350; + add.s64 %rd44, %rd40, %rd43; + st.global.f32 [%rd44], %f3293; + add.s32 %r1351, %r1349, %r1363; + mul.wide.s32 %rd45, %r1351, 4; + add.s64 %rd46, %rd38, %rd45; + st.global.f32 [%rd46], %f3292; + add.s64 %rd47, %rd44, %rd43; + st.global.f32 [%rd47], %f3291; + cvta.to.global.u64 %rd48, %rd58; + add.s64 %rd49, %rd48, %rd57; + st.global.f32 [%rd49], %f2937; + add.s64 %rd50, %rd49, %rd41; + st.global.f32 [%rd50], %f2964; + add.s64 %rd51, %rd49, %rd43; + st.global.f32 [%rd51], %f2982; + add.s64 %rd52, %rd48, %rd45; + st.global.f32 [%rd52], %f2992; + add.s64 %rd53, %rd51, %rd43; + st.global.f32 [%rd53], %f2996; + cvta.to.global.u64 %rd54, %rd59; + add.s64 %rd55, %rd54, %rd57; + st.global.f32 [%rd55], %f3359; + +$L__BB6_885: ret; -} +} // .globl _Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i .visible .entry _Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i( .param .u64 _Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_0, @@ -25910,4118 +44638,6341 @@ BB6_227: .param .u32 _Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_10 ) { - .local .align 16 .b8 __local_depot7[144]; - .reg .b64 %SP; - .reg .b64 %SPL; - .reg .pred %p<375>; - .reg .f32 %f<3407>; - .reg .b32 %r<342>; - .reg .b64 %rd<156>; - - - mov.u64 %SPL, __local_depot7; - ld.param.u64 %rd69, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_0]; - ld.param.u64 %rd70, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_1]; - ld.param.u64 %rd71, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_2]; - ld.param.f32 %f3338, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_3]; - ld.param.u32 %r71, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_4]; - ld.param.u32 %r72, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_5]; - ld.param.u32 %r73, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_6]; - ld.param.u32 %r74, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_10]; - cvta.to.global.u64 %rd1, %rd69; - add.u64 %rd2, %SPL, 0; - mov.u32 %r75, %ntid.x; - mov.u32 %r76, %ctaid.x; - mov.u32 %r77, %tid.x; - mad.lo.s32 %r1, %r75, %r76, %r77; - setp.ge.s32 %p13, %r1, %r74; - @%p13 bra BB7_228; - - mov.u32 %r78, 0; - mov.u64 %rd135, %rd2; - mov.u32 %r314, %r78; - -BB7_2: - st.local.u32 [%rd135], %r78; - add.s64 %rd135, %rd135, 4; - add.s32 %r314, %r314, 1; - setp.lt.u32 %p14, %r314, 36; - @%p14 bra BB7_2; - - mul.lo.s32 %r80, %r71, %r71; - mul.lo.s32 %r4, %r80, %r1; - mov.f32 %f629, 0f00000000; - setp.lt.s32 %p15, %r71, 1; - mov.f32 %f1, %f629; - mov.f32 %f2, %f629; - mov.f32 %f3, %f629; - @%p15 bra BB7_17; - - and.b32 %r5, %r71, 3; - shl.b32 %r6, %r71, 2; - mov.f32 %f632, 0f00000000; - mov.u32 %r81, 0; - mov.u32 %r315, %r81; - mov.f32 %f1, %f632; - mov.f32 %f2, %f632; - mov.f32 %f3, %f632; - -BB7_5: - cvt.rn.f32.s32 %f4, %r315; - setp.eq.s32 %p16, %r5, 0; - @%p16 bra BB7_6; - - setp.eq.s32 %p17, %r5, 1; - @%p17 bra BB7_8; - bra.uni BB7_9; - -BB7_8: - mov.u32 %r317, %r81; - bra.uni BB7_13; - -BB7_6: - mov.u32 %r319, %r81; - mov.f32 %f3235, %f1; - mov.f32 %f3236, %f2; - mov.f32 %f3237, %f3; - mov.f32 %f1, %f632; - mov.f32 %f2, %f632; - mov.f32 %f3, %f632; - bra.uni BB7_14; - -BB7_9: - setp.eq.s32 %p18, %r5, 2; - @%p18 bra BB7_10; - bra.uni BB7_11; - -BB7_10: - mov.u32 %r316, %r81; - bra.uni BB7_12; - -BB7_11: - add.s32 %r86, %r315, %r4; - mul.wide.s32 %rd77, %r86, 4; - add.s64 %rd78, %rd1, %rd77; - ld.global.f32 %f636, [%rd78]; - fma.rn.f32 %f3, %f4, %f636, %f3; - fma.rn.f32 %f2, %f636, 0f00000000, %f2; - add.f32 %f1, %f1, %f636; - mov.u32 %r316, 1; - -BB7_12: - neg.s32 %r87, %r316; - and.b32 %r88, %r87, %r71; - add.s32 %r89, %r88, %r315; - add.s32 %r90, %r89, %r4; - mul.wide.s32 %rd79, %r90, 4; - add.s64 %rd80, %rd1, %rd79; - ld.global.f32 %f637, [%rd80]; - fma.rn.f32 %f3, %f4, %f637, %f3; - cvt.rn.f32.s32 %f638, %r316; - fma.rn.f32 %f2, %f638, %f637, %f2; - add.f32 %f1, %f1, %f637; - add.s32 %r317, %r316, 1; - -BB7_13: - mad.lo.s32 %r91, %r317, %r71, %r315; - add.s32 %r92, %r91, %r4; - mul.wide.s32 %rd81, %r92, 4; - add.s64 %rd82, %rd1, %rd81; - ld.global.f32 %f639, [%rd82]; - fma.rn.f32 %f3237, %f4, %f639, %f3; - cvt.rn.f32.s32 %f640, %r317; - fma.rn.f32 %f3236, %f640, %f639, %f2; - add.f32 %f3235, %f1, %f639; - add.s32 %r319, %r317, 1; - mov.f32 %f1, %f3235; - mov.f32 %f2, %f3236; - mov.f32 %f3, %f3237; - -BB7_14: - setp.lt.u32 %p19, %r71, 4; - @%p19 bra BB7_16; - -BB7_15: - mad.lo.s32 %r93, %r319, %r71, %r315; - add.s32 %r94, %r93, %r4; - mul.wide.s32 %rd83, %r94, 4; - add.s64 %rd84, %rd1, %rd83; - ld.global.f32 %f641, [%rd84]; - fma.rn.f32 %f642, %f4, %f641, %f3237; - cvt.rn.f32.s32 %f643, %r319; - fma.rn.f32 %f644, %f643, %f641, %f3236; - add.f32 %f645, %f3235, %f641; - cvt.s64.s32 %rd85, %r6; - add.s64 %rd86, %rd84, %rd85; - ld.global.f32 %f646, [%rd86]; - fma.rn.f32 %f647, %f4, %f646, %f642; - add.s32 %r95, %r319, 1; - cvt.rn.f32.s32 %f648, %r95; - fma.rn.f32 %f649, %f648, %f646, %f644; - add.f32 %f650, %f645, %f646; - add.s64 %rd87, %rd86, %rd85; - ld.global.f32 %f651, [%rd87]; - fma.rn.f32 %f652, %f4, %f651, %f647; - add.s32 %r96, %r319, 2; - cvt.rn.f32.s32 %f653, %r96; - fma.rn.f32 %f654, %f653, %f651, %f649; - add.f32 %f655, %f650, %f651; - add.s64 %rd88, %rd87, %rd85; - ld.global.f32 %f656, [%rd88]; - fma.rn.f32 %f3237, %f4, %f656, %f652; - add.s32 %r97, %r319, 3; - cvt.rn.f32.s32 %f657, %r97; - fma.rn.f32 %f3236, %f657, %f656, %f654; - add.f32 %f3235, %f655, %f656; - add.s32 %r319, %r319, 4; - setp.lt.s32 %p20, %r319, %r71; - mov.f32 %f1, %f3235; - mov.f32 %f2, %f3236; - mov.f32 %f3, %f3237; - @%p20 bra BB7_15; - -BB7_16: - add.s32 %r315, %r315, 1; - setp.lt.s32 %p21, %r315, %r71; - @%p21 bra BB7_5; - -BB7_17: - div.rn.f32 %f3343, %f3, %f1; - div.rn.f32 %f3342, %f2, %f1; - mov.f32 %f660, 0f3F000000; - div.rn.f32 %f661, %f660, %f3338; - div.rn.f32 %f40, %f661, %f3338; - mov.f32 %f3246, 0f51BA43B7; - mov.f32 %f3247, %f629; - @%p15 bra BB7_36; - - and.b32 %r16, %r71, 3; - mov.f32 %f3247, 0f00000000; - mov.u32 %r98, 0; - mov.f32 %f3246, 0f51BA43B7; - mov.u32 %r320, %r98; - -BB7_19: - mov.u32 %r321, %r98; - -BB7_20: - cvt.rn.f32.s32 %f666, %r321; - mul.f32 %f667, %f666, %f666; - mul.f32 %f45, %f40, %f667; - neg.f32 %f46, %f45; - mul.f32 %f47, %f45, 0fBFB8AA3B; - mov.f32 %f665, 0f00000000; - mov.f32 %f3264, %f665; - mov.f32 %f3265, %f665; - mov.u32 %r322, %r98; - -BB7_21: - sub.s32 %r102, %r322, %r320; - cvt.rn.f32.s32 %f50, %r102; - mul.lo.s32 %r20, %r322, %r71; - setp.eq.s32 %p23, %r16, 0; - @%p23 bra BB7_22; - - setp.eq.s32 %p24, %r16, 1; - @%p24 bra BB7_26; - bra.uni BB7_24; - -BB7_26: - mul.f32 %f682, %f50, %f50; - mul.f32 %f3255, %f40, %f682; - neg.f32 %f683, %f3255; - mul.f32 %f684, %f3255, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f685, %f684; - mov.f32 %f686, 0fBF317200; - fma.rn.f32 %f687, %f685, %f686, %f683; - mov.f32 %f688, 0fB5BFBE8E; - fma.rn.f32 %f689, %f685, %f688, %f687; - mul.f32 %f690, %f689, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f691, %f690; - add.f32 %f692, %f685, 0f00000000; - ex2.approx.f32 %f693, %f692; - mul.f32 %f3254, %f691, %f693; - mov.u32 %r324, 0; - bra.uni BB7_29; - -BB7_22: - mov.f32 %f3258, %f3264; - mov.f32 %f3259, %f3265; - mov.u32 %r326, %r98; - mov.f32 %f3264, %f665; - mov.f32 %f3265, %f665; - bra.uni BB7_30; - -BB7_24: - setp.ne.s32 %p25, %r16, 2; - @%p25 bra BB7_27; - - mul.f32 %f670, %f50, %f50; - mul.f32 %f3255, %f40, %f670; - neg.f32 %f671, %f3255; - mul.f32 %f672, %f3255, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f673, %f672; - mov.f32 %f674, 0fBF317200; - fma.rn.f32 %f675, %f673, %f674, %f671; - mov.f32 %f676, 0fB5BFBE8E; - fma.rn.f32 %f677, %f673, %f676, %f675; - mul.f32 %f678, %f677, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f679, %f678; - add.f32 %f680, %f673, 0f00000000; - ex2.approx.f32 %f681, %f680; - mul.f32 %f3254, %f679, %f681; - mov.u32 %r323, 0; - bra.uni BB7_28; - -BB7_27: - setp.lt.f32 %p26, %f45, 0fC2D20000; - mul.f32 %f694, %f50, %f50; - mul.f32 %f3255, %f40, %f694; - neg.f32 %f695, %f3255; - mul.f32 %f696, %f3255, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f697, %f696; - mov.f32 %f698, 0fBF317200; - fma.rn.f32 %f699, %f697, %f698, %f695; - mov.f32 %f700, 0fB5BFBE8E; - fma.rn.f32 %f701, %f697, %f700, %f699; - mul.f32 %f702, %f701, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f703, %f702; - add.f32 %f704, %f697, 0f00000000; - ex2.approx.f32 %f705, %f704; - mul.f32 %f3254, %f703, %f705; - setp.gt.f32 %p27, %f3255, 0f42D20000; - selp.f32 %f706, 0f00000000, %f3254, %p27; - setp.lt.f32 %p28, %f3255, 0fC2D20000; - selp.f32 %f707, 0f7F800000, %f706, %p28; - cvt.rzi.f32.f32 %f708, %f47; - fma.rn.f32 %f709, %f708, %f698, %f46; - fma.rn.f32 %f710, %f708, %f700, %f709; - mul.f32 %f711, %f710, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f712, %f711; - add.f32 %f713, %f708, 0f00000000; - ex2.approx.f32 %f714, %f713; - mul.f32 %f715, %f712, %f714; - setp.gt.f32 %p29, %f45, 0f42D20000; - selp.f32 %f716, 0f00000000, %f715, %p29; - selp.f32 %f717, 0f7F800000, %f716, %p26; - mul.f32 %f718, %f707, %f717; - add.s32 %r106, %r20, %r4; - mul.wide.s32 %rd89, %r106, 4; - add.s64 %rd90, %rd1, %rd89; - ld.global.f32 %f719, [%rd90]; - fma.rn.f32 %f3265, %f719, %f718, %f3265; - add.f32 %f3264, %f3264, %f718; - mov.u32 %r323, 1; - -BB7_28: - sub.s32 %r107, %r321, %r323; - cvt.rn.f32.s32 %f720, %r107; - mul.f32 %f721, %f720, %f720; - setp.gt.f32 %p30, %f3255, 0f42D20000; - selp.f32 %f722, 0f00000000, %f3254, %p30; - setp.lt.f32 %p31, %f3255, 0fC2D20000; - selp.f32 %f723, 0f7F800000, %f722, %p31; - mul.f32 %f724, %f40, %f721; - neg.f32 %f725, %f724; - mul.f32 %f726, %f724, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f727, %f726; - mov.f32 %f728, 0fBF317200; - fma.rn.f32 %f729, %f727, %f728, %f725; - mov.f32 %f730, 0fB5BFBE8E; - fma.rn.f32 %f731, %f727, %f730, %f729; - mul.f32 %f732, %f731, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f733, %f732; - add.f32 %f734, %f727, 0f00000000; - ex2.approx.f32 %f735, %f734; - mul.f32 %f736, %f733, %f735; - setp.gt.f32 %p32, %f724, 0f42D20000; - selp.f32 %f737, 0f00000000, %f736, %p32; - setp.lt.f32 %p33, %f724, 0fC2D20000; - selp.f32 %f738, 0f7F800000, %f737, %p33; - mul.f32 %f739, %f723, %f738; - add.s32 %r108, %r323, %r20; - add.s32 %r109, %r108, %r4; - mul.wide.s32 %rd91, %r109, 4; - add.s64 %rd92, %rd1, %rd91; - ld.global.f32 %f740, [%rd92]; - fma.rn.f32 %f3265, %f740, %f739, %f3265; - add.f32 %f3264, %f3264, %f739; - add.s32 %r324, %r323, 1; - -BB7_29: - sub.s32 %r110, %r321, %r324; - cvt.rn.f32.s32 %f741, %r110; - mul.f32 %f742, %f741, %f741; - setp.gt.f32 %p34, %f3255, 0f42D20000; - selp.f32 %f743, 0f00000000, %f3254, %p34; - setp.lt.f32 %p35, %f3255, 0fC2D20000; - selp.f32 %f744, 0f7F800000, %f743, %p35; - mul.f32 %f745, %f40, %f742; - neg.f32 %f746, %f745; - mul.f32 %f747, %f745, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f748, %f747; - mov.f32 %f749, 0fBF317200; - fma.rn.f32 %f750, %f748, %f749, %f746; - mov.f32 %f751, 0fB5BFBE8E; - fma.rn.f32 %f752, %f748, %f751, %f750; - mul.f32 %f753, %f752, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f754, %f753; - add.f32 %f755, %f748, 0f00000000; - ex2.approx.f32 %f756, %f755; - mul.f32 %f757, %f754, %f756; - setp.gt.f32 %p36, %f745, 0f42D20000; - selp.f32 %f758, 0f00000000, %f757, %p36; - setp.lt.f32 %p37, %f745, 0fC2D20000; - selp.f32 %f759, 0f7F800000, %f758, %p37; - mul.f32 %f760, %f744, %f759; - add.s32 %r111, %r324, %r20; - add.s32 %r112, %r111, %r4; - mul.wide.s32 %rd93, %r112, 4; - add.s64 %rd94, %rd1, %rd93; - ld.global.f32 %f761, [%rd94]; - fma.rn.f32 %f3259, %f761, %f760, %f3265; - add.f32 %f3258, %f3264, %f760; - add.s32 %r326, %r324, 1; - mov.f32 %f3264, %f3258; - mov.f32 %f3265, %f3259; - -BB7_30: - setp.lt.u32 %p38, %r71, 4; - @%p38 bra BB7_33; - - mul.f32 %f762, %f50, %f50; - mul.f32 %f763, %f40, %f762; - neg.f32 %f764, %f763; - mul.f32 %f765, %f763, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f766, %f765; - mov.f32 %f767, 0fBF317200; - fma.rn.f32 %f768, %f766, %f767, %f764; - mov.f32 %f769, 0fB5BFBE8E; - fma.rn.f32 %f770, %f766, %f769, %f768; + .reg .pred %p<746>; + .reg .f32 %f<3164>; + .reg .b32 %r<850>; + .reg .f64 %fd<627>; + .reg .b64 %rd<57>; + + + ld.param.u64 %rd3, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_0]; + ld.param.u64 %rd4, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_1]; + ld.param.u64 %rd5, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_2]; + ld.param.f32 %f3064, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_3]; + ld.param.u32 %r104, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_4]; + ld.param.u32 %r105, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_5]; + ld.param.u32 %r106, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_6]; + ld.param.u32 %r107, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_10]; + cvta.to.global.u64 %rd1, %rd3; + mov.u32 %r108, %ntid.x; + mov.u32 %r109, %ctaid.x; + mov.u32 %r110, %tid.x; + mad.lo.s32 %r1, %r109, %r108, %r110; + setp.ge.s32 %p43, %r1, %r107; + @%p43 bra $L__BB7_463; + + mul.lo.s32 %r111, %r104, %r104; + mul.lo.s32 %r2, %r111, %r1; + setp.lt.s32 %p44, %r104, 1; + mov.f32 %f510, 0f00000000; + mov.f32 %f2978, %f510; + mov.f32 %f2979, %f510; + mov.f32 %f2980, %f510; + @%p44 bra $L__BB7_11; + + add.s32 %r3, %r104, -1; + and.b32 %r4, %r104, 3; + sub.s32 %r5, %r104, %r4; + shl.b32 %r6, %r104, 2; + mov.u32 %r112, 0; + setp.lt.u32 %p45, %r3, 3; + setp.eq.s32 %p47, %r4, 0; + setp.eq.s32 %p48, %r4, 1; + setp.eq.s32 %p49, %r4, 2; + cvt.s64.s32 %rd11, %r6; + mov.u32 %r837, %r112; + +$L__BB7_3: + cvt.rn.f32.s32 %f4, %r837; + mov.u32 %r840, %r112; + @%p45 bra $L__BB7_6; + + mov.u32 %r840, %r112; + mov.u32 %r839, %r5; + +$L__BB7_5: + mad.lo.s32 %r115, %r840, %r104, %r837; + add.s32 %r116, %r115, %r2; + mul.wide.s32 %rd9, %r116, 4; + add.s64 %rd10, %rd1, %rd9; + ld.global.f32 %f515, [%rd10]; + fma.rn.f32 %f516, %f515, %f4, %f2978; + cvt.rn.f32.s32 %f517, %r840; + fma.rn.f32 %f518, %f515, %f517, %f2979; + add.f32 %f519, %f2980, %f515; + add.s64 %rd12, %rd10, %rd11; + ld.global.f32 %f520, [%rd12]; + fma.rn.f32 %f521, %f520, %f4, %f516; + add.s32 %r117, %r840, 1; + cvt.rn.f32.s32 %f522, %r117; + fma.rn.f32 %f523, %f520, %f522, %f518; + add.f32 %f524, %f519, %f520; + add.s64 %rd13, %rd12, %rd11; + ld.global.f32 %f525, [%rd13]; + fma.rn.f32 %f526, %f525, %f4, %f521; + add.s32 %r118, %r840, 2; + cvt.rn.f32.s32 %f527, %r118; + fma.rn.f32 %f528, %f525, %f527, %f523; + add.f32 %f529, %f524, %f525; + add.s64 %rd14, %rd13, %rd11; + ld.global.f32 %f530, [%rd14]; + fma.rn.f32 %f2978, %f530, %f4, %f526; + add.s32 %r119, %r840, 3; + cvt.rn.f32.s32 %f531, %r119; + fma.rn.f32 %f2979, %f530, %f531, %f528; + add.f32 %f2980, %f529, %f530; + add.s32 %r840, %r840, 4; + add.s32 %r839, %r839, -4; + setp.ne.s32 %p46, %r839, 0; + @%p46 bra $L__BB7_5; + +$L__BB7_6: + @%p47 bra $L__BB7_10; + + mad.lo.s32 %r13, %r840, %r104, %r837; + add.s32 %r120, %r13, %r2; + mul.wide.s32 %rd15, %r120, 4; + add.s64 %rd16, %rd1, %rd15; + ld.global.f32 %f532, [%rd16]; + fma.rn.f32 %f2978, %f532, %f4, %f2978; + cvt.rn.f32.s32 %f533, %r840; + fma.rn.f32 %f2979, %f532, %f533, %f2979; + add.f32 %f2980, %f2980, %f532; + @%p48 bra $L__BB7_10; + + add.s32 %r14, %r13, %r104; + add.s32 %r121, %r14, %r2; + mul.wide.s32 %rd17, %r121, 4; + add.s64 %rd18, %rd1, %rd17; + ld.global.f32 %f534, [%rd18]; + fma.rn.f32 %f2978, %f534, %f4, %f2978; + add.s32 %r122, %r840, 1; + cvt.rn.f32.s32 %f535, %r122; + fma.rn.f32 %f2979, %f534, %f535, %f2979; + add.f32 %f2980, %f2980, %f534; + @%p49 bra $L__BB7_10; + + add.s32 %r123, %r840, 2; + add.s32 %r124, %r14, %r104; + add.s32 %r125, %r124, %r2; + mul.wide.s32 %rd19, %r125, 4; + add.s64 %rd20, %rd1, %rd19; + ld.global.f32 %f536, [%rd20]; + fma.rn.f32 %f2978, %f536, %f4, %f2978; + cvt.rn.f32.s32 %f537, %r123; + fma.rn.f32 %f2979, %f536, %f537, %f2979; + add.f32 %f2980, %f2980, %f536; + +$L__BB7_10: + add.s32 %r837, %r837, 1; + setp.lt.s32 %p50, %r837, %r104; + @%p50 bra $L__BB7_3; + +$L__BB7_11: + div.rn.f32 %f3069, %f2978, %f2980; + div.rn.f32 %f3068, %f2979, %f2980; + mov.f32 %f3066, 0f51BA43B7; + mov.f32 %f2987, %f510; + @%p44 bra $L__BB7_51; + + mov.f32 %f542, 0f3F000000; + div.rn.f32 %f543, %f542, %f3064; + div.rn.f32 %f544, %f543, %f3064; + cvt.f64.f32 %fd1, %f544; + mov.f64 %fd246, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r16}, %fd246; + } + and.b32 %r17, %r16, 2146435072; + and.b32 %r18, %r16, 2147483647; + setp.gt.s32 %p52, %r16, -1; + selp.b32 %r19, 2146435072, 0, %p52; + mov.u32 %r126, 0; + or.b32 %r20, %r19, -2147483648; + setp.eq.s32 %p54, %r17, 1062207488; + setp.lt.s32 %p55, %r16, 0; + setp.ne.s32 %p60, %r18, 1071644672; + setp.eq.s32 %p87, %r18, 2146435072; + mov.u32 %r841, %r126; + mov.f32 %f2987, %f510; + +$L__BB7_13: + mov.u32 %r842, %r126; + +$L__BB7_14: + mov.f32 %f2990, 0f00000000; + mov.f32 %f2991, %f2990; + mov.u32 %r843, %r126; + +$L__BB7_15: + sub.s32 %r24, %r843, %r841; + cvt.rn.f32.s32 %f547, %r24; + cvt.f64.f32 %fd2, %f547; + { + .reg .b32 %temp; + mov.b64 {%temp, %r25}, %fd2; + } + abs.f64 %fd247, %fd2; + { // callseq 154, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd247; + .param .b64 param1; + st.param.f64 [param1+0], %fd246; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd3, [retval0+0]; + } // callseq 154 + setp.lt.s32 %p53, %r25, 0; + and.pred %p1, %p53, %p54; + selp.b32 %r130, %r25, 0, %p54; + or.b32 %r131, %r130, 2146435072; + selp.b32 %r26, %r131, %r130, %p55; + add.f64 %fd4, %fd2, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r132}, %fd4; + } + and.b32 %r27, %r132, 2146435072; + setp.ne.s32 %p56, %r27, 2146435072; + setp.gtu.f64 %p57, %fd247, 0d7FF0000000000000; + setp.gt.f64 %p58, %fd247, 0d3FF0000000000000; + selp.b32 %r133, 2146435072, 0, %p58; + xor.b32 %r134, %r133, 2146435072; + selp.b32 %r135, %r134, %r133, %p55; + setp.eq.s32 %p59, %r24, -1; + selp.b32 %r28, 1072693248, %r135, %p59; + and.b32 %r29, %r25, 2147483647; + and.pred %p61, %p60, %p1; + selp.b32 %r30, %r20, %r19, %p61; + mul.lo.s32 %r31, %r843, %r104; + or.pred %p2, %p56, %p57; + mov.u32 %r844, %r126; + +$L__BB7_16: + not.pred %p62, %p1; + mov.f64 %fd562, %fd3; + @%p62 bra $L__BB7_18; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r136}, %fd3; + } + xor.b32 %r137, %r136, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r138, %temp}, %fd3; + } + mov.b64 %fd562, {%r138, %r137}; + +$L__BB7_18: + setp.eq.s32 %p63, %r24, 0; + @%p63 bra $L__BB7_22; + + setp.gt.s32 %p64, %r25, -1; + @%p64 bra $L__BB7_23; + + cvt.rzi.f64.f64 %fd250, %fd246; + setp.eq.f64 %p65, %fd250, 0d4000000000000000; + @%p65 bra $L__BB7_23; + + mov.f64 %fd562, 0dFFF8000000000000; + bra.uni $L__BB7_23; + +$L__BB7_22: + mov.u32 %r139, 0; + mov.b64 %fd562, {%r139, %r26}; + +$L__BB7_23: + selp.f64 %fd563, %fd562, %fd4, %p56; + @%p2 bra $L__BB7_28; + + { + .reg .b32 %temp; + mov.b64 {%r140, %temp}, %fd246; + } + setp.eq.s32 %p68, %r140, 0; + and.pred %p69, %p87, %p68; + @%p69 bra $L__BB7_27; + bra.uni $L__BB7_25; + +$L__BB7_27: + mov.u32 %r143, 0; + mov.b64 %fd563, {%r143, %r28}; + bra.uni $L__BB7_28; + +$L__BB7_25: + setp.ne.s32 %p70, %r29, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r141, %temp}, %fd2; + } + setp.ne.s32 %p71, %r141, 0; + or.pred %p72, %p70, %p71; + mov.f64 %fd563, %fd562; + @%p72 bra $L__BB7_28; + + mov.u32 %r142, 0; + mov.b64 %fd563, {%r142, %r30}; + +$L__BB7_28: + setp.eq.s32 %p73, %r24, 1; + selp.f64 %fd253, 0d3FF0000000000000, %fd563, %p73; + mov.f64 %fd254, 0d3FF0000000000000; + mul.f64 %fd13, %fd253, %fd1; + neg.f64 %fd255, %fd13; + mov.f64 %fd256, 0d4338000000000000; + mov.f64 %fd257, 0d3FF71547652B82FE; + fma.rn.f64 %fd258, %fd255, %fd257, %fd256; + { + .reg .b32 %temp; + mov.b64 {%r33, %temp}, %fd258; + } + mov.f64 %fd259, 0dC338000000000000; + add.rn.f64 %fd260, %fd258, %fd259; + mov.f64 %fd261, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd262, %fd260, %fd261, %fd255; + mov.f64 %fd263, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd264, %fd260, %fd263, %fd262; + mov.f64 %fd265, 0d3E928AF3FCA213EA; + mov.f64 %fd266, 0d3E5ADE1569CE2BDF; + fma.rn.f64 %fd267, %fd266, %fd264, %fd265; + mov.f64 %fd268, 0d3EC71DEE62401315; + fma.rn.f64 %fd269, %fd267, %fd264, %fd268; + mov.f64 %fd270, 0d3EFA01997C89EB71; + fma.rn.f64 %fd271, %fd269, %fd264, %fd270; + mov.f64 %fd272, 0d3F2A01A014761F65; + fma.rn.f64 %fd273, %fd271, %fd264, %fd272; + mov.f64 %fd274, 0d3F56C16C1852B7AF; + fma.rn.f64 %fd275, %fd273, %fd264, %fd274; + mov.f64 %fd276, 0d3F81111111122322; + fma.rn.f64 %fd277, %fd275, %fd264, %fd276; + mov.f64 %fd278, 0d3FA55555555502A1; + fma.rn.f64 %fd279, %fd277, %fd264, %fd278; + mov.f64 %fd280, 0d3FC5555555555511; + fma.rn.f64 %fd281, %fd279, %fd264, %fd280; + mov.f64 %fd282, 0d3FE000000000000B; + fma.rn.f64 %fd283, %fd281, %fd264, %fd282; + fma.rn.f64 %fd284, %fd283, %fd264, %fd254; + fma.rn.f64 %fd285, %fd284, %fd264, %fd254; + { + .reg .b32 %temp; + mov.b64 {%r34, %temp}, %fd285; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r35}, %fd285; + } + shl.b32 %r144, %r33, 20; + add.s32 %r145, %r35, %r144; + mov.b64 %fd564, {%r34, %r145}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r146}, %fd255; + } + mov.b32 %f548, %r146; + abs.f32 %f42, %f548; + setp.lt.f32 %p74, %f42, 0f4086232B; + @%p74 bra $L__BB7_31; + + setp.gt.f64 %p75, %fd13, 0d8000000000000000; + mov.f64 %fd286, 0d7FF0000000000000; + sub.f64 %fd287, %fd286, %fd13; + selp.f64 %fd564, 0d0000000000000000, %fd287, %p75; + setp.geu.f32 %p76, %f42, 0f40874800; + @%p76 bra $L__BB7_31; + + shr.u32 %r147, %r33, 31; + add.s32 %r148, %r33, %r147; + shr.s32 %r149, %r148, 1; + shl.b32 %r150, %r149, 20; + add.s32 %r151, %r35, %r150; + mov.b64 %fd288, {%r34, %r151}; + sub.s32 %r152, %r33, %r149; + shl.b32 %r153, %r152, 20; + add.s32 %r154, %r153, 1072693248; + mov.u32 %r155, 0; + mov.b64 %fd289, {%r155, %r154}; + mul.f64 %fd564, %fd288, %fd289; + +$L__BB7_31: + sub.s32 %r36, %r842, %r844; + cvt.rn.f32.s32 %f549, %r36; + cvt.f64.f32 %fd18, %f549; + { + .reg .b32 %temp; + mov.b64 {%temp, %r37}, %fd18; + } + abs.f64 %fd19, %fd18; + { // callseq 155, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd19; + .param .b64 param1; + st.param.f64 [param1+0], %fd246; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd566, [retval0+0]; + } // callseq 155 + setp.lt.s32 %p77, %r37, 0; + and.pred %p3, %p77, %p54; + not.pred %p79, %p3; + @%p79 bra $L__BB7_33; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r156}, %fd566; + } + xor.b32 %r157, %r156, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r158, %temp}, %fd566; + } + mov.b64 %fd566, {%r158, %r157}; + +$L__BB7_33: + setp.eq.s32 %p80, %r36, 0; + @%p80 bra $L__BB7_37; + + setp.gt.s32 %p81, %r37, -1; + @%p81 bra $L__BB7_38; + + cvt.rzi.f64.f64 %fd292, %fd246; + setp.eq.f64 %p82, %fd292, 0d4000000000000000; + @%p82 bra $L__BB7_38; + + mov.f64 %fd566, 0dFFF8000000000000; + bra.uni $L__BB7_38; + +$L__BB7_37: + mov.u32 %r159, 0; + selp.b32 %r160, %r37, 0, %p54; + or.b32 %r161, %r160, 2146435072; + selp.b32 %r162, %r161, %r160, %p55; + mov.b64 %fd566, {%r159, %r162}; + +$L__BB7_38: + add.f64 %fd25, %fd18, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r163}, %fd25; + } + and.b32 %r164, %r163, 2146435072; + setp.ne.s32 %p85, %r164, 2146435072; + mov.f64 %fd567, %fd566; + @%p85 bra $L__BB7_44; + + setp.gtu.f64 %p86, %fd19, 0d7FF0000000000000; + mov.f64 %fd567, %fd25; + @%p86 bra $L__BB7_44; + + { + .reg .b32 %temp; + mov.b64 {%r165, %temp}, %fd246; + } + setp.eq.s32 %p88, %r165, 0; + and.pred %p89, %p87, %p88; + @%p89 bra $L__BB7_43; + bra.uni $L__BB7_41; + +$L__BB7_43: + mov.u32 %r170, 0; + setp.gt.f64 %p96, %fd19, 0d3FF0000000000000; + selp.b32 %r171, 2146435072, 0, %p96; + xor.b32 %r172, %r171, 2146435072; + selp.b32 %r173, %r172, %r171, %p55; + setp.eq.s32 %p97, %r36, -1; + selp.b32 %r174, 1072693248, %r173, %p97; + mov.b64 %fd567, {%r170, %r174}; + bra.uni $L__BB7_44; + +$L__BB7_41: + { + .reg .b32 %temp; + mov.b64 {%r166, %temp}, %fd18; + } + and.b32 %r167, %r37, 2147483647; + setp.ne.s32 %p90, %r167, 2146435072; + setp.ne.s32 %p91, %r166, 0; + or.pred %p92, %p90, %p91; + mov.f64 %fd567, %fd566; + @%p92 bra $L__BB7_44; + + and.pred %p94, %p60, %p3; + selp.b32 %r168, %r20, %r19, %p94; + mov.u32 %r169, 0; + mov.b64 %fd567, {%r169, %r168}; + +$L__BB7_44: + setp.eq.s32 %p98, %r36, 1; + selp.f64 %fd295, 0d3FF0000000000000, %fd567, %p98; + mul.f64 %fd29, %fd295, %fd1; + neg.f64 %fd297, %fd29; + fma.rn.f64 %fd300, %fd297, %fd257, %fd256; + { + .reg .b32 %temp; + mov.b64 {%r38, %temp}, %fd300; + } + add.rn.f64 %fd302, %fd300, %fd259; + fma.rn.f64 %fd304, %fd302, %fd261, %fd297; + fma.rn.f64 %fd306, %fd302, %fd263, %fd304; + fma.rn.f64 %fd309, %fd266, %fd306, %fd265; + fma.rn.f64 %fd311, %fd309, %fd306, %fd268; + fma.rn.f64 %fd313, %fd311, %fd306, %fd270; + fma.rn.f64 %fd315, %fd313, %fd306, %fd272; + fma.rn.f64 %fd317, %fd315, %fd306, %fd274; + fma.rn.f64 %fd319, %fd317, %fd306, %fd276; + fma.rn.f64 %fd321, %fd319, %fd306, %fd278; + fma.rn.f64 %fd323, %fd321, %fd306, %fd280; + fma.rn.f64 %fd325, %fd323, %fd306, %fd282; + fma.rn.f64 %fd326, %fd325, %fd306, %fd254; + fma.rn.f64 %fd327, %fd326, %fd306, %fd254; + { + .reg .b32 %temp; + mov.b64 {%r39, %temp}, %fd327; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r40}, %fd327; + } + shl.b32 %r175, %r38, 20; + add.s32 %r176, %r40, %r175; + mov.b64 %fd568, {%r39, %r176}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r177}, %fd297; + } + mov.b32 %f550, %r177; + abs.f32 %f43, %f550; + setp.lt.f32 %p99, %f43, 0f4086232B; + @%p99 bra $L__BB7_47; + + setp.gt.f64 %p100, %fd29, 0d8000000000000000; + mov.f64 %fd328, 0d7FF0000000000000; + sub.f64 %fd329, %fd328, %fd29; + selp.f64 %fd568, 0d0000000000000000, %fd329, %p100; + setp.geu.f32 %p101, %f43, 0f40874800; + @%p101 bra $L__BB7_47; + + shr.u32 %r178, %r38, 31; + add.s32 %r179, %r38, %r178; + shr.s32 %r180, %r179, 1; + shl.b32 %r181, %r180, 20; + add.s32 %r182, %r40, %r181; + mov.b64 %fd330, {%r39, %r182}; + sub.s32 %r183, %r38, %r180; + shl.b32 %r184, %r183, 20; + add.s32 %r185, %r184, 1072693248; + mov.u32 %r186, 0; + mov.b64 %fd331, {%r186, %r185}; + mul.f64 %fd568, %fd330, %fd331; + +$L__BB7_47: + add.s32 %r187, %r844, %r31; + add.s32 %r188, %r187, %r2; + mul.wide.s32 %rd21, %r188, 4; + add.s64 %rd22, %rd1, %rd21; + ld.global.f32 %f551, [%rd22]; + cvt.f64.f32 %fd332, %f551; + mul.f64 %fd333, %fd564, %fd568; + cvt.f64.f32 %fd334, %f2991; + fma.rn.f64 %fd335, %fd333, %fd332, %fd334; + cvt.rn.f32.f64 %f2991, %fd335; + cvt.f64.f32 %fd336, %f2990; + add.f64 %fd337, %fd333, %fd336; + cvt.rn.f32.f64 %f2990, %fd337; + add.s32 %r844, %r844, 1; + setp.lt.s32 %p102, %r844, %r104; + @%p102 bra $L__BB7_16; + + add.s32 %r843, %r843, 1; + setp.lt.s32 %p103, %r843, %r104; + @%p103 bra $L__BB7_15; + + div.rn.f32 %f552, %f2991, %f2990; + max.f32 %f2987, %f2987, %f552; + min.f32 %f3066, %f3066, %f552; + add.s32 %r842, %r842, 1; + setp.lt.s32 %p104, %r842, %r104; + @%p104 bra $L__BB7_14; + + add.s32 %r841, %r841, 1; + setp.lt.s32 %p105, %r841, %r104; + @%p105 bra $L__BB7_13; + +$L__BB7_51: + sub.f32 %f554, %f2987, %f3066; + add.f32 %f555, %f554, %f554; + fma.rn.f32 %f556, %f554, 0f40000000, %f555; + mul.f32 %f557, %f556, 0f40490FD8; + mul.f32 %f558, %f557, %f3064; + mul.f32 %f559, %f558, %f3064; + max.f32 %f3067, %f510, %f559; + setp.lt.s32 %p106, %r106, 1; + mov.f32 %f3065, %f3064; + @%p106 bra $L__BB7_373; + + cvt.rn.f32.s32 %f562, %r104; + mul.f32 %f51, %f562, 0f3F000000; + cvt.rn.f32.s32 %f52, %r105; + mov.u32 %r845, 0; + cvta.to.global.u64 %rd23, %rd4; + mov.f64 %fd339, 0d4008000000000000; + mov.f64 %fd345, 0d4014000000000000; + cvta.to.global.u64 %rd29, %rd5; + mov.f32 %f3065, %f3064; + +$L__BB7_53: + mov.f32 %f3013, 0f00000000; + mov.f32 %f3014, %f3013; + mov.f32 %f3015, %f3013; + mov.f32 %f3016, %f3013; + mov.f32 %f3017, %f3013; + mov.f32 %f3018, %f3013; + mov.f32 %f3019, %f3013; + mov.f32 %f3020, %f3013; + mov.f32 %f3021, %f3013; + mov.f32 %f3022, %f3013; + mov.f32 %f3023, %f3013; + mov.f32 %f3024, %f3013; + @%p44 bra $L__BB7_372; + + mov.f32 %f3013, 0f00000000; + mov.f32 %f587, 0f3F000000; + div.rn.f32 %f588, %f587, %f3065; + div.rn.f32 %f60, %f588, %f3065; + div.rn.f32 %f589, %f587, %f3064; + div.rn.f32 %f61, %f589, %f3064; + div.rn.f32 %f590, %f3067, 0fC0206C98; + div.rn.f32 %f62, %f590, %f3065; + cvt.f64.f32 %fd34, %f590; + div.rn.f32 %f63, %f590, %f3064; + div.rn.f32 %f64, %f62, %f3065; + mov.f32 %f591, 0fC0000000; + div.rn.f32 %f65, %f591, %f3065; + div.rn.f32 %f592, %f3067, 0f40206C98; + cvt.f64.f32 %fd35, %f592; + div.rn.f32 %f66, %f63, %f3064; + div.rn.f32 %f67, %f591, %f3064; + shl.b32 %r195, %r1, 1; + mul.wide.s32 %rd24, %r195, 4; + add.s64 %rd25, %rd23, %rd24; + ld.global.f32 %f68, [%rd25+4]; + ld.global.f32 %f69, [%rd25]; + mov.u32 %r846, 0; + +$L__BB7_55: + mov.u32 %r847, 0; + mov.f32 %f2839, 0f00000000; + cvt.rn.f32.s32 %f82, %r846; + sub.f32 %f83, %f82, %f3069; + add.f32 %f84, %f83, 0f3F000000; + sqrt.rn.f32 %f593, %f60; + mul.f32 %f594, %f84, %f593; + abs.f32 %f85, %f594; + setp.ge.f32 %p108, %f85, 0f3F8060FE; + mul.f32 %f595, %f594, %f594; + selp.f32 %f596, %f85, %f595, %p108; + selp.f32 %f597, 0f3789CA3C, 0f38B1E96A, %p108; + selp.f32 %f598, 0fB9F560B9, 0fBA574D20, %p108; + fma.rn.f32 %f599, %f597, %f596, %f598; + selp.f32 %f600, 0f3BAC840B, 0f3BAAD5EA, %p108; + fma.rn.f32 %f601, %f599, %f596, %f600; + selp.f32 %f602, 0fBD0C8162, 0fBCDC1BE7, %p108; + fma.rn.f32 %f603, %f601, %f596, %f602; + selp.f32 %f604, 0f3E1CF906, 0f3DE718AF, %p108; + fma.rn.f32 %f605, %f603, %f596, %f604; + selp.f32 %f606, 0f3F6A937E, 0fBEC093AC, %p108; + fma.rn.f32 %f607, %f605, %f596, %f606; + selp.f32 %f608, 0f3F20D842, 0f3E0375D3, %p108; + fma.rn.f32 %f609, %f607, %f596, %f608; + neg.f32 %f610, %f85; + selp.f32 %f611, %f610, %f594, %p108; + fma.rn.f32 %f86, %f609, %f611, %f611; + mov.b32 %r197, %f594; + and.b32 %r47, %r197, -2147483648; + add.f32 %f87, %f83, 0fBF000000; + mul.f32 %f612, %f87, %f593; + abs.f32 %f88, %f612; + setp.ge.f32 %p109, %f88, 0f3F8060FE; + mul.f32 %f613, %f612, %f612; + selp.f32 %f614, %f88, %f613, %p109; + selp.f32 %f615, 0f3789CA3C, 0f38B1E96A, %p109; + selp.f32 %f616, 0fB9F560B9, 0fBA574D20, %p109; + fma.rn.f32 %f617, %f615, %f614, %f616; + selp.f32 %f618, 0f3BAC840B, 0f3BAAD5EA, %p109; + fma.rn.f32 %f619, %f617, %f614, %f618; + selp.f32 %f620, 0fBD0C8162, 0fBCDC1BE7, %p109; + fma.rn.f32 %f621, %f619, %f614, %f620; + selp.f32 %f622, 0f3E1CF906, 0f3DE718AF, %p109; + fma.rn.f32 %f623, %f621, %f614, %f622; + selp.f32 %f624, 0f3F6A937E, 0fBEC093AC, %p109; + fma.rn.f32 %f625, %f623, %f614, %f624; + selp.f32 %f626, 0f3F20D842, 0f3E0375D3, %p109; + fma.rn.f32 %f627, %f625, %f614, %f626; + neg.f32 %f628, %f88; + selp.f32 %f629, %f628, %f612, %p109; + fma.rn.f32 %f89, %f627, %f629, %f629; + mov.b32 %r198, %f612; + and.b32 %r48, %r198, -2147483648; + sqrt.rn.f32 %f90, %f61; + add.f32 %f630, %f82, 0f3F000000; + sub.f32 %f91, %f630, %f3069; + div.rn.f32 %f92, %f91, %f3065; + mov.f32 %f631, 0f3F800000; + cvt.rzi.f32.f32 %f632, %f631; + add.f32 %f633, %f632, %f632; + mov.f32 %f634, 0f40000000; + sub.f32 %f635, %f634, %f633; + abs.f32 %f93, %f635; + setp.eq.f32 %p110, %f93, 0f3F800000; + abs.f32 %f94, %f92; + setp.lt.f32 %p111, %f94, 0f00800000; + mul.f32 %f636, %f94, 0f4B800000; + selp.f32 %f637, %f636, %f94, %p111; + selp.f32 %f638, 0fC3170000, 0fC2FE0000, %p111; + mov.b32 %r199, %f637; + and.b32 %r200, %r199, 8388607; + or.b32 %r201, %r200, 1065353216; + mov.b32 %f639, %r201; + shr.u32 %r202, %r199, 23; + cvt.rn.f32.u32 %f640, %r202; + add.f32 %f641, %f638, %f640; + setp.gt.f32 %p112, %f639, 0f3FB504F3; + mul.f32 %f642, %f639, 0f3F000000; + add.f32 %f643, %f641, 0f3F800000; + selp.f32 %f644, %f643, %f641, %p112; + selp.f32 %f645, %f642, %f639, %p112; + add.f32 %f646, %f645, 0fBF800000; + add.f32 %f647, %f645, 0f3F800000; + rcp.approx.ftz.f32 %f648, %f647; + add.f32 %f649, %f646, %f646; + mul.f32 %f650, %f649, %f648; + mul.f32 %f651, %f650, %f650; + mov.f32 %f652, 0f3C4CAF63; + mov.f32 %f653, 0f3B18F0FE; + fma.rn.f32 %f654, %f653, %f651, %f652; + mov.f32 %f655, 0f3DAAAABD; + fma.rn.f32 %f656, %f654, %f651, %f655; + mul.rn.f32 %f657, %f656, %f651; + mul.rn.f32 %f658, %f657, %f650; + sub.f32 %f659, %f646, %f650; + add.f32 %f660, %f659, %f659; + neg.f32 %f661, %f650; + fma.rn.f32 %f662, %f661, %f646, %f660; + mul.rn.f32 %f663, %f648, %f662; + add.f32 %f664, %f658, %f650; + sub.f32 %f665, %f650, %f664; + add.f32 %f666, %f658, %f665; + add.f32 %f667, %f663, %f666; + add.f32 %f668, %f664, %f667; + sub.f32 %f669, %f664, %f668; + add.f32 %f670, %f667, %f669; + mov.f32 %f671, 0f3F317200; + mul.rn.f32 %f672, %f644, %f671; + mov.f32 %f673, 0f35BFBE8E; + mul.rn.f32 %f674, %f644, %f673; + add.f32 %f675, %f672, %f668; + sub.f32 %f676, %f672, %f675; + add.f32 %f677, %f668, %f676; + add.f32 %f678, %f670, %f677; + add.f32 %f679, %f674, %f678; + add.f32 %f680, %f675, %f679; + sub.f32 %f681, %f675, %f680; + add.f32 %f682, %f679, %f681; + mul.rn.f32 %f683, %f634, %f680; + neg.f32 %f684, %f683; + fma.rn.f32 %f685, %f634, %f680, %f684; + fma.rn.f32 %f686, %f634, %f682, %f685; + fma.rn.f32 %f688, %f2839, %f680, %f686; + add.rn.f32 %f689, %f683, %f688; + neg.f32 %f690, %f689; + add.rn.f32 %f691, %f683, %f690; + add.rn.f32 %f692, %f691, %f688; + mov.b32 %r203, %f689; + setp.eq.s32 %p113, %r203, 1118925336; + add.s32 %r204, %r203, -1; + mov.b32 %f693, %r204; + add.f32 %f694, %f692, 0f37000000; + selp.f32 %f95, %f694, %f692, %p113; + selp.f32 %f695, %f693, %f689, %p113; + mov.f32 %f696, 0f3FB8AA3B; + mul.rn.f32 %f697, %f695, %f696; + cvt.rzi.f32.f32 %f698, %f697; + abs.f32 %f699, %f698; + setp.gt.f32 %p114, %f699, 0f42FC0000; + mov.b32 %r205, %f698; + and.b32 %r206, %r205, -2147483648; + or.b32 %r207, %r206, 1123811328; + mov.b32 %f700, %r207; + selp.f32 %f701, %f700, %f698, %p114; + mov.f32 %f702, 0fBF317218; + fma.rn.f32 %f703, %f701, %f702, %f695; + mov.f32 %f704, 0f3102E308; + fma.rn.f32 %f705, %f701, %f704, %f703; + mul.f32 %f706, %f705, 0f3FB8AA3B; + add.f32 %f707, %f701, 0f4B40007F; + mov.b32 %r208, %f707; + shl.b32 %r209, %r208, 23; + mov.b32 %f708, %r209; + ex2.approx.ftz.f32 %f709, %f706; + mul.f32 %f96, %f709, %f708; + setp.lt.f32 %p115, %f92, 0f00000000; + and.pred %p4, %p115, %p110; + div.rn.f32 %f97, %f87, %f3065; + abs.f32 %f98, %f97; + setp.lt.f32 %p116, %f98, 0f00800000; + mul.f32 %f710, %f98, 0f4B800000; + selp.f32 %f711, %f710, %f98, %p116; + selp.f32 %f712, 0fC3170000, 0fC2FE0000, %p116; + mov.b32 %r210, %f711; + and.b32 %r211, %r210, 8388607; + or.b32 %r212, %r211, 1065353216; + mov.b32 %f713, %r212; + shr.u32 %r213, %r210, 23; + cvt.rn.f32.u32 %f714, %r213; + add.f32 %f715, %f712, %f714; + setp.gt.f32 %p117, %f713, 0f3FB504F3; + mul.f32 %f716, %f713, 0f3F000000; + add.f32 %f717, %f715, 0f3F800000; + selp.f32 %f718, %f717, %f715, %p117; + selp.f32 %f719, %f716, %f713, %p117; + add.f32 %f720, %f719, 0fBF800000; + add.f32 %f721, %f719, 0f3F800000; + rcp.approx.ftz.f32 %f722, %f721; + add.f32 %f723, %f720, %f720; + mul.f32 %f724, %f723, %f722; + mul.f32 %f725, %f724, %f724; + fma.rn.f32 %f726, %f653, %f725, %f652; + fma.rn.f32 %f727, %f726, %f725, %f655; + mul.rn.f32 %f728, %f727, %f725; + mul.rn.f32 %f729, %f728, %f724; + sub.f32 %f730, %f720, %f724; + add.f32 %f731, %f730, %f730; + neg.f32 %f732, %f724; + fma.rn.f32 %f733, %f732, %f720, %f731; + mul.rn.f32 %f734, %f722, %f733; + add.f32 %f735, %f729, %f724; + sub.f32 %f736, %f724, %f735; + add.f32 %f737, %f729, %f736; + add.f32 %f738, %f734, %f737; + add.f32 %f739, %f735, %f738; + sub.f32 %f740, %f735, %f739; + add.f32 %f741, %f738, %f740; + mul.rn.f32 %f742, %f718, %f671; + mul.rn.f32 %f743, %f718, %f673; + add.f32 %f744, %f742, %f739; + sub.f32 %f745, %f742, %f744; + add.f32 %f746, %f739, %f745; + add.f32 %f747, %f741, %f746; + add.f32 %f748, %f743, %f747; + add.f32 %f749, %f744, %f748; + sub.f32 %f750, %f744, %f749; + add.f32 %f751, %f748, %f750; + mul.rn.f32 %f752, %f634, %f749; + neg.f32 %f753, %f752; + fma.rn.f32 %f754, %f634, %f749, %f753; + fma.rn.f32 %f755, %f634, %f751, %f754; + fma.rn.f32 %f756, %f2839, %f749, %f755; + add.rn.f32 %f757, %f752, %f756; + neg.f32 %f758, %f757; + add.rn.f32 %f759, %f752, %f758; + add.rn.f32 %f760, %f759, %f756; + mov.b32 %r214, %f757; + setp.eq.s32 %p118, %r214, 1118925336; + add.s32 %r215, %r214, -1; + mov.b32 %f761, %r215; + add.f32 %f762, %f760, 0f37000000; + selp.f32 %f99, %f762, %f760, %p118; + selp.f32 %f763, %f761, %f757, %p118; + mul.rn.f32 %f764, %f763, %f696; + cvt.rzi.f32.f32 %f765, %f764; + abs.f32 %f766, %f765; + setp.gt.f32 %p119, %f766, 0f42FC0000; + mov.b32 %r216, %f765; + and.b32 %r217, %r216, -2147483648; + or.b32 %r218, %r217, 1123811328; + mov.b32 %f767, %r218; + selp.f32 %f768, %f767, %f765, %p119; + fma.rn.f32 %f769, %f768, %f702, %f763; + fma.rn.f32 %f770, %f768, %f704, %f769; mul.f32 %f771, %f770, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f772, %f771; - add.f32 %f773, %f766, 0f00000000; - ex2.approx.f32 %f774, %f773; - mul.f32 %f775, %f772, %f774; - setp.gt.f32 %p39, %f763, 0f42D20000; - selp.f32 %f776, 0f00000000, %f775, %p39; - setp.lt.f32 %p40, %f763, 0fC2D20000; - selp.f32 %f75, 0f7F800000, %f776, %p40; - mov.f32 %f3264, %f3258; - mov.f32 %f3265, %f3259; - -BB7_32: - sub.s32 %r113, %r321, %r326; - cvt.rn.f32.s32 %f777, %r113; - mul.f32 %f778, %f777, %f777; - mul.f32 %f779, %f40, %f778; - neg.f32 %f780, %f779; - mul.f32 %f781, %f779, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f782, %f781; - fma.rn.f32 %f784, %f782, %f767, %f780; - fma.rn.f32 %f786, %f782, %f769, %f784; - mul.f32 %f787, %f786, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f788, %f787; - add.f32 %f789, %f782, 0f00000000; - ex2.approx.f32 %f790, %f789; - mul.f32 %f791, %f788, %f790; - setp.gt.f32 %p41, %f779, 0f42D20000; - selp.f32 %f792, 0f00000000, %f791, %p41; - setp.lt.f32 %p42, %f779, 0fC2D20000; - selp.f32 %f793, 0f7F800000, %f792, %p42; - mul.f32 %f794, %f75, %f793; - add.s32 %r114, %r326, %r20; - add.s32 %r115, %r114, %r4; - mul.wide.s32 %rd95, %r115, 4; - add.s64 %rd96, %rd1, %rd95; - ld.global.f32 %f795, [%rd96]; - fma.rn.f32 %f796, %f795, %f794, %f3265; - add.f32 %f797, %f3264, %f794; - add.s32 %r116, %r326, 1; - sub.s32 %r117, %r321, %r116; - cvt.rn.f32.s32 %f798, %r117; - mul.f32 %f799, %f798, %f798; - mul.f32 %f800, %f40, %f799; - neg.f32 %f801, %f800; - mul.f32 %f802, %f800, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f803, %f802; - fma.rn.f32 %f804, %f803, %f767, %f801; - fma.rn.f32 %f805, %f803, %f769, %f804; - mul.f32 %f806, %f805, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f807, %f806; - add.f32 %f808, %f803, 0f00000000; - ex2.approx.f32 %f809, %f808; - mul.f32 %f810, %f807, %f809; - setp.gt.f32 %p43, %f800, 0f42D20000; - selp.f32 %f811, 0f00000000, %f810, %p43; - setp.lt.f32 %p44, %f800, 0fC2D20000; - selp.f32 %f812, 0f7F800000, %f811, %p44; - mul.f32 %f813, %f75, %f812; - ld.global.f32 %f814, [%rd96+4]; - fma.rn.f32 %f815, %f814, %f813, %f796; - add.f32 %f816, %f797, %f813; - add.s32 %r118, %r326, 2; - sub.s32 %r119, %r321, %r118; - cvt.rn.f32.s32 %f817, %r119; - mul.f32 %f818, %f817, %f817; - mul.f32 %f819, %f40, %f818; + add.f32 %f772, %f768, 0f4B40007F; + mov.b32 %r219, %f772; + shl.b32 %r220, %r219, 23; + mov.b32 %f773, %r220; + ex2.approx.ftz.f32 %f774, %f771; + mul.f32 %f100, %f774, %f773; + cvt.f64.f32 %fd338, %f3065; + { + .reg .b32 %temp; + mov.b64 {%temp, %r49}, %fd338; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r50}, %fd339; + } + and.b32 %r51, %r50, 2146435072; + setp.eq.s32 %p121, %r51, 1073741824; + abs.f64 %fd340, %fd338; + { // callseq 156, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd340; + .param .b64 param1; + st.param.f64 [param1+0], %fd339; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd36, [retval0+0]; + } // callseq 156 + setp.lt.s32 %p122, %r49, 0; + and.pred %p6, %p122, %p121; + setp.lt.s32 %p123, %r50, 0; + add.f64 %fd341, %fd338, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r221}, %fd341; + } + and.b32 %r52, %r221, 2146435072; + setp.ne.s32 %p124, %r52, 2146435072; + setp.gtu.f64 %p125, %fd340, 0d7FF0000000000000; + and.b32 %r53, %r50, 2147483647; + setp.gt.f64 %p126, %fd340, 0d3FF0000000000000; + selp.b32 %r222, 2146435072, 0, %p126; + xor.b32 %r223, %r222, 2146435072; + selp.b32 %r224, %r223, %r222, %p123; + setp.eq.f32 %p127, %f3065, 0fBF800000; + selp.b32 %r54, 1072693248, %r224, %p127; + setp.gt.s32 %p128, %r50, -1; + selp.b32 %r55, 2146435072, 0, %p128; + cvt.f64.f32 %fd342, %f3064; + { + .reg .b32 %temp; + mov.b64 {%temp, %r56}, %fd342; + } + abs.f64 %fd343, %fd342; + { // callseq 157, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd343; + .param .b64 param1; + st.param.f64 [param1+0], %fd339; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd37, [retval0+0]; + } // callseq 157 + setp.lt.s32 %p129, %r56, 0; + and.pred %p7, %p129, %p121; + add.f64 %fd344, %fd342, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r225}, %fd344; + } + and.b32 %r57, %r225, 2146435072; + setp.ne.s32 %p130, %r57, 2146435072; + setp.gtu.f64 %p131, %fd343, 0d7FF0000000000000; + add.f32 %f775, %f82, 0f3F800000; + sub.f32 %f776, %f775, %f3069; + div.rn.f32 %f101, %f776, %f3065; + abs.f32 %f102, %f101; + setp.lt.f32 %p132, %f102, 0f00800000; + mul.f32 %f777, %f102, 0f4B800000; + selp.f32 %f778, %f777, %f102, %p132; + selp.f32 %f779, 0fC3170000, 0fC2FE0000, %p132; + mov.b32 %r226, %f778; + and.b32 %r227, %r226, 8388607; + or.b32 %r228, %r227, 1065353216; + mov.b32 %f780, %r228; + shr.u32 %r229, %r226, 23; + cvt.rn.f32.u32 %f781, %r229; + add.f32 %f782, %f779, %f781; + setp.gt.f32 %p133, %f780, 0f3FB504F3; + mul.f32 %f783, %f780, 0f3F000000; + add.f32 %f784, %f782, 0f3F800000; + selp.f32 %f785, %f784, %f782, %p133; + selp.f32 %f786, %f783, %f780, %p133; + add.f32 %f787, %f786, 0fBF800000; + add.f32 %f788, %f786, 0f3F800000; + rcp.approx.ftz.f32 %f789, %f788; + add.f32 %f790, %f787, %f787; + mul.f32 %f791, %f790, %f789; + mul.f32 %f792, %f791, %f791; + fma.rn.f32 %f793, %f653, %f792, %f652; + fma.rn.f32 %f794, %f793, %f792, %f655; + mul.rn.f32 %f795, %f794, %f792; + mul.rn.f32 %f796, %f795, %f791; + sub.f32 %f797, %f787, %f791; + add.f32 %f798, %f797, %f797; + neg.f32 %f799, %f791; + fma.rn.f32 %f800, %f799, %f787, %f798; + mul.rn.f32 %f801, %f789, %f800; + add.f32 %f802, %f796, %f791; + sub.f32 %f803, %f791, %f802; + add.f32 %f804, %f796, %f803; + add.f32 %f805, %f801, %f804; + add.f32 %f806, %f802, %f805; + sub.f32 %f807, %f802, %f806; + add.f32 %f808, %f805, %f807; + mul.rn.f32 %f809, %f785, %f671; + mul.rn.f32 %f810, %f785, %f673; + add.f32 %f811, %f809, %f806; + sub.f32 %f812, %f809, %f811; + add.f32 %f813, %f806, %f812; + add.f32 %f814, %f808, %f813; + add.f32 %f815, %f810, %f814; + add.f32 %f816, %f811, %f815; + sub.f32 %f817, %f811, %f816; + add.f32 %f818, %f815, %f817; + mul.rn.f32 %f819, %f634, %f816; neg.f32 %f820, %f819; - mul.f32 %f821, %f819, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f822, %f821; - fma.rn.f32 %f823, %f822, %f767, %f820; - fma.rn.f32 %f824, %f822, %f769, %f823; - mul.f32 %f825, %f824, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f826, %f825; - add.f32 %f827, %f822, 0f00000000; - ex2.approx.f32 %f828, %f827; - mul.f32 %f829, %f826, %f828; - setp.gt.f32 %p45, %f819, 0f42D20000; - selp.f32 %f830, 0f00000000, %f829, %p45; - setp.lt.f32 %p46, %f819, 0fC2D20000; - selp.f32 %f831, 0f7F800000, %f830, %p46; - mul.f32 %f832, %f75, %f831; - ld.global.f32 %f833, [%rd96+8]; - fma.rn.f32 %f834, %f833, %f832, %f815; - add.f32 %f835, %f816, %f832; - add.s32 %r120, %r326, 3; - sub.s32 %r121, %r321, %r120; - cvt.rn.f32.s32 %f836, %r121; - mul.f32 %f837, %f836, %f836; - mul.f32 %f838, %f40, %f837; - neg.f32 %f839, %f838; - mul.f32 %f840, %f838, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f841, %f840; - fma.rn.f32 %f842, %f841, %f767, %f839; - fma.rn.f32 %f843, %f841, %f769, %f842; - mul.f32 %f844, %f843, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f845, %f844; - add.f32 %f846, %f841, 0f00000000; - ex2.approx.f32 %f847, %f846; - mul.f32 %f848, %f845, %f847; - setp.gt.f32 %p47, %f838, 0f42D20000; - selp.f32 %f849, 0f00000000, %f848, %p47; - setp.lt.f32 %p48, %f838, 0fC2D20000; - selp.f32 %f850, 0f7F800000, %f849, %p48; - mul.f32 %f851, %f75, %f850; - ld.global.f32 %f852, [%rd96+12]; - fma.rn.f32 %f3265, %f852, %f851, %f834; - add.f32 %f3264, %f835, %f851; - add.s32 %r326, %r326, 4; - setp.lt.s32 %p49, %r326, %r71; - @%p49 bra BB7_32; - -BB7_33: - add.s32 %r322, %r322, 1; - setp.lt.s32 %p50, %r322, %r71; - @%p50 bra BB7_21; - - div.rn.f32 %f853, %f3265, %f3264; - max.f32 %f3247, %f3247, %f853; - min.f32 %f3246, %f3246, %f853; - add.s32 %r321, %r321, 1; - setp.lt.s32 %p51, %r321, %r71; - @%p51 bra BB7_20; - - add.s32 %r320, %r320, 1; - setp.lt.s32 %p52, %r320, %r71; - @%p52 bra BB7_19; - -BB7_36: - sub.f32 %f855, %f3247, %f3246; - add.f32 %f856, %f855, %f855; - fma.rn.f32 %f857, %f855, 0f40000000, %f856; - mul.f32 %f858, %f857, 0f40490FD8; - mul.f32 %f859, %f858, %f3338; - mul.f32 %f860, %f859, %f3338; - max.f32 %f3341, %f629, %f860; - setp.lt.s32 %p53, %r73, 1; - @%p53 bra BB7_37; - - cvt.rn.f32.s32 %f863, %r71; - mul.f32 %f87, %f863, 0f3F000000; - cvt.rn.f32.s32 %f88, %r72; - mov.u32 %r327, 0; - cvta.to.global.u64 %rd97, %rd70; - cvta.to.global.u64 %rd103, %rd71; - mov.f32 %f3339, %f3338; - -BB7_39: - mov.f32 %f3287, 0f00000000; - mov.f32 %f3288, %f3287; - mov.f32 %f3289, %f3287; - mov.f32 %f3290, %f3287; - mov.f32 %f3291, %f3287; - mov.f32 %f3292, %f3287; - mov.f32 %f3293, %f3287; - mov.f32 %f3294, %f3287; - mov.f32 %f3295, %f3287; - mov.f32 %f3296, %f3287; - mov.f32 %f3297, %f3287; - mov.f32 %f3298, %f3287; - @%p15 bra BB7_120; - - div.rn.f32 %f889, %f660, %f3339; - div.rn.f32 %f96, %f889, %f3339; - div.rn.f32 %f890, %f660, %f3338; - div.rn.f32 %f97, %f890, %f3338; - neg.f32 %f891, %f3341; - div.rn.f32 %f892, %f891, 0f40206C98; - div.rn.f32 %f98, %f892, %f3339; - div.rn.f32 %f99, %f892, %f3338; - div.rn.f32 %f100, %f98, %f3339; - mov.f32 %f893, 0fC0000000; - div.rn.f32 %f101, %f893, %f3339; - div.rn.f32 %f102, %f99, %f3338; - div.rn.f32 %f103, %f893, %f3338; - mul.f32 %f894, %f3339, %f3339; - mul.f32 %f895, %f3339, %f894; - div.rn.f32 %f104, %f892, %f895; - mul.f32 %f896, %f3338, %f3338; - mul.f32 %f897, %f3338, %f896; - div.rn.f32 %f105, %f892, %f897; - mul.f32 %f898, %f894, %f894; - mul.f32 %f899, %f3339, %f898; - div.rn.f32 %f900, %f3341, 0f40206C98; - div.rn.f32 %f106, %f900, %f899; - mul.f32 %f901, %f896, %f896; - mul.f32 %f902, %f3338, %f901; - div.rn.f32 %f107, %f900, %f902; - mov.u32 %r328, 0; - mov.f32 %f3287, 0f00000000; - mov.f32 %f3288, %f3287; - mov.f32 %f3289, %f3287; - mov.f32 %f3290, %f3287; - mov.f32 %f3291, %f3287; - mov.f32 %f3292, %f3287; - mov.f32 %f3293, %f3287; - mov.f32 %f3294, %f3287; - mov.f32 %f3295, %f3287; - mov.f32 %f3296, %f3287; - mov.f32 %f3297, %f3287; - mov.f32 %f3298, %f3287; - -BB7_41: - mov.u32 %r329, 0; - cvt.rn.f32.s32 %f120, %r328; - sub.f32 %f121, %f120, %f3343; - add.f32 %f122, %f121, 0f3F800000; - sqrt.rn.f32 %f903, %f96; - mul.f32 %f123, %f122, %f903; - abs.f32 %f124, %f123; - mul.f32 %f125, %f123, %f123; - mul.f32 %f126, %f121, %f903; - abs.f32 %f127, %f126; - sqrt.rn.f32 %f129, %f97; - shl.b32 %r129, %r1, 1; - mul.wide.s32 %rd98, %r129, 4; - add.s64 %rd99, %rd97, %rd98; - ld.global.f32 %f130, [%rd99+4]; - ld.global.f32 %f131, [%rd99]; - add.f32 %f904, %f120, 0f3F800000; - sub.f32 %f132, %f904, %f3343; - div.rn.f32 %f133, %f132, %f3339; - mov.f32 %f905, 0f3F800000; - cvt.rzi.f32.f32 %f906, %f905; - add.f32 %f907, %f906, %f906; - mov.f32 %f908, 0f40000000; - sub.f32 %f909, %f908, %f907; - abs.f32 %f134, %f909; - setp.eq.f32 %p55, %f134, 0f3F800000; - abs.f32 %f135, %f133; - setp.lt.f32 %p56, %f135, 0f00800000; - mul.f32 %f910, %f135, 0f4B800000; - selp.f32 %f911, 0fC3170000, 0fC2FE0000, %p56; - selp.f32 %f912, %f910, %f135, %p56; - mov.b32 %r130, %f912; - and.b32 %r131, %r130, 8388607; - or.b32 %r132, %r131, 1065353216; - mov.b32 %f913, %r132; - shr.u32 %r133, %r130, 23; - cvt.rn.f32.u32 %f914, %r133; - add.f32 %f915, %f911, %f914; - setp.gt.f32 %p57, %f913, 0f3FB504F3; - mul.f32 %f916, %f913, 0f3F000000; - add.f32 %f917, %f915, 0f3F800000; - selp.f32 %f918, %f916, %f913, %p57; - selp.f32 %f919, %f917, %f915, %p57; - add.f32 %f136, %f918, 0fBF800000; - add.f32 %f137, %f918, 0f3F800000; - add.f32 %f138, %f136, %f136; - mov.f32 %f920, 0f3F317200; - mul.rn.f32 %f139, %f919, %f920; - mov.f32 %f921, 0f35BFBE8E; - mul.rn.f32 %f140, %f919, %f921; - setp.lt.f32 %p58, %f133, 0f00000000; - and.pred %p1, %p58, %p55; - add.f32 %f922, %f133, %f133; - selp.f32 %f141, %f922, 0f00000000, %p55; - div.rn.f32 %f142, %f121, %f3339; - abs.f32 %f143, %f142; - setp.lt.f32 %p59, %f143, 0f00800000; - mul.f32 %f924, %f143, 0f4B800000; - selp.f32 %f925, 0fC3170000, 0fC2FE0000, %p59; - selp.f32 %f926, %f924, %f143, %p59; - mov.b32 %r134, %f926; - and.b32 %r135, %r134, 8388607; - or.b32 %r136, %r135, 1065353216; - mov.b32 %f927, %r136; - shr.u32 %r137, %r134, 23; - cvt.rn.f32.u32 %f928, %r137; - add.f32 %f929, %f925, %f928; - setp.gt.f32 %p60, %f927, 0f3FB504F3; - mul.f32 %f930, %f927, 0f3F000000; - add.f32 %f931, %f929, 0f3F800000; - selp.f32 %f932, %f930, %f927, %p60; - selp.f32 %f933, %f931, %f929, %p60; - add.f32 %f144, %f932, 0fBF800000; - add.f32 %f145, %f932, 0f3F800000; - add.f32 %f146, %f144, %f144; - mul.rn.f32 %f147, %f933, %f920; - mul.rn.f32 %f148, %f933, %f921; - setp.lt.f32 %p61, %f142, 0f00000000; - and.pred %p2, %p61, %p55; - add.f32 %f934, %f142, %f142; - selp.f32 %f149, %f934, 0f00000000, %p55; - mul.f32 %f936, %f122, %f122; - mul.f32 %f150, %f122, %f936; - -BB7_42: - setp.ltu.f32 %p62, %f124, 0f3F800000; - @%p62 bra BB7_44; - bra.uni BB7_43; - -BB7_44: - mov.f32 %f956, 0f3BA0C9F8; - mov.f32 %f957, 0fBA1268FB; - fma.rn.f32 %f958, %f957, %f125, %f956; - mov.f32 %f959, 0fBCDABFD4; - fma.rn.f32 %f960, %f958, %f125, %f959; - mov.f32 %f961, 0f3DE70331; - fma.rn.f32 %f962, %f960, %f125, %f961; - mov.f32 %f963, 0fBEC09330; - fma.rn.f32 %f964, %f962, %f125, %f963; - mov.f32 %f965, 0f3F906EBA; - fma.rn.f32 %f966, %f964, %f125, %f965; - mul.f32 %f3299, %f123, %f966; - bra.uni BB7_45; - -BB7_43: - mov.f32 %f3055, 0f3F800000; - setp.ltu.f32 %p63, %f124, 0f407AD445; - mov.f32 %f938, 0f3A03BB71; - mov.f32 %f939, 0fB7B730FB; - fma.rn.f32 %f940, %f939, %f124, %f938; - mov.f32 %f941, 0fBBACA3B3; - fma.rn.f32 %f942, %f940, %f124, %f941; - mov.f32 %f943, 0f3D0A7445; - fma.rn.f32 %f944, %f942, %f124, %f943; - mov.f32 %f945, 0fBE1B3B75; - fma.rn.f32 %f946, %f944, %f124, %f945; - mov.f32 %f947, 0fBF6B385A; - fma.rn.f32 %f948, %f946, %f124, %f947; - mov.f32 %f949, 0fBFD0316E; - fma.rn.f32 %f950, %f948, %f124, %f949; - mov.f32 %f951, 0fBA031CCE; - fma.rn.f32 %f952, %f950, %f124, %f951; - ex2.approx.ftz.f32 %f953, %f952; - sub.f32 %f955, %f3055, %f953; - mov.b32 %r138, %f955; - selp.b32 %r139, %r138, 1065353216, %p63; - mov.b32 %r140, %f123; - and.b32 %r141, %r140, -2147483648; - or.b32 %r142, %r139, %r141; - mov.b32 %f3299, %r142; - -BB7_45: - setp.ltu.f32 %p64, %f127, 0f3F800000; - @%p64 bra BB7_47; - bra.uni BB7_46; - -BB7_47: - cvt.rn.f32.s32 %f3097, %r328; - sub.f32 %f3096, %f3097, %f3343; - mul.f32 %f3095, %f3096, %f903; - mul.f32 %f3094, %f3095, %f3095; - mov.f32 %f985, 0f3BA0C9F8; - mov.f32 %f986, 0fBA1268FB; - fma.rn.f32 %f987, %f986, %f3094, %f985; - mov.f32 %f988, 0fBCDABFD4; - fma.rn.f32 %f989, %f987, %f3094, %f988; - mov.f32 %f990, 0f3DE70331; - fma.rn.f32 %f991, %f989, %f3094, %f990; - mov.f32 %f992, 0fBEC09330; - fma.rn.f32 %f993, %f991, %f3094, %f992; - mov.f32 %f994, 0f3F906EBA; - fma.rn.f32 %f995, %f993, %f3094, %f994; - mul.f32 %f3300, %f3095, %f995; - bra.uni BB7_48; - -BB7_46: - cvt.rn.f32.s32 %f3110, %r328; - sub.f32 %f3109, %f3110, %f3343; - mul.f32 %f3108, %f3109, %f903; - mov.f32 %f3056, 0f3F800000; - setp.ltu.f32 %p65, %f127, 0f407AD445; - mov.f32 %f967, 0f3A03BB71; - mov.f32 %f968, 0fB7B730FB; - fma.rn.f32 %f969, %f968, %f127, %f967; - mov.f32 %f970, 0fBBACA3B3; - fma.rn.f32 %f971, %f969, %f127, %f970; - mov.f32 %f972, 0f3D0A7445; - fma.rn.f32 %f973, %f971, %f127, %f972; - mov.f32 %f974, 0fBE1B3B75; - fma.rn.f32 %f975, %f973, %f127, %f974; - mov.f32 %f976, 0fBF6B385A; - fma.rn.f32 %f977, %f975, %f127, %f976; - mov.f32 %f978, 0fBFD0316E; - fma.rn.f32 %f979, %f977, %f127, %f978; - mov.f32 %f980, 0fBA031CCE; - fma.rn.f32 %f981, %f979, %f127, %f980; - ex2.approx.ftz.f32 %f982, %f981; - sub.f32 %f984, %f3056, %f982; - mov.b32 %r143, %f984; - selp.b32 %r144, %r143, 1065353216, %p65; - mov.b32 %r145, %f3108; - and.b32 %r146, %r145, -2147483648; - or.b32 %r147, %r144, %r146; - mov.b32 %f3300, %r147; - -BB7_48: - sub.f32 %f996, %f3299, %f3300; - mul.f32 %f170, %f996, 0f3F000000; - cvt.rn.f32.s32 %f171, %r329; - sub.f32 %f172, %f171, %f3342; - add.f32 %f173, %f172, 0f3F800000; - mul.f32 %f174, %f173, %f129; - abs.f32 %f175, %f174; - setp.ltu.f32 %p66, %f175, 0f3F800000; - @%p66 bra BB7_50; - bra.uni BB7_49; - -BB7_50: - mul.f32 %f1015, %f174, %f174; - mov.f32 %f1016, 0f3BA0C9F8; - mov.f32 %f1017, 0fBA1268FB; - fma.rn.f32 %f1018, %f1017, %f1015, %f1016; - mov.f32 %f1019, 0fBCDABFD4; - fma.rn.f32 %f1020, %f1018, %f1015, %f1019; - mov.f32 %f1021, 0f3DE70331; - fma.rn.f32 %f1022, %f1020, %f1015, %f1021; - mov.f32 %f1023, 0fBEC09330; - fma.rn.f32 %f1024, %f1022, %f1015, %f1023; - mov.f32 %f1025, 0f3F906EBA; - fma.rn.f32 %f1026, %f1024, %f1015, %f1025; - mul.f32 %f3301, %f174, %f1026; - bra.uni BB7_51; - -BB7_49: - mov.f32 %f3057, 0f3F800000; - mov.f32 %f997, 0f3A03BB71; - mov.f32 %f998, 0fB7B730FB; - fma.rn.f32 %f999, %f998, %f175, %f997; - mov.f32 %f1000, 0fBBACA3B3; - fma.rn.f32 %f1001, %f999, %f175, %f1000; - mov.f32 %f1002, 0f3D0A7445; - fma.rn.f32 %f1003, %f1001, %f175, %f1002; - mov.f32 %f1004, 0fBE1B3B75; - fma.rn.f32 %f1005, %f1003, %f175, %f1004; - mov.f32 %f1006, 0fBF6B385A; - fma.rn.f32 %f1007, %f1005, %f175, %f1006; - mov.f32 %f1008, 0fBFD0316E; - fma.rn.f32 %f1009, %f1007, %f175, %f1008; - mov.f32 %f1010, 0fBA031CCE; - fma.rn.f32 %f1011, %f1009, %f175, %f1010; - ex2.approx.ftz.f32 %f1012, %f1011; - sub.f32 %f1014, %f3057, %f1012; - mov.b32 %r148, %f1014; - setp.ltu.f32 %p67, %f175, 0f407AD445; - selp.b32 %r149, %r148, 1065353216, %p67; - mov.b32 %r150, %f174; - and.b32 %r151, %r150, -2147483648; - or.b32 %r152, %r149, %r151; - mov.b32 %f3301, %r152; - -BB7_51: - cvt.rn.f32.s32 %f3112, %r329; - sub.f32 %f3111, %f3112, %f3342; - mul.f32 %f179, %f3111, %f129; - abs.f32 %f180, %f179; - setp.ltu.f32 %p68, %f180, 0f3F800000; - @%p68 bra BB7_53; - bra.uni BB7_52; - -BB7_53: - mul.f32 %f1045, %f179, %f179; - mov.f32 %f1046, 0f3BA0C9F8; - mov.f32 %f1047, 0fBA1268FB; - fma.rn.f32 %f1048, %f1047, %f1045, %f1046; - mov.f32 %f1049, 0fBCDABFD4; - fma.rn.f32 %f1050, %f1048, %f1045, %f1049; - mov.f32 %f1051, 0f3DE70331; - fma.rn.f32 %f1052, %f1050, %f1045, %f1051; - mov.f32 %f1053, 0fBEC09330; - fma.rn.f32 %f1054, %f1052, %f1045, %f1053; - mov.f32 %f1055, 0f3F906EBA; - fma.rn.f32 %f1056, %f1054, %f1045, %f1055; - mul.f32 %f3302, %f179, %f1056; - bra.uni BB7_54; - -BB7_52: - mov.f32 %f3058, 0f3F800000; - mov.f32 %f1027, 0f3A03BB71; - mov.f32 %f1028, 0fB7B730FB; - fma.rn.f32 %f1029, %f1028, %f180, %f1027; - mov.f32 %f1030, 0fBBACA3B3; - fma.rn.f32 %f1031, %f1029, %f180, %f1030; - mov.f32 %f1032, 0f3D0A7445; - fma.rn.f32 %f1033, %f1031, %f180, %f1032; - mov.f32 %f1034, 0fBE1B3B75; - fma.rn.f32 %f1035, %f1033, %f180, %f1034; - mov.f32 %f1036, 0fBF6B385A; - fma.rn.f32 %f1037, %f1035, %f180, %f1036; - mov.f32 %f1038, 0fBFD0316E; - fma.rn.f32 %f1039, %f1037, %f180, %f1038; - mov.f32 %f1040, 0fBA031CCE; - fma.rn.f32 %f1041, %f1039, %f180, %f1040; - ex2.approx.ftz.f32 %f1042, %f1041; - sub.f32 %f1044, %f3058, %f1042; - mov.b32 %r153, %f1044; - setp.ltu.f32 %p69, %f180, 0f407AD445; - selp.b32 %r154, %r153, 1065353216, %p69; - mov.b32 %r155, %f179; - and.b32 %r156, %r155, -2147483648; - or.b32 %r157, %r154, %r156; - mov.b32 %f3302, %r157; - -BB7_54: - cvt.rn.f32.s32 %f3059, %r328; - sub.f32 %f1059, %f3301, %f3302; - mul.f32 %f184, %f1059, 0f3F000000; - mul.f32 %f1060, %f170, %f3341; - fma.rn.f32 %f185, %f184, %f1060, %f3246; - mad.lo.s32 %r158, %r329, %r71, %r328; - add.s32 %r159, %r158, %r4; - mul.wide.s32 %rd101, %r159, 4; - add.s64 %rd102, %rd1, %rd101; - ld.global.f32 %f186, [%rd102]; - add.f32 %f1061, %f171, %f130; - fma.rn.f32 %f1062, %f88, %f1061, %f131; - add.f32 %f1063, %f3059, %f1062; - cvt.rzi.s32.f32 %r160, %f1063; - mul.wide.s32 %rd104, %r160, 4; - add.s64 %rd105, %rd103, %rd104; - ld.global.f32 %f3337, [%rd105]; - // inline asm - rcp.approx.ftz.f32 %f1057,%f137; - // inline asm - mul.f32 %f1064, %f1057, %f138; - mul.f32 %f1065, %f1064, %f1064; - mov.f32 %f1066, 0f3C4CAF63; - mov.f32 %f1067, 0f3B18F0FE; - fma.rn.f32 %f1068, %f1067, %f1065, %f1066; - mov.f32 %f1069, 0f3DAAAABD; - fma.rn.f32 %f1070, %f1068, %f1065, %f1069; - mul.rn.f32 %f1071, %f1070, %f1065; - mul.rn.f32 %f1072, %f1071, %f1064; - sub.f32 %f1073, %f136, %f1064; - neg.f32 %f1074, %f1064; - add.f32 %f1075, %f1073, %f1073; - fma.rn.f32 %f1076, %f1074, %f136, %f1075; - mul.rn.f32 %f1077, %f1057, %f1076; - add.f32 %f1078, %f1072, %f1064; - sub.f32 %f1079, %f1064, %f1078; - add.f32 %f1080, %f1072, %f1079; - add.f32 %f1081, %f1077, %f1080; - add.f32 %f1082, %f1078, %f1081; - sub.f32 %f1083, %f1078, %f1082; - add.f32 %f1084, %f1081, %f1083; - add.f32 %f1085, %f139, %f1082; - sub.f32 %f1086, %f139, %f1085; - add.f32 %f1087, %f1082, %f1086; - add.f32 %f1088, %f1084, %f1087; - add.f32 %f1089, %f140, %f1088; - add.f32 %f1090, %f1085, %f1089; - sub.f32 %f1091, %f1085, %f1090; - add.f32 %f1092, %f1089, %f1091; - mul.rn.f32 %f1094, %f908, %f1090; - neg.f32 %f1095, %f1094; - fma.rn.f32 %f1096, %f908, %f1090, %f1095; - fma.rn.f32 %f1097, %f908, %f1092, %f1096; - mov.f32 %f1098, 0f00000000; - fma.rn.f32 %f1099, %f1098, %f1090, %f1097; - add.rn.f32 %f1100, %f1094, %f1099; - neg.f32 %f1101, %f1100; - add.rn.f32 %f1102, %f1094, %f1101; - add.rn.f32 %f1103, %f1102, %f1099; - mov.b32 %r161, %f1100; - setp.eq.s32 %p70, %r161, 1118925336; - add.s32 %r162, %r161, -1; - mov.b32 %f1104, %r162; - add.f32 %f1105, %f1103, 0f37000000; - selp.f32 %f1106, %f1104, %f1100, %p70; - selp.f32 %f188, %f1105, %f1103, %p70; - mul.f32 %f1107, %f1106, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1108, %f1107; - mov.f32 %f1109, 0fBF317200; - fma.rn.f32 %f1110, %f1108, %f1109, %f1106; - mov.f32 %f1111, 0fB5BFBE8E; - fma.rn.f32 %f1112, %f1108, %f1111, %f1110; - mul.f32 %f1113, %f1112, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1114, %f1113; - add.f32 %f1115, %f1108, 0f00000000; - ex2.approx.f32 %f1116, %f1115; - mul.f32 %f1117, %f1114, %f1116; - setp.lt.f32 %p71, %f1106, 0fC2D20000; - selp.f32 %f1118, 0f00000000, %f1117, %p71; - setp.gt.f32 %p72, %f1106, 0f42D20000; - selp.f32 %f3303, 0f7F800000, %f1118, %p72; - setp.eq.f32 %p73, %f3303, 0f7F800000; - @%p73 bra BB7_56; - - fma.rn.f32 %f3303, %f3303, %f188, %f3303; - -BB7_56: - setp.geu.f32 %p350, %f133, 0f00000000; - mov.b32 %r163, %f3303; - xor.b32 %r164, %r163, -2147483648; - mov.b32 %f1119, %r164; - selp.f32 %f192, %f1119, %f3303, %p1; - setp.eq.f32 %p74, %f133, 0f00000000; - selp.f32 %f3304, %f141, %f192, %p74; - @%p350 bra BB7_58; - - cvt.rzi.f32.f32 %f1121, %f908; - setp.neu.f32 %p75, %f1121, 0f40000000; - selp.f32 %f3304, 0f7FFFFFFF, %f192, %p75; - -BB7_58: - abs.f32 %f3067, %f133; - add.f32 %f3066, %f3067, 0f40000000; - mov.b32 %r299, %f3066; - mov.f32 %f3065, 0f00000000; - mov.f32 %f3064, 0f3DAAAABD; - mov.f32 %f3063, 0f3C4CAF63; - mov.f32 %f3062, 0f3B18F0FE; - mov.f32 %f3061, 0fB5BFBE8E; - mov.f32 %f3060, 0fBF317200; - add.f32 %f1124, %f133, 0f40000000; - setp.gtu.f32 %p76, %f3067, 0f7F800000; - selp.f32 %f1125, %f1124, %f3304, %p76; - selp.f32 %f1126, 0fFF800000, 0f7F800000, %p1; - setp.neu.f32 %p77, %f3067, 0f7F800000; - selp.f32 %f1127, %f1125, %f1126, %p77; - setp.gt.s32 %p78, %r299, 2139095039; - selp.f32 %f1128, %f1127, %f3304, %p78; - mul.f32 %f1129, %f1128, 0fBF000000; - setp.eq.f32 %p79, %f133, 0f3F800000; - selp.f32 %f1130, 0fBF000000, %f1129, %p79; - mul.f32 %f1131, %f1130, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1132, %f1131; - fma.rn.f32 %f1134, %f1132, %f3060, %f1130; - fma.rn.f32 %f1136, %f1132, %f3061, %f1134; - mul.f32 %f1137, %f1136, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1138, %f1137; - add.f32 %f1139, %f1132, 0f00000000; - ex2.approx.f32 %f1140, %f1139; - mul.f32 %f1141, %f1138, %f1140; - setp.lt.f32 %p80, %f1130, 0fC2D20000; - selp.f32 %f1142, 0f00000000, %f1141, %p80; - setp.gt.f32 %p81, %f1130, 0f42D20000; - selp.f32 %f196, 0f7F800000, %f1142, %p81; - // inline asm - rcp.approx.ftz.f32 %f1122,%f145; - // inline asm - mul.f32 %f1143, %f1122, %f146; - mul.f32 %f1144, %f1143, %f1143; - fma.rn.f32 %f1147, %f3062, %f1144, %f3063; - fma.rn.f32 %f1149, %f1147, %f1144, %f3064; - mul.rn.f32 %f1150, %f1149, %f1144; - mul.rn.f32 %f1151, %f1150, %f1143; - sub.f32 %f1152, %f144, %f1143; - neg.f32 %f1153, %f1143; - add.f32 %f1154, %f1152, %f1152; - fma.rn.f32 %f1155, %f1153, %f144, %f1154; - mul.rn.f32 %f1156, %f1122, %f1155; - add.f32 %f1157, %f1151, %f1143; - sub.f32 %f1158, %f1143, %f1157; - add.f32 %f1159, %f1151, %f1158; + fma.rn.f32 %f821, %f634, %f816, %f820; + fma.rn.f32 %f822, %f634, %f818, %f821; + fma.rn.f32 %f823, %f2839, %f816, %f822; + add.rn.f32 %f824, %f819, %f823; + neg.f32 %f825, %f824; + add.rn.f32 %f826, %f819, %f825; + add.rn.f32 %f827, %f826, %f823; + mov.b32 %r230, %f824; + setp.eq.s32 %p134, %r230, 1118925336; + add.s32 %r231, %r230, -1; + mov.b32 %f828, %r231; + add.f32 %f829, %f827, 0f37000000; + selp.f32 %f103, %f829, %f827, %p134; + selp.f32 %f830, %f828, %f824, %p134; + mul.rn.f32 %f831, %f830, %f696; + cvt.rzi.f32.f32 %f832, %f831; + abs.f32 %f833, %f832; + setp.gt.f32 %p135, %f833, 0f42FC0000; + mov.b32 %r232, %f832; + and.b32 %r233, %r232, -2147483648; + or.b32 %r234, %r233, 1123811328; + mov.b32 %f834, %r234; + selp.f32 %f835, %f834, %f832, %p135; + fma.rn.f32 %f836, %f835, %f702, %f830; + fma.rn.f32 %f837, %f835, %f704, %f836; + mul.f32 %f838, %f837, 0f3FB8AA3B; + add.f32 %f839, %f835, 0f4B40007F; + mov.b32 %r235, %f839; + shl.b32 %r236, %r235, 23; + mov.b32 %f840, %r236; + ex2.approx.ftz.f32 %f841, %f838; + mul.f32 %f104, %f841, %f840; + setp.gt.f64 %p137, %fd343, 0d3FF0000000000000; + selp.b32 %r237, 2146435072, 0, %p137; + xor.b32 %r238, %r237, 2146435072; + selp.b32 %r239, %r238, %r237, %p123; + setp.eq.f32 %p138, %f3064, 0fBF800000; + selp.b32 %r58, 1072693248, %r239, %p138; + div.rn.f32 %f105, %f83, %f3065; + abs.f32 %f106, %f105; + setp.lt.f32 %p139, %f106, 0f00800000; + mul.f32 %f842, %f106, 0f4B800000; + selp.f32 %f843, %f842, %f106, %p139; + selp.f32 %f844, 0fC3170000, 0fC2FE0000, %p139; + mov.b32 %r240, %f843; + and.b32 %r241, %r240, 8388607; + or.b32 %r242, %r241, 1065353216; + mov.b32 %f845, %r242; + shr.u32 %r243, %r240, 23; + cvt.rn.f32.u32 %f846, %r243; + add.f32 %f847, %f844, %f846; + setp.gt.f32 %p140, %f845, 0f3FB504F3; + mul.f32 %f848, %f845, 0f3F000000; + add.f32 %f849, %f847, 0f3F800000; + selp.f32 %f850, %f849, %f847, %p140; + selp.f32 %f851, %f848, %f845, %p140; + add.f32 %f852, %f851, 0fBF800000; + add.f32 %f853, %f851, 0f3F800000; + rcp.approx.ftz.f32 %f854, %f853; + add.f32 %f855, %f852, %f852; + mul.f32 %f856, %f855, %f854; + mul.f32 %f857, %f856, %f856; + fma.rn.f32 %f858, %f653, %f857, %f652; + fma.rn.f32 %f859, %f858, %f857, %f655; + mul.rn.f32 %f860, %f859, %f857; + mul.rn.f32 %f861, %f860, %f856; + sub.f32 %f862, %f852, %f856; + add.f32 %f863, %f862, %f862; + neg.f32 %f864, %f856; + fma.rn.f32 %f865, %f864, %f852, %f863; + mul.rn.f32 %f866, %f854, %f865; + add.f32 %f867, %f861, %f856; + sub.f32 %f868, %f856, %f867; + add.f32 %f869, %f861, %f868; + add.f32 %f870, %f866, %f869; + add.f32 %f871, %f867, %f870; + sub.f32 %f872, %f867, %f871; + add.f32 %f873, %f870, %f872; + mul.rn.f32 %f874, %f850, %f671; + mul.rn.f32 %f875, %f850, %f673; + add.f32 %f876, %f874, %f871; + sub.f32 %f877, %f874, %f876; + add.f32 %f878, %f871, %f877; + add.f32 %f879, %f873, %f878; + add.f32 %f880, %f875, %f879; + add.f32 %f881, %f876, %f880; + sub.f32 %f882, %f876, %f881; + add.f32 %f883, %f880, %f882; + mul.rn.f32 %f884, %f634, %f881; + neg.f32 %f885, %f884; + fma.rn.f32 %f886, %f634, %f881, %f885; + fma.rn.f32 %f887, %f634, %f883, %f886; + fma.rn.f32 %f888, %f2839, %f881, %f887; + add.rn.f32 %f889, %f884, %f888; + neg.f32 %f890, %f889; + add.rn.f32 %f891, %f884, %f890; + add.rn.f32 %f892, %f891, %f888; + mov.b32 %r244, %f889; + setp.eq.s32 %p141, %r244, 1118925336; + add.s32 %r245, %r244, -1; + mov.b32 %f893, %r245; + add.f32 %f894, %f892, 0f37000000; + selp.f32 %f107, %f894, %f892, %p141; + selp.f32 %f895, %f893, %f889, %p141; + mul.rn.f32 %f896, %f895, %f696; + cvt.rzi.f32.f32 %f897, %f896; + abs.f32 %f898, %f897; + setp.gt.f32 %p142, %f898, 0f42FC0000; + mov.b32 %r246, %f897; + and.b32 %r247, %r246, -2147483648; + or.b32 %r248, %r247, 1123811328; + mov.b32 %f899, %r248; + selp.f32 %f900, %f899, %f897, %p142; + fma.rn.f32 %f901, %f900, %f702, %f895; + fma.rn.f32 %f902, %f900, %f704, %f901; + mul.f32 %f903, %f902, 0f3FB8AA3B; + add.f32 %f904, %f900, 0f4B40007F; + mov.b32 %r249, %f904; + shl.b32 %r250, %r249, 23; + mov.b32 %f905, %r250; + ex2.approx.ftz.f32 %f906, %f903; + mul.f32 %f108, %f906, %f905; + { + .reg .b32 %temp; + mov.b64 {%temp, %r251}, %fd345; + } + and.b32 %r252, %r251, 2146435072; + setp.eq.s32 %p144, %r252, 1074790400; + { // callseq 158, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd340; + .param .b64 param1; + st.param.f64 [param1+0], %fd345; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd38, [retval0+0]; + } // callseq 158 + and.pred %p10, %p122, %p144; + selp.b32 %r253, %r49, 0, %p144; + setp.lt.s32 %p145, %r251, 0; + or.b32 %r254, %r253, 2146435072; + selp.b32 %r59, %r254, %r253, %p145; + add.f64 %fd346, %fd338, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r255}, %fd346; + } + and.b32 %r60, %r255, 2146435072; + setp.ne.s32 %p146, %r60, 2146435072; + cvt.f64.f32 %fd39, %f84; + { + .reg .b32 %temp; + mov.b64 {%temp, %r61}, %fd39; + } + abs.f64 %fd347, %fd39; + { // callseq 159, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd347; + .param .b64 param1; + st.param.f64 [param1+0], %fd339; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd40, [retval0+0]; + } // callseq 159 + setp.lt.s32 %p147, %r61, 0; + and.pred %p11, %p147, %p121; + and.b32 %r62, %r251, 2147483647; + selp.b32 %r256, %r223, %r222, %p145; + selp.b32 %r63, 1072693248, %r256, %p127; + add.f64 %fd41, %fd39, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r257}, %fd41; + } + and.b32 %r64, %r257, 2146435072; + setp.ne.s32 %p148, %r64, 2146435072; + setp.gt.s32 %p149, %r251, -1; + selp.b32 %r258, 2146435072, 0, %p149; + setp.ne.s32 %p150, %r62, 1071644672; + and.pred %p151, %p150, %p10; + or.b32 %r259, %r258, -2147483648; + selp.b32 %r65, %r259, %r258, %p151; + setp.gtu.f64 %p152, %fd347, 0d7FF0000000000000; + cvt.f64.f32 %fd42, %f87; + { + .reg .b32 %temp; + mov.b64 {%temp, %r66}, %fd42; + } + abs.f64 %fd348, %fd42; + { // callseq 160, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd348; + .param .b64 param1; + st.param.f64 [param1+0], %fd339; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd43, [retval0+0]; + } // callseq 160 + setp.lt.s32 %p153, %r66, 0; + and.pred %p12, %p153, %p121; + setp.gt.f64 %p154, %fd347, 0d3FF0000000000000; + selp.b32 %r260, 2146435072, 0, %p154; + xor.b32 %r261, %r260, 2146435072; + selp.b32 %r262, %r261, %r260, %p123; + setp.eq.f32 %p155, %f84, 0fBF800000; + selp.b32 %r67, 1072693248, %r262, %p155; + add.f64 %fd44, %fd42, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r263}, %fd44; + } + and.b32 %r68, %r263, 2146435072; + setp.ne.s32 %p156, %r68, 2146435072; + setp.gtu.f64 %p157, %fd348, 0d7FF0000000000000; + setp.gt.f64 %p158, %fd348, 0d3FF0000000000000; + selp.b32 %r264, 2146435072, 0, %p158; + xor.b32 %r265, %r264, 2146435072; + selp.b32 %r266, %r265, %r264, %p123; + setp.eq.f32 %p159, %f87, 0fBF800000; + selp.b32 %r69, 1072693248, %r266, %p159; + { // callseq 161, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd343; + .param .b64 param1; + st.param.f64 [param1+0], %fd345; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd45, [retval0+0]; + } // callseq 161 + and.pred %p13, %p129, %p144; + selp.b32 %r267, %r56, 0, %p144; + or.b32 %r268, %r267, 2146435072; + selp.b32 %r70, %r268, %r267, %p145; + add.f64 %fd349, %fd342, 0d4014000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r269}, %fd349; + } + and.b32 %r71, %r269, 2146435072; + setp.ne.s32 %p160, %r71, 2146435072; + selp.b32 %r270, %r238, %r237, %p145; + selp.b32 %r72, 1072693248, %r270, %p138; + and.pred %p161, %p150, %p13; + selp.b32 %r73, %r259, %r258, %p161; + mov.f64 %fd350, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r74}, %fd350; + } + and.b32 %r75, %r74, 2147483647; + setp.gt.s32 %p162, %r74, -1; + selp.b32 %r76, 2146435072, 0, %p162; + or.pred %p16, %p124, %p125; + or.pred %p17, %p130, %p131; + or.pred %p20, %p146, %p125; + or.pred %p21, %p148, %p152; + or.pred %p22, %p156, %p157; + or.pred %p23, %p160, %p131; + shr.s32 %r271, %r74, 31; + and.b32 %r77, %r271, 2146435072; + +$L__BB7_56: + cvt.rn.f32.s32 %f2844, %r846; + sub.f32 %f2843, %f2844, %f3069; + add.f32 %f2842, %f2843, 0f3F000000; + mul.f32 %f2841, %f2842, %f593; + abs.f32 %f2840, %f2841; + setp.ltu.f32 %p163, %f2840, 0f3F8060FE; + mov.f32 %f3025, %f86; + @%p163 bra $L__BB7_58; + + mov.f32 %f2950, 0f3F800000; + ex2.approx.ftz.f32 %f907, %f86; + sub.f32 %f909, %f2950, %f907; + mov.b32 %r272, %f909; + or.b32 %r273, %r47, %r272; + mov.b32 %f3025, %r273; + +$L__BB7_58: + cvt.rn.f32.s32 %f2849, %r846; + sub.f32 %f2848, %f2849, %f3069; + add.f32 %f2847, %f2848, 0fBF000000; + mul.f32 %f2846, %f2847, %f593; + abs.f32 %f2845, %f2846; + setp.ltu.f32 %p164, %f2845, 0f3F8060FE; + mov.f32 %f3026, %f89; + @%p164 bra $L__BB7_60; + + mov.f32 %f2949, 0f3F800000; + ex2.approx.ftz.f32 %f910, %f89; + sub.f32 %f912, %f2949, %f910; + mov.b32 %r274, %f912; + or.b32 %r275, %r48, %r274; + mov.b32 %f3026, %r275; + +$L__BB7_60: + sub.f32 %f913, %f3025, %f3026; + mul.f32 %f125, %f913, 0f3F000000; + cvt.rn.f32.s32 %f126, %r847; + sub.f32 %f127, %f126, %f3068; + add.f32 %f128, %f127, 0f3F000000; + mul.f32 %f129, %f128, %f90; + abs.f32 %f914, %f129; + setp.ltu.f32 %p165, %f914, 0f3F8060FE; + setp.ge.f32 %p166, %f914, 0f3F8060FE; + mul.f32 %f915, %f129, %f129; + selp.f32 %f916, %f914, %f915, %p166; + selp.f32 %f917, 0f3789CA3C, 0f38B1E96A, %p166; + selp.f32 %f918, 0fB9F560B9, 0fBA574D20, %p166; + fma.rn.f32 %f919, %f917, %f916, %f918; + selp.f32 %f920, 0f3BAC840B, 0f3BAAD5EA, %p166; + fma.rn.f32 %f921, %f919, %f916, %f920; + selp.f32 %f922, 0fBD0C8162, 0fBCDC1BE7, %p166; + fma.rn.f32 %f923, %f921, %f916, %f922; + selp.f32 %f924, 0f3E1CF906, 0f3DE718AF, %p166; + fma.rn.f32 %f925, %f923, %f916, %f924; + selp.f32 %f926, 0f3F6A937E, 0fBEC093AC, %p166; + fma.rn.f32 %f927, %f925, %f916, %f926; + selp.f32 %f928, 0f3F20D842, 0f3E0375D3, %p166; + fma.rn.f32 %f929, %f927, %f916, %f928; + neg.f32 %f930, %f914; + selp.f32 %f931, %f930, %f129, %p166; + fma.rn.f32 %f3027, %f929, %f931, %f931; + @%p165 bra $L__BB7_62; + + mov.f32 %f2948, 0f3F800000; + ex2.approx.ftz.f32 %f932, %f3027; + sub.f32 %f934, %f2948, %f932; + mov.b32 %r276, %f934; + mov.b32 %r277, %f129; + and.b32 %r278, %r277, -2147483648; + or.b32 %r279, %r278, %r276; + mov.b32 %f3027, %r279; + +$L__BB7_62: + cvt.rn.f32.s32 %f2851, %r847; + sub.f32 %f2850, %f2851, %f3068; + add.f32 %f133, %f2850, 0fBF000000; + mul.f32 %f134, %f133, %f90; + abs.f32 %f935, %f134; + setp.ltu.f32 %p167, %f935, 0f3F8060FE; + setp.ge.f32 %p168, %f935, 0f3F8060FE; + mul.f32 %f936, %f134, %f134; + selp.f32 %f937, %f935, %f936, %p168; + selp.f32 %f938, 0f3789CA3C, 0f38B1E96A, %p168; + selp.f32 %f939, 0fB9F560B9, 0fBA574D20, %p168; + fma.rn.f32 %f940, %f938, %f937, %f939; + selp.f32 %f941, 0f3BAC840B, 0f3BAAD5EA, %p168; + fma.rn.f32 %f942, %f940, %f937, %f941; + selp.f32 %f943, 0fBD0C8162, 0fBCDC1BE7, %p168; + fma.rn.f32 %f944, %f942, %f937, %f943; + selp.f32 %f945, 0f3E1CF906, 0f3DE718AF, %p168; + fma.rn.f32 %f946, %f944, %f937, %f945; + selp.f32 %f947, 0f3F6A937E, 0fBEC093AC, %p168; + fma.rn.f32 %f948, %f946, %f937, %f947; + selp.f32 %f949, 0f3F20D842, 0f3E0375D3, %p168; + fma.rn.f32 %f950, %f948, %f937, %f949; + neg.f32 %f951, %f935; + selp.f32 %f952, %f951, %f134, %p168; + fma.rn.f32 %f3028, %f950, %f952, %f952; + @%p167 bra $L__BB7_64; + + mov.f32 %f2947, 0f3F800000; + ex2.approx.ftz.f32 %f953, %f3028; + sub.f32 %f955, %f2947, %f953; + mov.b32 %r280, %f955; + mov.b32 %r281, %f134; + and.b32 %r282, %r281, -2147483648; + or.b32 %r283, %r282, %r280; + mov.b32 %f3028, %r283; + +$L__BB7_64: + cvt.rn.f32.s32 %f2852, %r846; + sub.f32 %f957, %f3027, %f3028; + mul.f32 %f138, %f957, 0f3F000000; + mul.f32 %f958, %f125, %f3067; + fma.rn.f32 %f139, %f138, %f958, %f3066; + mad.lo.s32 %r284, %r847, %r104, %r846; + add.s32 %r285, %r284, %r2; + mul.wide.s32 %rd27, %r285, 4; + add.s64 %rd28, %rd1, %rd27; + ld.global.f32 %f140, [%rd28]; + add.f32 %f959, %f68, %f126; + fma.rn.f32 %f960, %f959, %f52, %f69; + add.f32 %f961, %f960, %f2852; + cvt.rzi.s32.f32 %r286, %f961; + mul.wide.s32 %rd30, %r286, 4; + add.s64 %rd31, %rd29, %rd30; + ld.global.f32 %f3063, [%rd31]; + setp.eq.f32 %p169, %f96, 0f7F800000; + mov.f32 %f3029, 0f7F800000; + @%p169 bra $L__BB7_66; + + fma.rn.f32 %f3029, %f96, %f95, %f96; + +$L__BB7_66: + setp.geu.f32 %p728, %f92, 0f00000000; + mov.b32 %r287, %f3029; + xor.b32 %r288, %r287, -2147483648; + mov.b32 %f962, %r288; + selp.f32 %f144, %f962, %f3029, %p4; + add.f32 %f963, %f92, %f92; + selp.f32 %f964, %f963, 0f00000000, %p110; + setp.eq.f32 %p171, %f92, 0f00000000; + selp.f32 %f3030, %f964, %f144, %p171; + @%p728 bra $L__BB7_69; + + cvt.rzi.f32.f32 %f966, %f634; + setp.eq.f32 %p172, %f966, 0f40000000; + mov.f32 %f3030, %f144; + @%p172 bra $L__BB7_69; + + mov.f32 %f3030, 0f7FFFFFFF; + +$L__BB7_69: + mov.f32 %f2855, 0f3FB8AA3B; + mov.f32 %f2854, 0f3F000000; + abs.f32 %f2853, %f92; + add.f32 %f969, %f2853, 0f40000000; + mov.b32 %r289, %f969; + setp.gt.s32 %p173, %r289, 2139095039; + add.f32 %f970, %f92, 0f40000000; + setp.gtu.f32 %p174, %f2853, 0f7F800000; + mov.f32 %f3031, 0f7F800000; + selp.f32 %f971, %f970, %f3030, %p174; + selp.f32 %f972, 0fFF800000, 0f7F800000, %p4; + setp.neu.f32 %p175, %f2853, 0f7F800000; + selp.f32 %f973, %f971, %f972, %p175; + selp.f32 %f974, %f973, %f3030, %p173; + mul.f32 %f975, %f974, 0fBF000000; + setp.eq.f32 %p176, %f92, 0f3F800000; + selp.f32 %f976, 0fBF000000, %f975, %p176; + mov.f32 %f978, 0f3BBB989D; + fma.rn.f32 %f979, %f976, %f978, %f2854; + mov.f32 %f981, 0f437C0000; + cvt.sat.f32.f32 %f982, %f979; + mov.f32 %f983, 0f4B400001; + fma.rm.f32 %f984, %f982, %f981, %f983; + add.f32 %f985, %f984, 0fCB40007F; + neg.f32 %f986, %f985; + fma.rn.f32 %f987, %f976, %f2855, %f986; + mov.f32 %f988, 0f32A57060; + fma.rn.f32 %f989, %f976, %f988, %f987; + mov.b32 %r290, %f984; + shl.b32 %r291, %r290, 23; + mov.b32 %f990, %r291; + ex2.approx.ftz.f32 %f991, %f989; + mul.f32 %f147, %f991, %f990; + setp.eq.f32 %p177, %f100, 0f7F800000; + @%p177 bra $L__BB7_71; + + fma.rn.f32 %f3031, %f100, %f99, %f100; + +$L__BB7_71: + setp.geu.f32 %p731, %f97, 0f00000000; + setp.lt.f32 %p730, %f97, 0f00000000; + and.pred %p729, %p730, %p110; + mov.b32 %r292, %f3031; + xor.b32 %r293, %r292, -2147483648; + mov.b32 %f992, %r293; + selp.f32 %f150, %f992, %f3031, %p729; + add.f32 %f993, %f97, %f97; + selp.f32 %f994, %f993, 0f00000000, %p110; + setp.eq.f32 %p179, %f97, 0f00000000; + selp.f32 %f3032, %f994, %f150, %p179; + @%p731 bra $L__BB7_74; + + cvt.rzi.f32.f32 %f996, %f634; + setp.eq.f32 %p180, %f996, 0f40000000; + mov.f32 %f3032, %f150; + @%p180 bra $L__BB7_74; + + mov.f32 %f3032, 0f7FFFFFFF; + +$L__BB7_74: + mov.f32 %f2862, 0f32A57060; + mov.f32 %f2861, 0f4B400001; + mov.f32 %f2860, 0f437C0000; + mov.f32 %f2859, 0f3BBB989D; + abs.f32 %f2858, %f97; + setp.lt.f32 %p733, %f97, 0f00000000; + and.pred %p732, %p733, %p110; + mov.f32 %f2857, 0f3FB8AA3B; + mov.f32 %f2856, 0f3F000000; + add.f32 %f998, %f2858, 0f40000000; + mov.b32 %r294, %f998; + setp.gt.s32 %p181, %r294, 2139095039; + add.f32 %f999, %f97, 0f40000000; + setp.gtu.f32 %p182, %f2858, 0f7F800000; + selp.f32 %f1000, %f999, %f3032, %p182; + selp.f32 %f1001, 0fFF800000, 0f7F800000, %p732; + setp.neu.f32 %p183, %f2858, 0f7F800000; + selp.f32 %f1002, %f1000, %f1001, %p183; + selp.f32 %f1003, %f1002, %f3032, %p181; + mul.f32 %f1004, %f1003, 0fBF000000; + setp.eq.f32 %p184, %f97, 0f3F800000; + selp.f32 %f1005, 0fBF000000, %f1004, %p184; + fma.rn.f32 %f1008, %f1005, %f2859, %f2856; + cvt.sat.f32.f32 %f1011, %f1008; + fma.rm.f32 %f1013, %f1011, %f2860, %f2861; + add.f32 %f1014, %f1013, 0fCB40007F; + neg.f32 %f1015, %f1014; + fma.rn.f32 %f1016, %f1005, %f2857, %f1015; + fma.rn.f32 %f1018, %f1005, %f2862, %f1016; + mov.b32 %r295, %f1013; + shl.b32 %r296, %r295, 23; + mov.b32 %f1019, %r296; + ex2.approx.ftz.f32 %f1020, %f1018; + mul.f32 %f153, %f1020, %f1019; + sub.f32 %f1021, %f147, %f153; + mul.f32 %f1022, %f62, %f1021; + mul.f32 %f154, %f138, %f1022; + not.pred %p185, %p6; + mov.f64 %fd570, %fd36; + @%p185 bra $L__BB7_76; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r297}, %fd36; + } + xor.b32 %r298, %r297, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r299, %temp}, %fd36; + } + mov.b64 %fd570, {%r299, %r298}; + +$L__BB7_76: + setp.eq.f32 %p186, %f3065, 0f00000000; + @%p186 bra $L__BB7_80; + bra.uni $L__BB7_77; + +$L__BB7_80: + mov.u32 %r300, 0; + selp.b32 %r302, %r49, 0, %p121; + or.b32 %r303, %r302, 2146435072; + selp.b32 %r304, %r303, %r302, %p123; + mov.b64 %fd570, {%r300, %r304}; + bra.uni $L__BB7_81; + +$L__BB7_77: + setp.gt.s32 %p187, %r49, -1; + @%p187 bra $L__BB7_81; + + cvt.rzi.f64.f64 %fd352, %fd339; + setp.eq.f64 %p188, %fd352, 0d4008000000000000; + @%p188 bra $L__BB7_81; + + mov.f64 %fd570, 0dFFF8000000000000; + +$L__BB7_81: + cvt.f64.f32 %fd550, %f3065; + add.f64 %fd549, %fd550, 0d4008000000000000; + selp.f64 %fd571, %fd570, %fd549, %p124; + @%p16 bra $L__BB7_86; + + setp.eq.s32 %p192, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r305, %temp}, %fd339; + } + setp.eq.s32 %p193, %r305, 0; + and.pred %p194, %p192, %p193; + @%p194 bra $L__BB7_85; + bra.uni $L__BB7_83; + +$L__BB7_85: + mov.u32 %r312, 0; + mov.b64 %fd571, {%r312, %r54}; + bra.uni $L__BB7_86; + +$L__BB7_83: + cvt.f64.f32 %fd551, %f3065; + and.b32 %r306, %r49, 2147483647; + setp.ne.s32 %p195, %r306, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r307, %temp}, %fd551; + } + setp.ne.s32 %p196, %r307, 0; + or.pred %p197, %p195, %p196; + mov.f64 %fd571, %fd570; + @%p197 bra $L__BB7_86; + + setp.ne.s32 %p198, %r53, 1071644672; + and.pred %p199, %p198, %p6; + or.b32 %r309, %r55, -2147483648; + selp.b32 %r310, %r309, %r55, %p199; + mov.u32 %r311, 0; + mov.b64 %fd571, {%r311, %r310}; + +$L__BB7_86: + mov.f32 %f2873, 0f3102E308; + mov.f32 %f2872, 0fBF317218; + mov.f32 %f2871, 0f35BFBE8E; + mov.f32 %f2870, 0f3F317200; + mov.f32 %f2869, 0f3DAAAABD; + mov.f32 %f2868, 0f3C4CAF63; + mov.f32 %f2867, 0f3B18F0FE; + cvt.rn.f32.s32 %f2866, %r846; + add.f32 %f2865, %f2866, 0f3F000000; + sub.f32 %f2864, %f2865, %f3069; + mov.f32 %f2863, 0f3FB8AA3B; + setp.eq.f32 %p200, %f3065, 0f3F800000; + selp.f64 %fd358, 0d3FF0000000000000, %fd571, %p200; + div.rn.f64 %fd359, %fd34, %fd358; + mul.f32 %f1024, %f87, %f153; + mul.f32 %f1025, %f2864, %f147; + sub.f32 %f1026, %f1025, %f1024; + cvt.f64.f32 %fd360, %f1026; + mul.f64 %fd361, %fd359, %fd360; + cvt.f64.f32 %fd54, %f138; + mul.f64 %fd362, %fd361, %fd54; + cvt.rn.f32.f64 %f155, %fd362; + add.f32 %f1027, %f126, 0f3F000000; + sub.f32 %f156, %f1027, %f3068; + div.rn.f32 %f157, %f156, %f3064; + abs.f32 %f158, %f157; + setp.lt.f32 %p201, %f158, 0f00800000; + mul.f32 %f1028, %f158, 0f4B800000; + selp.f32 %f1029, %f1028, %f158, %p201; + selp.f32 %f1030, 0fC3170000, 0fC2FE0000, %p201; + mov.b32 %r313, %f1029; + and.b32 %r314, %r313, 8388607; + or.b32 %r315, %r314, 1065353216; + mov.b32 %f1031, %r315; + shr.u32 %r316, %r313, 23; + cvt.rn.f32.u32 %f1032, %r316; + add.f32 %f1033, %f1030, %f1032; + setp.gt.f32 %p202, %f1031, 0f3FB504F3; + mul.f32 %f1034, %f1031, 0f3F000000; + add.f32 %f1035, %f1033, 0f3F800000; + selp.f32 %f1036, %f1035, %f1033, %p202; + selp.f32 %f1037, %f1034, %f1031, %p202; + add.f32 %f1038, %f1037, 0fBF800000; + add.f32 %f1039, %f1037, 0f3F800000; + rcp.approx.ftz.f32 %f1040, %f1039; + add.f32 %f1041, %f1038, %f1038; + mul.f32 %f1043, %f1041, %f1040; + mul.f32 %f1044, %f1043, %f1043; + fma.rn.f32 %f1047, %f2867, %f1044, %f2868; + fma.rn.f32 %f1049, %f1047, %f1044, %f2869; + mul.rn.f32 %f1050, %f1049, %f1044; + mul.rn.f32 %f1051, %f1050, %f1043; + sub.f32 %f1052, %f1038, %f1043; + add.f32 %f1053, %f1052, %f1052; + neg.f32 %f1054, %f1043; + fma.rn.f32 %f1055, %f1054, %f1038, %f1053; + mul.rn.f32 %f1056, %f1040, %f1055; + add.f32 %f1057, %f1051, %f1043; + sub.f32 %f1058, %f1043, %f1057; + add.f32 %f1059, %f1051, %f1058; + add.f32 %f1060, %f1056, %f1059; + add.f32 %f1061, %f1057, %f1060; + sub.f32 %f1062, %f1057, %f1061; + add.f32 %f1063, %f1060, %f1062; + mul.rn.f32 %f1065, %f1036, %f2870; + mul.rn.f32 %f1067, %f1036, %f2871; + add.f32 %f1068, %f1065, %f1061; + sub.f32 %f1069, %f1065, %f1068; + add.f32 %f1070, %f1061, %f1069; + add.f32 %f1071, %f1063, %f1070; + add.f32 %f1072, %f1067, %f1071; + add.f32 %f1073, %f1068, %f1072; + sub.f32 %f1074, %f1068, %f1073; + add.f32 %f1075, %f1072, %f1074; + mul.rn.f32 %f1076, %f634, %f1073; + neg.f32 %f1077, %f1076; + fma.rn.f32 %f1078, %f634, %f1073, %f1077; + fma.rn.f32 %f1079, %f634, %f1075, %f1078; + mov.f32 %f1080, 0f00000000; + fma.rn.f32 %f1081, %f1080, %f1073, %f1079; + add.rn.f32 %f1082, %f1076, %f1081; + neg.f32 %f1083, %f1082; + add.rn.f32 %f1084, %f1076, %f1083; + add.rn.f32 %f1085, %f1084, %f1081; + mov.b32 %r317, %f1082; + setp.eq.s32 %p203, %r317, 1118925336; + add.s32 %r318, %r317, -1; + mov.b32 %f1086, %r318; + add.f32 %f1087, %f1085, 0f37000000; + selp.f32 %f159, %f1087, %f1085, %p203; + selp.f32 %f1088, %f1086, %f1082, %p203; + mul.rn.f32 %f1090, %f1088, %f2863; + cvt.rzi.f32.f32 %f1091, %f1090; + abs.f32 %f1092, %f1091; + setp.gt.f32 %p204, %f1092, 0f42FC0000; + mov.b32 %r319, %f1091; + and.b32 %r320, %r319, -2147483648; + or.b32 %r321, %r320, 1123811328; + mov.b32 %f1093, %r321; + selp.f32 %f1094, %f1093, %f1091, %p204; + fma.rn.f32 %f1096, %f1094, %f2872, %f1088; + fma.rn.f32 %f1098, %f1094, %f2873, %f1096; + mul.f32 %f1099, %f1098, 0f3FB8AA3B; + add.f32 %f1100, %f1094, 0f4B40007F; + mov.b32 %r322, %f1100; + shl.b32 %r323, %r322, 23; + mov.b32 %f1101, %r323; + ex2.approx.ftz.f32 %f1102, %f1099; + mul.f32 %f160, %f1102, %f1101; + setp.eq.f32 %p205, %f160, 0f7F800000; + mov.f32 %f3033, 0f7F800000; + @%p205 bra $L__BB7_88; + + fma.rn.f32 %f3033, %f160, %f159, %f160; + +$L__BB7_88: + setp.lt.f32 %p206, %f157, 0f00000000; + and.pred %p24, %p206, %p110; + setp.eq.f32 %p208, %f157, 0f00000000; + @%p208 bra $L__BB7_92; + bra.uni $L__BB7_89; + +$L__BB7_92: + add.f32 %f1107, %f157, %f157; + selp.f32 %f3035, %f1107, 0f00000000, %p110; + bra.uni $L__BB7_93; + +$L__BB7_89: + mov.b32 %r324, %f3033; + xor.b32 %r325, %r324, -2147483648; + mov.b32 %f1103, %r325; + selp.f32 %f3035, %f1103, %f3033, %p24; + setp.geu.f32 %p209, %f157, 0f00000000; + @%p209 bra $L__BB7_93; + + cvt.rzi.f32.f32 %f1105, %f634; + setp.eq.f32 %p210, %f1105, 0f40000000; + @%p210 bra $L__BB7_93; + + mov.f32 %f3035, 0f7FFFFFFF; + +$L__BB7_93: + abs.f32 %f2953, %f157; + add.f32 %f1108, %f2953, 0f40000000; + mov.b32 %r326, %f1108; + setp.lt.s32 %p212, %r326, 2139095040; + @%p212 bra $L__BB7_98; + + abs.f32 %f2958, %f157; + setp.gtu.f32 %p213, %f2958, 0f7F800000; + @%p213 bra $L__BB7_97; + bra.uni $L__BB7_95; + +$L__BB7_97: + add.f32 %f3035, %f157, 0f40000000; + bra.uni $L__BB7_98; + +$L__BB7_95: + abs.f32 %f2959, %f157; + setp.neu.f32 %p214, %f2959, 0f7F800000; + @%p214 bra $L__BB7_98; + + selp.f32 %f3035, 0fFF800000, 0f7F800000, %p24; + +$L__BB7_98: + mov.f32 %f2887, 0f00000000; + mov.f32 %f2886, 0f3102E308; + mov.f32 %f2885, 0fBF317218; + mov.f32 %f2884, 0f35BFBE8E; + mov.f32 %f2883, 0f3F317200; + mov.f32 %f2882, 0f3DAAAABD; + mov.f32 %f2881, 0f3C4CAF63; + mov.f32 %f2880, 0f3B18F0FE; + mov.f32 %f2879, 0f32A57060; + mov.f32 %f2878, 0f4B400001; + mov.f32 %f2877, 0f437C0000; + mov.f32 %f2876, 0f3BBB989D; + mov.f32 %f2875, 0f3FB8AA3B; + mov.f32 %f2874, 0f3F000000; + mul.f32 %f1110, %f3035, 0fBF000000; + setp.eq.f32 %p215, %f157, 0f3F800000; + selp.f32 %f1111, 0fBF000000, %f1110, %p215; + fma.rn.f32 %f1114, %f1111, %f2876, %f2874; + cvt.sat.f32.f32 %f1117, %f1114; + fma.rm.f32 %f1119, %f1117, %f2877, %f2878; + add.f32 %f1120, %f1119, 0fCB40007F; + neg.f32 %f1121, %f1120; + fma.rn.f32 %f1122, %f1111, %f2875, %f1121; + fma.rn.f32 %f1124, %f1111, %f2879, %f1122; + mov.b32 %r327, %f1119; + shl.b32 %r328, %r327, 23; + mov.b32 %f1125, %r328; + ex2.approx.ftz.f32 %f1126, %f1124; + mul.f32 %f169, %f1126, %f1125; + div.rn.f32 %f170, %f133, %f3064; + abs.f32 %f171, %f170; + setp.lt.f32 %p216, %f171, 0f00800000; + mul.f32 %f1127, %f171, 0f4B800000; + selp.f32 %f1128, %f1127, %f171, %p216; + selp.f32 %f1129, 0fC3170000, 0fC2FE0000, %p216; + mov.b32 %r329, %f1128; + and.b32 %r330, %r329, 8388607; + or.b32 %r331, %r330, 1065353216; + mov.b32 %f1130, %r331; + shr.u32 %r332, %r329, 23; + cvt.rn.f32.u32 %f1131, %r332; + add.f32 %f1132, %f1129, %f1131; + setp.gt.f32 %p217, %f1130, 0f3FB504F3; + mul.f32 %f1133, %f1130, 0f3F000000; + add.f32 %f1134, %f1132, 0f3F800000; + selp.f32 %f1135, %f1134, %f1132, %p217; + selp.f32 %f1136, %f1133, %f1130, %p217; + add.f32 %f1137, %f1136, 0fBF800000; + add.f32 %f1138, %f1136, 0f3F800000; + rcp.approx.ftz.f32 %f1139, %f1138; + add.f32 %f1140, %f1137, %f1137; + mul.f32 %f1142, %f1140, %f1139; + mul.f32 %f1143, %f1142, %f1142; + fma.rn.f32 %f1146, %f2880, %f1143, %f2881; + fma.rn.f32 %f1148, %f1146, %f1143, %f2882; + mul.rn.f32 %f1149, %f1148, %f1143; + mul.rn.f32 %f1150, %f1149, %f1142; + sub.f32 %f1151, %f1137, %f1142; + add.f32 %f1152, %f1151, %f1151; + neg.f32 %f1153, %f1142; + fma.rn.f32 %f1154, %f1153, %f1137, %f1152; + mul.rn.f32 %f1155, %f1139, %f1154; + add.f32 %f1156, %f1150, %f1142; + sub.f32 %f1157, %f1142, %f1156; + add.f32 %f1158, %f1150, %f1157; + add.f32 %f1159, %f1155, %f1158; add.f32 %f1160, %f1156, %f1159; - add.f32 %f1161, %f1157, %f1160; - sub.f32 %f1162, %f1157, %f1161; - add.f32 %f1163, %f1160, %f1162; - add.f32 %f1164, %f147, %f1161; - sub.f32 %f1165, %f147, %f1164; - add.f32 %f1166, %f1161, %f1165; - add.f32 %f1167, %f1163, %f1166; - add.f32 %f1168, %f148, %f1167; - add.f32 %f1169, %f1164, %f1168; - sub.f32 %f1170, %f1164, %f1169; - add.f32 %f1171, %f1168, %f1170; - mul.rn.f32 %f1173, %f908, %f1169; - neg.f32 %f1174, %f1173; - fma.rn.f32 %f1175, %f908, %f1169, %f1174; - fma.rn.f32 %f1176, %f908, %f1171, %f1175; - fma.rn.f32 %f1178, %f3065, %f1169, %f1176; - add.rn.f32 %f1179, %f1173, %f1178; - neg.f32 %f1180, %f1179; - add.rn.f32 %f1181, %f1173, %f1180; - add.rn.f32 %f1182, %f1181, %f1178; - mov.b32 %r165, %f1179; - setp.eq.s32 %p82, %r165, 1118925336; - add.s32 %r166, %r165, -1; - mov.b32 %f1183, %r166; - add.f32 %f1184, %f1182, 0f37000000; - selp.f32 %f1185, %f1183, %f1179, %p82; - selp.f32 %f197, %f1184, %f1182, %p82; - mul.f32 %f1186, %f1185, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1187, %f1186; - fma.rn.f32 %f1188, %f1187, %f3060, %f1185; - fma.rn.f32 %f1189, %f1187, %f3061, %f1188; - mul.f32 %f1190, %f1189, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1191, %f1190; - add.f32 %f1192, %f1187, 0f00000000; - ex2.approx.f32 %f1193, %f1192; - mul.f32 %f1194, %f1191, %f1193; - setp.lt.f32 %p83, %f1185, 0fC2D20000; - selp.f32 %f1195, 0f00000000, %f1194, %p83; - setp.gt.f32 %p84, %f1185, 0f42D20000; - selp.f32 %f3305, 0f7F800000, %f1195, %p84; - setp.eq.f32 %p85, %f3305, 0f7F800000; - @%p85 bra BB7_60; - - fma.rn.f32 %f3305, %f3305, %f197, %f3305; - -BB7_60: - setp.geu.f32 %p351, %f142, 0f00000000; - mov.b32 %r167, %f3305; - xor.b32 %r168, %r167, -2147483648; - mov.b32 %f1196, %r168; - selp.f32 %f201, %f1196, %f3305, %p2; - setp.eq.f32 %p86, %f142, 0f00000000; - selp.f32 %f3306, %f149, %f201, %p86; - @%p351 bra BB7_62; - - cvt.rzi.f32.f32 %f1198, %f908; - setp.neu.f32 %p87, %f1198, 0f40000000; - selp.f32 %f3306, 0f7FFFFFFF, %f201, %p87; - -BB7_62: - abs.f32 %f3082, %f142; - add.f32 %f3081, %f3082, 0f40000000; - mov.b32 %r300, %f3081; - cvt.rn.f32.s32 %f3080, %r328; - cvt.rn.f32.s32 %f3079, %r329; - mov.f32 %f3078, 0f35BFBE8E; - mov.f32 %f3077, 0f3F317200; - add.f32 %f3076, %f3080, 0f3F800000; - sub.f32 %f3075, %f3076, %f3343; - sub.f32 %f3074, %f3080, %f3343; - mov.f32 %f3073, 0f00000000; - mov.f32 %f3072, 0f3DAAAABD; - mov.f32 %f3071, 0f3C4CAF63; - mov.f32 %f3070, 0f3B18F0FE; - mov.f32 %f3069, 0fB5BFBE8E; - mov.f32 %f3068, 0fBF317200; - add.f32 %f1201, %f142, 0f40000000; - setp.gtu.f32 %p88, %f3082, 0f7F800000; - selp.f32 %f1202, %f1201, %f3306, %p88; - selp.f32 %f1203, 0fFF800000, 0f7F800000, %p2; - setp.neu.f32 %p89, %f3082, 0f7F800000; - selp.f32 %f1204, %f1202, %f1203, %p89; - setp.gt.s32 %p90, %r300, 2139095039; - selp.f32 %f1205, %f1204, %f3306, %p90; - mul.f32 %f1206, %f1205, 0fBF000000; - setp.eq.f32 %p91, %f142, 0f3F800000; - selp.f32 %f1207, 0fBF000000, %f1206, %p91; - mul.f32 %f1208, %f1207, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1209, %f1208; - fma.rn.f32 %f1211, %f1209, %f3068, %f1207; - fma.rn.f32 %f1213, %f1209, %f3069, %f1211; - mul.f32 %f1214, %f1213, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1215, %f1214; - add.f32 %f1216, %f1209, 0f00000000; - ex2.approx.f32 %f1217, %f1216; - mul.f32 %f1218, %f1215, %f1217; - setp.lt.f32 %p92, %f1207, 0fC2D20000; - selp.f32 %f1219, 0f00000000, %f1218, %p92; - setp.gt.f32 %p93, %f1207, 0f42D20000; - selp.f32 %f1220, 0f7F800000, %f1219, %p93; - sub.f32 %f1221, %f196, %f1220; - mul.f32 %f1222, %f98, %f1221; - mul.f32 %f205, %f184, %f1222; - mul.f32 %f1223, %f3074, %f1220; - mul.f32 %f1224, %f3075, %f196; - sub.f32 %f1225, %f1224, %f1223; - mul.f32 %f1226, %f1225, %f104; - mul.f32 %f206, %f184, %f1226; - add.f32 %f1227, %f3079, 0f3F800000; - sub.f32 %f207, %f1227, %f3342; - div.rn.f32 %f208, %f207, %f3338; - abs.f32 %f209, %f208; - setp.lt.f32 %p94, %f209, 0f00800000; - mul.f32 %f1228, %f209, 0f4B800000; - selp.f32 %f1229, 0fC3170000, 0fC2FE0000, %p94; - selp.f32 %f1230, %f1228, %f209, %p94; - mov.b32 %r169, %f1230; - and.b32 %r170, %r169, 8388607; - or.b32 %r171, %r170, 1065353216; - mov.b32 %f1231, %r171; - shr.u32 %r172, %r169, 23; - cvt.rn.f32.u32 %f1232, %r172; - add.f32 %f1233, %f1229, %f1232; - setp.gt.f32 %p95, %f1231, 0f3FB504F3; - mul.f32 %f1234, %f1231, 0f3F000000; - add.f32 %f1235, %f1233, 0f3F800000; - selp.f32 %f1236, %f1234, %f1231, %p95; - selp.f32 %f1237, %f1235, %f1233, %p95; - add.f32 %f210, %f1236, 0fBF800000; - add.f32 %f1200, %f1236, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1199,%f1200; - // inline asm - add.f32 %f212, %f210, %f210; - mul.f32 %f1238, %f1199, %f212; - mul.f32 %f1239, %f1238, %f1238; - fma.rn.f32 %f1242, %f3070, %f1239, %f3071; - fma.rn.f32 %f1244, %f1242, %f1239, %f3072; - mul.rn.f32 %f1245, %f1244, %f1239; - mul.rn.f32 %f1246, %f1245, %f1238; - sub.f32 %f1247, %f210, %f1238; - neg.f32 %f1248, %f1238; - add.f32 %f1249, %f1247, %f1247; - fma.rn.f32 %f1250, %f1248, %f210, %f1249; - mul.rn.f32 %f1251, %f1199, %f1250; - add.f32 %f1252, %f1246, %f1238; - sub.f32 %f1253, %f1238, %f1252; - add.f32 %f1254, %f1246, %f1253; - add.f32 %f1255, %f1251, %f1254; - add.f32 %f1256, %f1252, %f1255; - sub.f32 %f1257, %f1252, %f1256; - add.f32 %f1258, %f1255, %f1257; - mul.rn.f32 %f213, %f1237, %f3077; - mul.rn.f32 %f214, %f1237, %f3078; - add.f32 %f1261, %f213, %f1256; - sub.f32 %f1262, %f213, %f1261; - add.f32 %f1263, %f1256, %f1262; - add.f32 %f1264, %f1258, %f1263; - add.f32 %f1265, %f214, %f1264; - add.f32 %f1266, %f1261, %f1265; - sub.f32 %f1267, %f1261, %f1266; - add.f32 %f1268, %f1265, %f1267; - mul.rn.f32 %f1270, %f908, %f1266; - neg.f32 %f1271, %f1270; - fma.rn.f32 %f1272, %f908, %f1266, %f1271; - fma.rn.f32 %f1273, %f908, %f1268, %f1272; - fma.rn.f32 %f1275, %f3073, %f1266, %f1273; - add.rn.f32 %f1276, %f1270, %f1275; - neg.f32 %f1277, %f1276; - add.rn.f32 %f1278, %f1270, %f1277; - add.rn.f32 %f1279, %f1278, %f1275; - mov.b32 %r173, %f1276; - setp.eq.s32 %p96, %r173, 1118925336; - add.s32 %r174, %r173, -1; - mov.b32 %f1280, %r174; - add.f32 %f1281, %f1279, 0f37000000; - selp.f32 %f1282, %f1280, %f1276, %p96; - selp.f32 %f215, %f1281, %f1279, %p96; - mul.f32 %f1283, %f1282, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1284, %f1283; - fma.rn.f32 %f1285, %f1284, %f3068, %f1282; - fma.rn.f32 %f1286, %f1284, %f3069, %f1285; - mul.f32 %f1287, %f1286, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1288, %f1287; - add.f32 %f1289, %f1284, 0f00000000; - ex2.approx.f32 %f1290, %f1289; - mul.f32 %f1291, %f1288, %f1290; - setp.lt.f32 %p97, %f1282, 0fC2D20000; - selp.f32 %f1292, 0f00000000, %f1291, %p97; - setp.gt.f32 %p98, %f1282, 0f42D20000; - selp.f32 %f3307, 0f7F800000, %f1292, %p98; - setp.eq.f32 %p99, %f3307, 0f7F800000; - @%p99 bra BB7_64; - - fma.rn.f32 %f3307, %f3307, %f215, %f3307; - -BB7_64: - setp.lt.f32 %p100, %f208, 0f00000000; - and.pred %p5, %p100, %p55; - mov.b32 %r175, %f3307; - xor.b32 %r176, %r175, -2147483648; - mov.b32 %f1293, %r176; - selp.f32 %f3309, %f1293, %f3307, %p5; - setp.eq.f32 %p102, %f208, 0f00000000; - @%p102 bra BB7_67; - bra.uni BB7_65; - -BB7_67: - add.f32 %f1296, %f208, %f208; - selp.f32 %f3309, %f1296, 0f00000000, %p55; - bra.uni BB7_68; - -BB7_65: - setp.geu.f32 %p103, %f208, 0f00000000; - @%p103 bra BB7_68; - - cvt.rzi.f32.f32 %f1295, %f908; - setp.neu.f32 %p104, %f1295, 0f40000000; - selp.f32 %f3309, 0f7FFFFFFF, %f3309, %p104; - -BB7_68: - abs.f32 %f3083, %f208; - add.f32 %f1297, %f3083, 0f40000000; - mov.b32 %r36, %f1297; - setp.lt.s32 %p106, %r36, 2139095040; - @%p106 bra BB7_73; - - abs.f32 %f3092, %f208; - setp.gtu.f32 %p107, %f3092, 0f7F800000; - @%p107 bra BB7_72; - bra.uni BB7_70; - -BB7_72: - add.f32 %f3309, %f208, 0f40000000; - bra.uni BB7_73; - -BB7_70: - abs.f32 %f3093, %f208; - setp.neu.f32 %p108, %f3093, 0f7F800000; - @%p108 bra BB7_73; - - selp.f32 %f3309, 0fFF800000, 0f7F800000, %p5; - -BB7_73: - cvt.rn.f32.s32 %f3105, %r329; - sub.f32 %f3104, %f3105, %f3342; - mov.f32 %f3091, 0f35BFBE8E; - mov.f32 %f3090, 0f3F317200; - mov.f32 %f3089, 0f00000000; - mov.f32 %f3088, 0f3DAAAABD; - mov.f32 %f3087, 0f3C4CAF63; - mov.f32 %f3086, 0f3B18F0FE; - mov.f32 %f3085, 0fB5BFBE8E; - mov.f32 %f3084, 0fBF317200; - mul.f32 %f1300, %f3309, 0fBF000000; - setp.eq.f32 %p109, %f208, 0f3F800000; - selp.f32 %f1301, 0fBF000000, %f1300, %p109; - mul.f32 %f1302, %f1301, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1303, %f1302; - fma.rn.f32 %f1305, %f1303, %f3084, %f1301; - fma.rn.f32 %f1307, %f1303, %f3085, %f1305; - mul.f32 %f1308, %f1307, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1309, %f1308; - add.f32 %f1310, %f1303, 0f00000000; - ex2.approx.f32 %f1311, %f1310; - mul.f32 %f1312, %f1309, %f1311; - setp.lt.f32 %p110, %f1301, 0fC2D20000; - selp.f32 %f1313, 0f00000000, %f1312, %p110; - setp.gt.f32 %p111, %f1301, 0f42D20000; - selp.f32 %f226, 0f7F800000, %f1313, %p111; - div.rn.f32 %f227, %f3104, %f3338; - abs.f32 %f228, %f227; - setp.lt.f32 %p112, %f228, 0f00800000; - mul.f32 %f1314, %f228, 0f4B800000; - selp.f32 %f1315, 0fC3170000, 0fC2FE0000, %p112; - selp.f32 %f1316, %f1314, %f228, %p112; - mov.b32 %r177, %f1316; - and.b32 %r178, %r177, 8388607; - or.b32 %r179, %r178, 1065353216; - mov.b32 %f1317, %r179; - shr.u32 %r180, %r177, 23; - cvt.rn.f32.u32 %f1318, %r180; - add.f32 %f1319, %f1315, %f1318; - setp.gt.f32 %p113, %f1317, 0f3FB504F3; - mul.f32 %f1320, %f1317, 0f3F000000; - add.f32 %f1321, %f1319, 0f3F800000; - selp.f32 %f1322, %f1320, %f1317, %p113; - selp.f32 %f1323, %f1321, %f1319, %p113; - add.f32 %f229, %f1322, 0fBF800000; - add.f32 %f1299, %f1322, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1298,%f1299; - // inline asm - add.f32 %f231, %f229, %f229; - mul.f32 %f1324, %f1298, %f231; - mul.f32 %f1325, %f1324, %f1324; - fma.rn.f32 %f1328, %f3086, %f1325, %f3087; - fma.rn.f32 %f1330, %f1328, %f1325, %f3088; - mul.rn.f32 %f1331, %f1330, %f1325; - mul.rn.f32 %f1332, %f1331, %f1324; - sub.f32 %f1333, %f229, %f1324; - neg.f32 %f1334, %f1324; - add.f32 %f1335, %f1333, %f1333; - fma.rn.f32 %f1336, %f1334, %f229, %f1335; - mul.rn.f32 %f1337, %f1298, %f1336; - add.f32 %f1338, %f1332, %f1324; - sub.f32 %f1339, %f1324, %f1338; - add.f32 %f1340, %f1332, %f1339; - add.f32 %f1341, %f1337, %f1340; - add.f32 %f1342, %f1338, %f1341; - sub.f32 %f1343, %f1338, %f1342; - add.f32 %f1344, %f1341, %f1343; - mul.rn.f32 %f232, %f1323, %f3090; - mul.rn.f32 %f233, %f1323, %f3091; - add.f32 %f1347, %f232, %f1342; - sub.f32 %f1348, %f232, %f1347; - add.f32 %f1349, %f1342, %f1348; - add.f32 %f1350, %f1344, %f1349; - add.f32 %f1351, %f233, %f1350; - add.f32 %f1352, %f1347, %f1351; - sub.f32 %f1353, %f1347, %f1352; - add.f32 %f1354, %f1351, %f1353; - mul.rn.f32 %f1356, %f908, %f1352; - neg.f32 %f1357, %f1356; - fma.rn.f32 %f1358, %f908, %f1352, %f1357; - fma.rn.f32 %f1359, %f908, %f1354, %f1358; - fma.rn.f32 %f1361, %f3089, %f1352, %f1359; - add.rn.f32 %f1362, %f1356, %f1361; - neg.f32 %f1363, %f1362; - add.rn.f32 %f1364, %f1356, %f1363; - add.rn.f32 %f1365, %f1364, %f1361; - mov.b32 %r181, %f1362; - setp.eq.s32 %p114, %r181, 1118925336; - add.s32 %r182, %r181, -1; - mov.b32 %f1366, %r182; - add.f32 %f1367, %f1365, 0f37000000; - selp.f32 %f1368, %f1366, %f1362, %p114; - selp.f32 %f234, %f1367, %f1365, %p114; + sub.f32 %f1161, %f1156, %f1160; + add.f32 %f1162, %f1159, %f1161; + mul.rn.f32 %f1164, %f1135, %f2883; + mul.rn.f32 %f1166, %f1135, %f2884; + add.f32 %f1167, %f1164, %f1160; + sub.f32 %f1168, %f1164, %f1167; + add.f32 %f1169, %f1160, %f1168; + add.f32 %f1170, %f1162, %f1169; + add.f32 %f1171, %f1166, %f1170; + add.f32 %f1172, %f1167, %f1171; + sub.f32 %f1173, %f1167, %f1172; + add.f32 %f1174, %f1171, %f1173; + mul.rn.f32 %f1175, %f634, %f1172; + neg.f32 %f1176, %f1175; + fma.rn.f32 %f1177, %f634, %f1172, %f1176; + fma.rn.f32 %f1178, %f634, %f1174, %f1177; + fma.rn.f32 %f1180, %f2887, %f1172, %f1178; + add.rn.f32 %f1181, %f1175, %f1180; + neg.f32 %f1182, %f1181; + add.rn.f32 %f1183, %f1175, %f1182; + add.rn.f32 %f1184, %f1183, %f1180; + mov.b32 %r333, %f1181; + setp.eq.s32 %p218, %r333, 1118925336; + add.s32 %r334, %r333, -1; + mov.b32 %f1185, %r334; + add.f32 %f1186, %f1184, 0f37000000; + selp.f32 %f172, %f1186, %f1184, %p218; + selp.f32 %f1187, %f1185, %f1181, %p218; + mul.rn.f32 %f1188, %f1187, %f2875; + cvt.rzi.f32.f32 %f1189, %f1188; + abs.f32 %f1190, %f1189; + setp.gt.f32 %p219, %f1190, 0f42FC0000; + mov.b32 %r335, %f1189; + and.b32 %r336, %r335, -2147483648; + or.b32 %r337, %r336, 1123811328; + mov.b32 %f1191, %r337; + selp.f32 %f1192, %f1191, %f1189, %p219; + fma.rn.f32 %f1194, %f1192, %f2885, %f1187; + fma.rn.f32 %f1196, %f1192, %f2886, %f1194; + mul.f32 %f1197, %f1196, 0f3FB8AA3B; + add.f32 %f1198, %f1192, 0f4B40007F; + mov.b32 %r338, %f1198; + shl.b32 %r339, %r338, 23; + mov.b32 %f1199, %r339; + ex2.approx.ftz.f32 %f1200, %f1197; + mul.f32 %f173, %f1200, %f1199; + setp.eq.f32 %p220, %f173, 0f7F800000; + mov.f32 %f3036, 0f7F800000; + @%p220 bra $L__BB7_100; + + fma.rn.f32 %f3036, %f173, %f172, %f173; + +$L__BB7_100: + setp.lt.f32 %p221, %f170, 0f00000000; + and.pred %p25, %p221, %p110; + setp.eq.f32 %p223, %f170, 0f00000000; + @%p223 bra $L__BB7_104; + bra.uni $L__BB7_101; + +$L__BB7_104: + add.f32 %f1205, %f170, %f170; + selp.f32 %f3038, %f1205, 0f00000000, %p110; + bra.uni $L__BB7_105; + +$L__BB7_101: + mov.b32 %r340, %f3036; + xor.b32 %r341, %r340, -2147483648; + mov.b32 %f1201, %r341; + selp.f32 %f3038, %f1201, %f3036, %p25; + setp.geu.f32 %p224, %f170, 0f00000000; + @%p224 bra $L__BB7_105; + + cvt.rzi.f32.f32 %f1203, %f634; + setp.eq.f32 %p225, %f1203, 0f40000000; + @%p225 bra $L__BB7_105; + + mov.f32 %f3038, 0f7FFFFFFF; + +$L__BB7_105: + abs.f32 %f2960, %f170; + add.f32 %f1206, %f2960, 0f40000000; + mov.b32 %r342, %f1206; + setp.lt.s32 %p227, %r342, 2139095040; + @%p227 bra $L__BB7_110; + + abs.f32 %f2961, %f170; + setp.gtu.f32 %p228, %f2961, 0f7F800000; + @%p228 bra $L__BB7_109; + bra.uni $L__BB7_107; + +$L__BB7_109: + add.f32 %f3038, %f170, 0f40000000; + bra.uni $L__BB7_110; + +$L__BB7_107: + abs.f32 %f2962, %f170; + setp.neu.f32 %p229, %f2962, 0f7F800000; + @%p229 bra $L__BB7_110; + + selp.f32 %f3038, 0fFF800000, 0f7F800000, %p25; + +$L__BB7_110: + mov.f32 %f2893, 0f32A57060; + mov.f32 %f2892, 0f4B400001; + mov.f32 %f2891, 0f437C0000; + mov.f32 %f2890, 0f3BBB989D; + mov.f32 %f2889, 0f3FB8AA3B; + mov.f32 %f2888, 0f3F000000; + mul.f32 %f1207, %f3038, 0fBF000000; + setp.eq.f32 %p230, %f170, 0f3F800000; + selp.f32 %f1208, 0fBF000000, %f1207, %p230; + fma.rn.f32 %f1211, %f1208, %f2890, %f2888; + cvt.sat.f32.f32 %f1214, %f1211; + fma.rm.f32 %f1216, %f1214, %f2891, %f2892; + add.f32 %f1217, %f1216, 0fCB40007F; + neg.f32 %f1218, %f1217; + fma.rn.f32 %f1219, %f1208, %f2889, %f1218; + fma.rn.f32 %f1221, %f1208, %f2893, %f1219; + mov.b32 %r343, %f1216; + shl.b32 %r344, %r343, 23; + mov.b32 %f1222, %r344; + ex2.approx.ftz.f32 %f1223, %f1221; + mul.f32 %f182, %f1223, %f1222; + sub.f32 %f1224, %f169, %f182; + mul.f32 %f1225, %f63, %f1224; + mul.f32 %f183, %f125, %f1225; + not.pred %p231, %p7; + mov.f64 %fd573, %fd37; + @%p231 bra $L__BB7_112; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r345}, %fd37; + } + xor.b32 %r346, %r345, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r347, %temp}, %fd37; + } + mov.b64 %fd573, {%r347, %r346}; + +$L__BB7_112: + setp.eq.f32 %p232, %f3064, 0f00000000; + @%p232 bra $L__BB7_116; + bra.uni $L__BB7_113; + +$L__BB7_116: + mov.u32 %r348, 0; + selp.b32 %r350, %r56, 0, %p121; + or.b32 %r351, %r350, 2146435072; + selp.b32 %r352, %r351, %r350, %p123; + mov.b64 %fd573, {%r348, %r352}; + bra.uni $L__BB7_117; + +$L__BB7_113: + setp.gt.s32 %p233, %r56, -1; + @%p233 bra $L__BB7_117; + + cvt.rzi.f64.f64 %fd364, %fd339; + setp.eq.f64 %p234, %fd364, 0d4008000000000000; + @%p234 bra $L__BB7_117; + + mov.f64 %fd573, 0dFFF8000000000000; + +$L__BB7_117: + cvt.f64.f32 %fd553, %f3064; + add.f64 %fd552, %fd553, 0d4008000000000000; + selp.f64 %fd574, %fd573, %fd552, %p130; + @%p17 bra $L__BB7_122; + + setp.eq.s32 %p238, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r353, %temp}, %fd339; + } + setp.eq.s32 %p239, %r353, 0; + and.pred %p240, %p238, %p239; + @%p240 bra $L__BB7_121; + bra.uni $L__BB7_119; + +$L__BB7_121: + mov.u32 %r360, 0; + mov.b64 %fd574, {%r360, %r58}; + bra.uni $L__BB7_122; + +$L__BB7_119: + cvt.f64.f32 %fd554, %f3064; + and.b32 %r354, %r56, 2147483647; + setp.ne.s32 %p241, %r354, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r355, %temp}, %fd554; + } + setp.ne.s32 %p242, %r355, 0; + or.pred %p243, %p241, %p242; + mov.f64 %fd574, %fd573; + @%p243 bra $L__BB7_122; + + setp.ne.s32 %p244, %r53, 1071644672; + and.pred %p245, %p244, %p7; + or.b32 %r357, %r55, -2147483648; + selp.b32 %r358, %r357, %r55, %p245; + mov.u32 %r359, 0; + mov.b64 %fd574, {%r359, %r358}; + +$L__BB7_122: + cvt.rn.f32.s32 %f2956, %r847; + add.f32 %f2955, %f2956, 0f3F000000; + sub.f32 %f2954, %f2955, %f3068; + setp.eq.f32 %p246, %f3064, 0f3F800000; + selp.f64 %fd370, 0d3FF0000000000000, %fd574, %p246; + div.rn.f64 %fd371, %fd34, %fd370; + mul.f32 %f1227, %f133, %f182; + mul.f32 %f1228, %f2954, %f169; + sub.f32 %f1229, %f1228, %f1227; + cvt.f64.f32 %fd372, %f1229; + mul.f64 %fd373, %fd371, %fd372; + cvt.f64.f32 %fd374, %f125; + mul.f64 %fd375, %fd373, %fd374; + cvt.rn.f32.f64 %f184, %fd375; + setp.eq.f32 %p247, %f104, 0f7F800000; + mov.f32 %f3039, 0f7F800000; + @%p247 bra $L__BB7_124; + + fma.rn.f32 %f3039, %f104, %f103, %f104; + +$L__BB7_124: + setp.geu.f32 %p736, %f101, 0f00000000; + setp.lt.f32 %p735, %f101, 0f00000000; + and.pred %p734, %p735, %p110; + mov.b32 %r361, %f3039; + xor.b32 %r362, %r361, -2147483648; + mov.b32 %f1230, %r362; + selp.f32 %f187, %f1230, %f3039, %p734; + add.f32 %f1231, %f101, %f101; + selp.f32 %f1232, %f1231, 0f00000000, %p110; + setp.eq.f32 %p249, %f101, 0f00000000; + selp.f32 %f3040, %f1232, %f187, %p249; + @%p736 bra $L__BB7_127; + + cvt.rzi.f32.f32 %f1234, %f634; + setp.eq.f32 %p250, %f1234, 0f40000000; + mov.f32 %f3040, %f187; + @%p250 bra $L__BB7_127; + + mov.f32 %f3040, 0f7FFFFFFF; + +$L__BB7_127: + abs.f32 %f2900, %f101; + setp.lt.f32 %p738, %f101, 0f00000000; + and.pred %p737, %p738, %p110; + mov.f32 %f2899, 0f32A57060; + mov.f32 %f2898, 0f4B400001; + mov.f32 %f2897, 0f437C0000; + mov.f32 %f2896, 0f3BBB989D; + mov.f32 %f2895, 0f3FB8AA3B; + mov.f32 %f2894, 0f3F000000; + add.f32 %f1237, %f2900, 0f40000000; + mov.b32 %r363, %f1237; + setp.gt.s32 %p251, %r363, 2139095039; + add.f32 %f1238, %f101, 0f40000000; + setp.gtu.f32 %p252, %f2900, 0f7F800000; + mov.f32 %f3041, 0f7F800000; + selp.f32 %f1239, %f1238, %f3040, %p252; + selp.f32 %f1240, 0fFF800000, 0f7F800000, %p737; + setp.neu.f32 %p253, %f2900, 0f7F800000; + selp.f32 %f1241, %f1239, %f1240, %p253; + selp.f32 %f1242, %f1241, %f3040, %p251; + mul.f32 %f1243, %f1242, 0fBF000000; + setp.eq.f32 %p254, %f101, 0f3F800000; + selp.f32 %f1244, 0fBF000000, %f1243, %p254; + fma.rn.f32 %f1247, %f1244, %f2896, %f2894; + cvt.sat.f32.f32 %f1250, %f1247; + fma.rm.f32 %f1252, %f1250, %f2897, %f2898; + add.f32 %f1253, %f1252, 0fCB40007F; + neg.f32 %f1254, %f1253; + fma.rn.f32 %f1255, %f1244, %f2895, %f1254; + fma.rn.f32 %f1257, %f1244, %f2899, %f1255; + mov.b32 %r364, %f1252; + shl.b32 %r365, %r364, 23; + mov.b32 %f1258, %r365; + ex2.approx.ftz.f32 %f1259, %f1257; + mul.f32 %f190, %f1259, %f1258; + setp.eq.f32 %p255, %f108, 0f7F800000; + @%p255 bra $L__BB7_129; + + fma.rn.f32 %f3041, %f108, %f107, %f108; + +$L__BB7_129: + setp.geu.f32 %p741, %f105, 0f00000000; + setp.lt.f32 %p740, %f105, 0f00000000; + and.pred %p739, %p740, %p110; + mov.b32 %r366, %f3041; + xor.b32 %r367, %r366, -2147483648; + mov.b32 %f1260, %r367; + selp.f32 %f193, %f1260, %f3041, %p739; + add.f32 %f1261, %f105, %f105; + selp.f32 %f1262, %f1261, 0f00000000, %p110; + setp.eq.f32 %p257, %f105, 0f00000000; + selp.f32 %f3042, %f1262, %f193, %p257; + @%p741 bra $L__BB7_132; + + cvt.rzi.f32.f32 %f1264, %f634; + setp.eq.f32 %p258, %f1264, 0f40000000; + mov.f32 %f3042, %f193; + @%p258 bra $L__BB7_132; + + mov.f32 %f3042, 0f7FFFFFFF; + +$L__BB7_132: + cvt.rn.f32.s32 %f2909, %r846; + sub.f32 %f2908, %f2909, %f3069; + abs.f32 %f2907, %f105; + setp.lt.f32 %p743, %f105, 0f00000000; + and.pred %p742, %p743, %p110; + mov.f32 %f2906, 0f32A57060; + mov.f32 %f2905, 0f4B400001; + mov.f32 %f2904, 0f437C0000; + mov.f32 %f2903, 0f3BBB989D; + mov.f32 %f2902, 0f3FB8AA3B; + mov.f32 %f2901, 0f3F000000; + add.f32 %f1266, %f2907, 0f40000000; + mov.b32 %r368, %f1266; + setp.gt.s32 %p259, %r368, 2139095039; + add.f32 %f1267, %f105, 0f40000000; + setp.gtu.f32 %p260, %f2907, 0f7F800000; + selp.f32 %f1268, %f1267, %f3042, %p260; + selp.f32 %f1269, 0fFF800000, 0f7F800000, %p742; + setp.neu.f32 %p261, %f2907, 0f7F800000; + selp.f32 %f1270, %f1268, %f1269, %p261; + selp.f32 %f1271, %f1270, %f3042, %p259; + mul.f32 %f1272, %f1271, 0fBF000000; + setp.eq.f32 %p262, %f105, 0f3F800000; + selp.f32 %f1273, 0fBF000000, %f1272, %p262; + fma.rn.f32 %f1276, %f1273, %f2903, %f2901; + cvt.sat.f32.f32 %f1279, %f1276; + fma.rm.f32 %f1281, %f1279, %f2904, %f2905; + add.f32 %f1282, %f1281, 0fCB40007F; + neg.f32 %f1283, %f1282; + fma.rn.f32 %f1284, %f1273, %f2902, %f1283; + fma.rn.f32 %f1286, %f1273, %f2906, %f1284; + mov.b32 %r369, %f1281; + shl.b32 %r370, %r369, 23; + mov.b32 %f1287, %r370; + ex2.approx.ftz.f32 %f1288, %f1286; + mul.f32 %f196, %f1288, %f1287; + add.f32 %f1289, %f2908, 0f3F800000; + mul.f32 %f1290, %f1289, %f190; + mul.f32 %f1291, %f2908, %f196; + sub.f32 %f1292, %f1290, %f1291; + mul.f32 %f1293, %f64, %f1292; + mul.f32 %f197, %f138, %f1293; + not.pred %p263, %p10; + mov.f64 %fd576, %fd38; + @%p263 bra $L__BB7_134; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r371}, %fd38; + } + xor.b32 %r372, %r371, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r373, %temp}, %fd38; + } + mov.b64 %fd576, {%r373, %r372}; + +$L__BB7_134: + setp.eq.f32 %p744, %f3065, 0f00000000; + @%p744 bra $L__BB7_138; + bra.uni $L__BB7_135; + +$L__BB7_138: + mov.u32 %r374, 0; + mov.b64 %fd576, {%r374, %r59}; + bra.uni $L__BB7_139; + +$L__BB7_135: + setp.gt.s32 %p265, %r49, -1; + @%p265 bra $L__BB7_139; + + cvt.rzi.f64.f64 %fd377, %fd345; + setp.eq.f64 %p266, %fd377, 0d4014000000000000; + @%p266 bra $L__BB7_139; + + mov.f64 %fd576, 0dFFF8000000000000; + +$L__BB7_139: + cvt.f64.f32 %fd556, %f3065; + add.f64 %fd555, %fd556, 0d4014000000000000; + selp.f64 %fd577, %fd576, %fd555, %p146; + @%p20 bra $L__BB7_144; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r836}, %fd345; + } + and.b32 %r835, %r836, 2147483647; + setp.eq.s32 %p268, %r835, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r375, %temp}, %fd345; + } + setp.eq.s32 %p269, %r375, 0; + and.pred %p270, %p268, %p269; + @%p270 bra $L__BB7_143; + bra.uni $L__BB7_141; + +$L__BB7_143: + mov.u32 %r379, 0; + mov.b64 %fd577, {%r379, %r63}; + bra.uni $L__BB7_144; + +$L__BB7_141: + cvt.f64.f32 %fd557, %f3065; + and.b32 %r376, %r49, 2147483647; + setp.ne.s32 %p271, %r376, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r377, %temp}, %fd557; + } + setp.ne.s32 %p272, %r377, 0; + or.pred %p273, %p271, %p272; + mov.f64 %fd577, %fd576; + @%p273 bra $L__BB7_144; + + mov.u32 %r378, 0; + mov.b64 %fd577, {%r378, %r65}; + +$L__BB7_144: + not.pred %p274, %p11; + mov.f64 %fd579, %fd40; + @%p274 bra $L__BB7_146; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r380}, %fd40; + } + xor.b32 %r381, %r380, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r382, %temp}, %fd40; + } + mov.b64 %fd579, {%r382, %r381}; + +$L__BB7_146: + cvt.rn.f32.s32 %f2912, %r846; + sub.f32 %f2911, %f2912, %f3069; + add.f32 %f2910, %f2911, 0f3F000000; + setp.eq.f32 %p275, %f2910, 0f00000000; + @%p275 bra $L__BB7_150; + bra.uni $L__BB7_147; + +$L__BB7_150: + mov.u32 %r383, 0; + selp.b32 %r385, %r61, 0, %p121; + or.b32 %r386, %r385, 2146435072; + selp.b32 %r387, %r386, %r385, %p123; + mov.b64 %fd579, {%r383, %r387}; + bra.uni $L__BB7_151; + +$L__BB7_147: + setp.gt.s32 %p276, %r61, -1; + @%p276 bra $L__BB7_151; + + cvt.rzi.f64.f64 %fd384, %fd339; + setp.eq.f64 %p277, %fd384, 0d4008000000000000; + @%p277 bra $L__BB7_151; + + mov.f64 %fd579, 0dFFF8000000000000; + +$L__BB7_151: + selp.f64 %fd580, %fd579, %fd41, %p148; + @%p21 bra $L__BB7_156; + + setp.eq.s32 %p281, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r388, %temp}, %fd339; + } + setp.eq.s32 %p282, %r388, 0; + and.pred %p283, %p281, %p282; + @%p283 bra $L__BB7_155; + bra.uni $L__BB7_153; + +$L__BB7_155: + mov.u32 %r395, 0; + mov.b64 %fd580, {%r395, %r67}; + bra.uni $L__BB7_156; + +$L__BB7_153: + cvt.rn.f32.s32 %f2915, %r846; + sub.f32 %f2914, %f2915, %f3069; + add.f32 %f2913, %f2914, 0f3F000000; + cvt.f64.f32 %fd558, %f2913; + and.b32 %r389, %r61, 2147483647; + setp.ne.s32 %p284, %r389, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r390, %temp}, %fd558; + } + setp.ne.s32 %p285, %r390, 0; + or.pred %p286, %p284, %p285; + mov.f64 %fd580, %fd579; + @%p286 bra $L__BB7_156; + + setp.ne.s32 %p287, %r53, 1071644672; + and.pred %p288, %p287, %p11; + or.b32 %r392, %r55, -2147483648; + selp.b32 %r393, %r392, %r55, %p288; + mov.u32 %r394, 0; + mov.b64 %fd580, {%r394, %r393}; + +$L__BB7_156: + cvt.rn.f32.s32 %f2918, %r846; + sub.f32 %f2917, %f2918, %f3069; + add.f32 %f2916, %f2917, 0f3F000000; + setp.eq.f32 %p289, %f2916, 0f3F800000; + selp.f64 %fd387, 0d3FF0000000000000, %fd580, %p289; + cvt.f64.f32 %fd388, %f190; + mul.f64 %fd79, %fd387, %fd388; + not.pred %p290, %p12; + mov.f64 %fd582, %fd43; + @%p290 bra $L__BB7_158; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r396}, %fd43; + } + xor.b32 %r397, %r396, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r398, %temp}, %fd43; + } + mov.b64 %fd582, {%r398, %r397}; + +$L__BB7_158: + setp.eq.f32 %p291, %f87, 0f00000000; + @%p291 bra $L__BB7_162; + bra.uni $L__BB7_159; + +$L__BB7_162: + mov.u32 %r399, 0; + selp.b32 %r401, %r66, 0, %p121; + or.b32 %r402, %r401, 2146435072; + selp.b32 %r403, %r402, %r401, %p123; + mov.b64 %fd582, {%r399, %r403}; + bra.uni $L__BB7_163; + +$L__BB7_159: + setp.gt.s32 %p292, %r66, -1; + @%p292 bra $L__BB7_163; + + cvt.rzi.f64.f64 %fd390, %fd339; + setp.eq.f64 %p293, %fd390, 0d4008000000000000; + @%p293 bra $L__BB7_163; + + mov.f64 %fd582, 0dFFF8000000000000; + +$L__BB7_163: + selp.f64 %fd583, %fd582, %fd44, %p156; + @%p22 bra $L__BB7_168; + + setp.eq.s32 %p297, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r404, %temp}, %fd339; + } + setp.eq.s32 %p298, %r404, 0; + and.pred %p299, %p297, %p298; + @%p299 bra $L__BB7_167; + bra.uni $L__BB7_165; + +$L__BB7_167: + mov.u32 %r411, 0; + mov.b64 %fd583, {%r411, %r69}; + bra.uni $L__BB7_168; + +$L__BB7_165: + cvt.rn.f32.s32 %f2921, %r846; + sub.f32 %f2920, %f2921, %f3069; + add.f32 %f2919, %f2920, 0fBF000000; + cvt.f64.f32 %fd559, %f2919; + and.b32 %r405, %r66, 2147483647; + setp.ne.s32 %p300, %r405, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r406, %temp}, %fd559; + } + setp.ne.s32 %p301, %r406, 0; + or.pred %p302, %p300, %p301; + mov.f64 %fd583, %fd582; + @%p302 bra $L__BB7_168; + + setp.ne.s32 %p303, %r53, 1071644672; + and.pred %p304, %p303, %p12; + or.b32 %r408, %r55, -2147483648; + selp.b32 %r409, %r408, %r55, %p304; + mov.u32 %r410, 0; + mov.b64 %fd583, {%r410, %r409}; + +$L__BB7_168: + cvt.f64.f32 %fd560, %f138; + setp.eq.f32 %p745, %f3065, 0f3F800000; + cvt.rn.f32.s32 %f2957, %r847; + mov.f32 %f2930, 0f00000000; + mov.f32 %f2929, 0f3102E308; + mov.f32 %f2928, 0fBF317218; + mov.f32 %f2927, 0f35BFBE8E; + mov.f32 %f2926, 0f3F317200; + mov.f32 %f2925, 0f3DAAAABD; + mov.f32 %f2924, 0f3C4CAF63; + mov.f32 %f2923, 0f3B18F0FE; + mov.f32 %f2922, 0f3FB8AA3B; + setp.eq.f32 %p305, %f87, 0f3F800000; + selp.f64 %fd393, 0d3FF0000000000000, %fd583, %p305; + cvt.f64.f32 %fd394, %f196; + mul.f64 %fd395, %fd393, %fd394; + sub.f64 %fd396, %fd79, %fd395; + selp.f64 %fd397, 0d3FF0000000000000, %fd577, %p745; + div.rn.f64 %fd398, %fd35, %fd397; + mul.f64 %fd399, %fd398, %fd396; + mul.f64 %fd400, %fd399, %fd560; + mul.f32 %f1295, %f65, %f197; + cvt.f64.f32 %fd401, %f1295; + sub.f64 %fd402, %fd401, %fd400; + cvt.rn.f32.f64 %f198, %fd402; + add.f32 %f1296, %f2957, 0f3F800000; + sub.f32 %f1297, %f1296, %f3068; + div.rn.f32 %f199, %f1297, %f3064; + abs.f32 %f200, %f199; + setp.lt.f32 %p307, %f200, 0f00800000; + mul.f32 %f1298, %f200, 0f4B800000; + selp.f32 %f1299, %f1298, %f200, %p307; + selp.f32 %f1300, 0fC3170000, 0fC2FE0000, %p307; + mov.b32 %r412, %f1299; + and.b32 %r413, %r412, 8388607; + or.b32 %r414, %r413, 1065353216; + mov.b32 %f1301, %r414; + shr.u32 %r415, %r412, 23; + cvt.rn.f32.u32 %f1302, %r415; + add.f32 %f1303, %f1300, %f1302; + setp.gt.f32 %p308, %f1301, 0f3FB504F3; + mul.f32 %f1304, %f1301, 0f3F000000; + add.f32 %f1305, %f1303, 0f3F800000; + selp.f32 %f1306, %f1305, %f1303, %p308; + selp.f32 %f1307, %f1304, %f1301, %p308; + add.f32 %f1308, %f1307, 0fBF800000; + add.f32 %f1309, %f1307, 0f3F800000; + rcp.approx.ftz.f32 %f1310, %f1309; + add.f32 %f1311, %f1308, %f1308; + mul.f32 %f1313, %f1311, %f1310; + mul.f32 %f1314, %f1313, %f1313; + fma.rn.f32 %f1317, %f2923, %f1314, %f2924; + fma.rn.f32 %f1319, %f1317, %f1314, %f2925; + mul.rn.f32 %f1320, %f1319, %f1314; + mul.rn.f32 %f1321, %f1320, %f1313; + sub.f32 %f1322, %f1308, %f1313; + add.f32 %f1323, %f1322, %f1322; + neg.f32 %f1324, %f1313; + fma.rn.f32 %f1325, %f1324, %f1308, %f1323; + mul.rn.f32 %f1326, %f1310, %f1325; + add.f32 %f1327, %f1321, %f1313; + sub.f32 %f1328, %f1313, %f1327; + add.f32 %f1329, %f1321, %f1328; + add.f32 %f1330, %f1326, %f1329; + add.f32 %f1331, %f1327, %f1330; + sub.f32 %f1332, %f1327, %f1331; + add.f32 %f1333, %f1330, %f1332; + mul.rn.f32 %f1335, %f1306, %f2926; + mul.rn.f32 %f1337, %f1306, %f2927; + add.f32 %f1338, %f1335, %f1331; + sub.f32 %f1339, %f1335, %f1338; + add.f32 %f1340, %f1331, %f1339; + add.f32 %f1341, %f1333, %f1340; + add.f32 %f1342, %f1337, %f1341; + add.f32 %f1343, %f1338, %f1342; + sub.f32 %f1344, %f1338, %f1343; + add.f32 %f1345, %f1342, %f1344; + mul.rn.f32 %f1346, %f634, %f1343; + neg.f32 %f1347, %f1346; + fma.rn.f32 %f1348, %f634, %f1343, %f1347; + fma.rn.f32 %f1349, %f634, %f1345, %f1348; + fma.rn.f32 %f1351, %f2930, %f1343, %f1349; + add.rn.f32 %f1352, %f1346, %f1351; + neg.f32 %f1353, %f1352; + add.rn.f32 %f1354, %f1346, %f1353; + add.rn.f32 %f1355, %f1354, %f1351; + mov.b32 %r416, %f1352; + setp.eq.s32 %p309, %r416, 1118925336; + add.s32 %r417, %r416, -1; + mov.b32 %f1356, %r417; + add.f32 %f1357, %f1355, 0f37000000; + selp.f32 %f201, %f1357, %f1355, %p309; + selp.f32 %f1358, %f1356, %f1352, %p309; + mul.rn.f32 %f1360, %f1358, %f2922; + cvt.rzi.f32.f32 %f1361, %f1360; + abs.f32 %f1362, %f1361; + setp.gt.f32 %p310, %f1362, 0f42FC0000; + mov.b32 %r418, %f1361; + and.b32 %r419, %r418, -2147483648; + or.b32 %r420, %r419, 1123811328; + mov.b32 %f1363, %r420; + selp.f32 %f1364, %f1363, %f1361, %p310; + fma.rn.f32 %f1366, %f1364, %f2928, %f1358; + fma.rn.f32 %f1368, %f1364, %f2929, %f1366; mul.f32 %f1369, %f1368, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1370, %f1369; - fma.rn.f32 %f1371, %f1370, %f3084, %f1368; - fma.rn.f32 %f1372, %f1370, %f3085, %f1371; - mul.f32 %f1373, %f1372, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1374, %f1373; - add.f32 %f1375, %f1370, 0f00000000; - ex2.approx.f32 %f1376, %f1375; - mul.f32 %f1377, %f1374, %f1376; - setp.lt.f32 %p115, %f1368, 0fC2D20000; - selp.f32 %f1378, 0f00000000, %f1377, %p115; - setp.gt.f32 %p116, %f1368, 0f42D20000; - selp.f32 %f3310, 0f7F800000, %f1378, %p116; - setp.eq.f32 %p117, %f3310, 0f7F800000; - @%p117 bra BB7_75; - - fma.rn.f32 %f3310, %f3310, %f234, %f3310; - -BB7_75: - setp.lt.f32 %p118, %f227, 0f00000000; - and.pred %p6, %p118, %p55; - mov.b32 %r183, %f3310; - xor.b32 %r184, %r183, -2147483648; - mov.b32 %f1379, %r184; - selp.f32 %f3312, %f1379, %f3310, %p6; - setp.eq.f32 %p120, %f227, 0f00000000; - @%p120 bra BB7_78; - bra.uni BB7_76; - -BB7_78: - add.f32 %f1382, %f227, %f227; - selp.f32 %f3312, %f1382, 0f00000000, %p55; - bra.uni BB7_79; - -BB7_76: - setp.geu.f32 %p121, %f227, 0f00000000; - @%p121 bra BB7_79; - - cvt.rzi.f32.f32 %f1381, %f908; - setp.neu.f32 %p122, %f1381, 0f40000000; - selp.f32 %f3312, 0f7FFFFFFF, %f3312, %p122; - -BB7_79: - abs.f32 %f3005, %f227; - add.f32 %f1383, %f3005, 0f40000000; - mov.b32 %r37, %f1383; - setp.lt.s32 %p124, %r37, 2139095040; - @%p124 bra BB7_84; - - abs.f32 %f3102, %f227; - setp.gtu.f32 %p125, %f3102, 0f7F800000; - @%p125 bra BB7_83; - bra.uni BB7_81; - -BB7_83: - add.f32 %f3312, %f227, 0f40000000; - bra.uni BB7_84; - -BB7_81: - abs.f32 %f3103, %f227; - setp.neu.f32 %p126, %f3103, 0f7F800000; - @%p126 bra BB7_84; - - selp.f32 %f3312, 0fFF800000, 0f7F800000, %p6; - -BB7_84: - cvt.rn.f32.s32 %f3107, %r329; - sub.f32 %f3106, %f3107, %f3342; - cvt.rn.f32.s32 %f3014, %r329; - add.f32 %f3013, %f3014, 0f3F800000; - sub.f32 %f3012, %f3013, %f3342; - mov.f32 %f3011, 0f00000000; - mov.f32 %f3010, 0f3DAAAABD; - mov.f32 %f3009, 0f3C4CAF63; - mov.f32 %f3008, 0f3B18F0FE; - mov.f32 %f3007, 0fB5BFBE8E; - mov.f32 %f3006, 0fBF317200; - mul.f32 %f1386, %f3312, 0fBF000000; - setp.eq.f32 %p127, %f227, 0f3F800000; - selp.f32 %f1387, 0fBF000000, %f1386, %p127; - mul.f32 %f1388, %f1387, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1389, %f1388; - fma.rn.f32 %f1391, %f1389, %f3006, %f1387; - fma.rn.f32 %f1393, %f1389, %f3007, %f1391; - mul.f32 %f1394, %f1393, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1395, %f1394; - add.f32 %f1396, %f1389, 0f00000000; - ex2.approx.f32 %f1397, %f1396; - mul.f32 %f1398, %f1395, %f1397; - setp.lt.f32 %p128, %f1387, 0fC2D20000; - selp.f32 %f1399, 0f00000000, %f1398, %p128; - setp.gt.f32 %p129, %f1387, 0f42D20000; - selp.f32 %f1400, 0f7F800000, %f1399, %p129; - sub.f32 %f1401, %f226, %f1400; - mul.f32 %f1402, %f99, %f1401; - mul.f32 %f245, %f170, %f1402; - mul.f32 %f1403, %f3106, %f1400; - mul.f32 %f1404, %f3012, %f226; - sub.f32 %f1405, %f1404, %f1403; - mul.f32 %f1406, %f1405, %f105; - mul.f32 %f246, %f170, %f1406; - // inline asm - rcp.approx.ftz.f32 %f1384,%f137; - // inline asm - mul.f32 %f1407, %f1384, %f138; - mul.f32 %f1408, %f1407, %f1407; - fma.rn.f32 %f1411, %f3008, %f1408, %f3009; - fma.rn.f32 %f1413, %f1411, %f1408, %f3010; - mul.rn.f32 %f1414, %f1413, %f1408; - mul.rn.f32 %f1415, %f1414, %f1407; - sub.f32 %f1416, %f136, %f1407; - neg.f32 %f1417, %f1407; - add.f32 %f1418, %f1416, %f1416; - fma.rn.f32 %f1419, %f1417, %f136, %f1418; - mul.rn.f32 %f1420, %f1384, %f1419; - add.f32 %f1421, %f1415, %f1407; - sub.f32 %f1422, %f1407, %f1421; - add.f32 %f1423, %f1415, %f1422; - add.f32 %f1424, %f1420, %f1423; - add.f32 %f1425, %f1421, %f1424; - sub.f32 %f1426, %f1421, %f1425; - add.f32 %f1427, %f1424, %f1426; - add.f32 %f1428, %f139, %f1425; - sub.f32 %f1429, %f139, %f1428; - add.f32 %f1430, %f1425, %f1429; - add.f32 %f1431, %f1427, %f1430; - add.f32 %f1432, %f140, %f1431; - add.f32 %f1433, %f1428, %f1432; - sub.f32 %f1434, %f1428, %f1433; - add.f32 %f1435, %f1432, %f1434; - mul.rn.f32 %f1437, %f908, %f1433; - neg.f32 %f1438, %f1437; - fma.rn.f32 %f1439, %f908, %f1433, %f1438; - fma.rn.f32 %f1440, %f908, %f1435, %f1439; - fma.rn.f32 %f1442, %f3011, %f1433, %f1440; - add.rn.f32 %f1443, %f1437, %f1442; - neg.f32 %f1444, %f1443; - add.rn.f32 %f1445, %f1437, %f1444; - add.rn.f32 %f1446, %f1445, %f1442; - mov.b32 %r185, %f1443; - setp.eq.s32 %p130, %r185, 1118925336; - add.s32 %r186, %r185, -1; - mov.b32 %f1447, %r186; - add.f32 %f1448, %f1446, 0f37000000; - selp.f32 %f1449, %f1447, %f1443, %p130; - selp.f32 %f247, %f1448, %f1446, %p130; - mul.f32 %f1450, %f1449, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1451, %f1450; - fma.rn.f32 %f1452, %f1451, %f3006, %f1449; - fma.rn.f32 %f1453, %f1451, %f3007, %f1452; - mul.f32 %f1454, %f1453, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1455, %f1454; - add.f32 %f1456, %f1451, 0f00000000; - ex2.approx.f32 %f1457, %f1456; - mul.f32 %f1458, %f1455, %f1457; - setp.lt.f32 %p131, %f1449, 0fC2D20000; - selp.f32 %f1459, 0f00000000, %f1458, %p131; - setp.gt.f32 %p132, %f1449, 0f42D20000; - selp.f32 %f3313, 0f7F800000, %f1459, %p132; - setp.eq.f32 %p133, %f3313, 0f7F800000; - @%p133 bra BB7_86; - - fma.rn.f32 %f3313, %f3313, %f247, %f3313; - -BB7_86: - setp.eq.f32 %p336, %f133, 0f00000000; - setp.geu.f32 %p335, %f133, 0f00000000; - mov.b32 %r187, %f3313; - xor.b32 %r188, %r187, -2147483648; - mov.b32 %f1460, %r188; - selp.f32 %f251, %f1460, %f3313, %p1; - selp.f32 %f3314, %f141, %f251, %p336; - @%p335 bra BB7_88; - - cvt.rzi.f32.f32 %f1462, %f908; - setp.neu.f32 %p135, %f1462, 0f40000000; - selp.f32 %f3314, 0f7FFFFFFF, %f251, %p135; - -BB7_88: - abs.f32 %f3024, %f133; - setp.eq.f32 %p340, %f133, 0f3F800000; - add.f32 %f3023, %f3024, 0f40000000; - mov.b32 %r294, %f3023; - setp.gt.s32 %p339, %r294, 2139095039; - setp.neu.f32 %p338, %f3024, 0f7F800000; - selp.f32 %f3022, 0fFF800000, 0f7F800000, %p1; - setp.gtu.f32 %p337, %f3024, 0f7F800000; - add.f32 %f3021, %f133, 0f40000000; - mov.f32 %f3020, 0f00000000; - mov.f32 %f3019, 0f3DAAAABD; - mov.f32 %f3018, 0f3C4CAF63; - mov.f32 %f3017, 0f3B18F0FE; - mov.f32 %f3016, 0fB5BFBE8E; - mov.f32 %f3015, 0fBF317200; - selp.f32 %f1466, %f3021, %f3314, %p337; - selp.f32 %f1468, %f1466, %f3022, %p338; - selp.f32 %f1469, %f1468, %f3314, %p339; - mul.f32 %f1470, %f1469, 0fBF000000; - selp.f32 %f1471, 0fBF000000, %f1470, %p340; - mul.f32 %f1472, %f1471, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1473, %f1472; - fma.rn.f32 %f1475, %f1473, %f3015, %f1471; - fma.rn.f32 %f1477, %f1473, %f3016, %f1475; - mul.f32 %f1478, %f1477, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1479, %f1478; - add.f32 %f1480, %f1473, 0f00000000; - ex2.approx.f32 %f1481, %f1480; - mul.f32 %f1482, %f1479, %f1481; - setp.lt.f32 %p140, %f1471, 0fC2D20000; - selp.f32 %f1483, 0f00000000, %f1482, %p140; - setp.gt.f32 %p141, %f1471, 0f42D20000; - selp.f32 %f255, 0f7F800000, %f1483, %p141; - // inline asm - rcp.approx.ftz.f32 %f1463,%f145; - // inline asm - mul.f32 %f1484, %f1463, %f146; - mul.f32 %f1485, %f1484, %f1484; - fma.rn.f32 %f1488, %f3017, %f1485, %f3018; - fma.rn.f32 %f1490, %f1488, %f1485, %f3019; - mul.rn.f32 %f1491, %f1490, %f1485; - mul.rn.f32 %f1492, %f1491, %f1484; - sub.f32 %f1493, %f144, %f1484; - neg.f32 %f1494, %f1484; - add.f32 %f1495, %f1493, %f1493; - fma.rn.f32 %f1496, %f1494, %f144, %f1495; - mul.rn.f32 %f1497, %f1463, %f1496; - add.f32 %f1498, %f1492, %f1484; - sub.f32 %f1499, %f1484, %f1498; - add.f32 %f1500, %f1492, %f1499; - add.f32 %f1501, %f1497, %f1500; - add.f32 %f1502, %f1498, %f1501; - sub.f32 %f1503, %f1498, %f1502; - add.f32 %f1504, %f1501, %f1503; - add.f32 %f1505, %f147, %f1502; - sub.f32 %f1506, %f147, %f1505; - add.f32 %f1507, %f1502, %f1506; - add.f32 %f1508, %f1504, %f1507; - add.f32 %f1509, %f148, %f1508; - add.f32 %f1510, %f1505, %f1509; - sub.f32 %f1511, %f1505, %f1510; - add.f32 %f1512, %f1509, %f1511; - mul.rn.f32 %f1514, %f908, %f1510; - neg.f32 %f1515, %f1514; - fma.rn.f32 %f1516, %f908, %f1510, %f1515; - fma.rn.f32 %f1517, %f908, %f1512, %f1516; - fma.rn.f32 %f1519, %f3020, %f1510, %f1517; - add.rn.f32 %f1520, %f1514, %f1519; - neg.f32 %f1521, %f1520; - add.rn.f32 %f1522, %f1514, %f1521; - add.rn.f32 %f1523, %f1522, %f1519; - mov.b32 %r189, %f1520; - setp.eq.s32 %p142, %r189, 1118925336; - add.s32 %r190, %r189, -1; - mov.b32 %f1524, %r190; - add.f32 %f1525, %f1523, 0f37000000; - selp.f32 %f1526, %f1524, %f1520, %p142; - selp.f32 %f256, %f1525, %f1523, %p142; - mul.f32 %f1527, %f1526, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1528, %f1527; - fma.rn.f32 %f1529, %f1528, %f3015, %f1526; - fma.rn.f32 %f1530, %f1528, %f3016, %f1529; - mul.f32 %f1531, %f1530, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1532, %f1531; - add.f32 %f1533, %f1528, 0f00000000; - ex2.approx.f32 %f1534, %f1533; - mul.f32 %f1535, %f1532, %f1534; - setp.lt.f32 %p143, %f1526, 0fC2D20000; - selp.f32 %f1536, 0f00000000, %f1535, %p143; - setp.gt.f32 %p144, %f1526, 0f42D20000; - selp.f32 %f3315, 0f7F800000, %f1536, %p144; - setp.eq.f32 %p145, %f3315, 0f7F800000; - @%p145 bra BB7_90; - - fma.rn.f32 %f3315, %f3315, %f256, %f3315; - -BB7_90: - setp.eq.f32 %p342, %f142, 0f00000000; - setp.geu.f32 %p341, %f142, 0f00000000; - mov.b32 %r191, %f3315; - xor.b32 %r192, %r191, -2147483648; - mov.b32 %f1537, %r192; - selp.f32 %f260, %f1537, %f3315, %p2; - selp.f32 %f3316, %f149, %f260, %p342; - @%p341 bra BB7_92; - - cvt.rzi.f32.f32 %f1539, %f908; - setp.neu.f32 %p147, %f1539, 0f40000000; - selp.f32 %f3316, 0f7FFFFFFF, %f260, %p147; - -BB7_92: - abs.f32 %f3039, %f142; - cvt.rn.f32.s32 %f3038, %r328; - sub.f32 %f3037, %f3038, %f3343; - mul.f32 %f3036, %f3037, %f3037; - mul.f32 %f3035, %f3037, %f3036; - add.f32 %f3034, %f3037, 0f3F800000; - setp.eq.f32 %p346, %f142, 0f3F800000; - add.f32 %f3033, %f3039, 0f40000000; - mov.b32 %r295, %f3033; - setp.gt.s32 %p345, %r295, 2139095039; - setp.neu.f32 %p344, %f3039, 0f7F800000; - selp.f32 %f3032, 0fFF800000, 0f7F800000, %p2; - setp.gtu.f32 %p343, %f3039, 0f7F800000; - add.f32 %f3031, %f142, 0f40000000; - mov.f32 %f3030, 0f00000000; - mov.f32 %f3029, 0f3DAAAABD; - mov.f32 %f3028, 0f3C4CAF63; - mov.f32 %f3027, 0f3B18F0FE; - mov.f32 %f3026, 0fB5BFBE8E; - mov.f32 %f3025, 0fBF317200; - selp.f32 %f1543, %f3031, %f3316, %p343; - selp.f32 %f1545, %f1543, %f3032, %p344; - selp.f32 %f1546, %f1545, %f3316, %p345; - mul.f32 %f1547, %f1546, 0fBF000000; - selp.f32 %f1548, 0fBF000000, %f1547, %p346; - mul.f32 %f1549, %f1548, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1550, %f1549; - fma.rn.f32 %f1552, %f1550, %f3025, %f1548; - fma.rn.f32 %f1554, %f1550, %f3026, %f1552; - mul.f32 %f1555, %f1554, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1556, %f1555; - add.f32 %f1557, %f1550, 0f00000000; - ex2.approx.f32 %f1558, %f1557; - mul.f32 %f1559, %f1556, %f1558; - setp.lt.f32 %p152, %f1548, 0fC2D20000; - selp.f32 %f1560, 0f00000000, %f1559, %p152; - setp.gt.f32 %p153, %f1548, 0f42D20000; - selp.f32 %f1561, 0f7F800000, %f1560, %p153; - mul.f32 %f1562, %f3037, %f1561; - mul.f32 %f1563, %f3034, %f255; - sub.f32 %f1564, %f1563, %f1562; - mul.f32 %f1565, %f100, %f1564; - mul.f32 %f264, %f184, %f1565; - mul.f32 %f1566, %f101, %f264; - mul.f32 %f1567, %f1561, %f3035; - mul.f32 %f1568, %f255, %f150; - sub.f32 %f1569, %f1568, %f1567; - mul.f32 %f1570, %f106, %f1569; - mul.f32 %f1571, %f184, %f1570; - sub.f32 %f265, %f1566, %f1571; - // inline asm - rcp.approx.ftz.f32 %f1540,%f1200; - // inline asm - mul.f32 %f1572, %f1540, %f212; - mul.f32 %f1573, %f1572, %f1572; - fma.rn.f32 %f1576, %f3027, %f1573, %f3028; - fma.rn.f32 %f1578, %f1576, %f1573, %f3029; - mul.rn.f32 %f1579, %f1578, %f1573; - mul.rn.f32 %f1580, %f1579, %f1572; - sub.f32 %f1581, %f210, %f1572; - neg.f32 %f1582, %f1572; - add.f32 %f1583, %f1581, %f1581; - fma.rn.f32 %f1584, %f1582, %f210, %f1583; - mul.rn.f32 %f1585, %f1540, %f1584; - add.f32 %f1586, %f1580, %f1572; - sub.f32 %f1587, %f1572, %f1586; - add.f32 %f1588, %f1580, %f1587; - add.f32 %f1589, %f1585, %f1588; - add.f32 %f1590, %f1586, %f1589; - sub.f32 %f1591, %f1586, %f1590; - add.f32 %f1592, %f1589, %f1591; - add.f32 %f1593, %f213, %f1590; - sub.f32 %f1594, %f213, %f1593; - add.f32 %f1595, %f1590, %f1594; - add.f32 %f1596, %f1592, %f1595; - add.f32 %f1597, %f214, %f1596; - add.f32 %f1598, %f1593, %f1597; - sub.f32 %f1599, %f1593, %f1598; - add.f32 %f1600, %f1597, %f1599; - mul.rn.f32 %f1602, %f908, %f1598; - neg.f32 %f1603, %f1602; - fma.rn.f32 %f1604, %f908, %f1598, %f1603; - fma.rn.f32 %f1605, %f908, %f1600, %f1604; - fma.rn.f32 %f1607, %f3030, %f1598, %f1605; - add.rn.f32 %f1608, %f1602, %f1607; - neg.f32 %f1609, %f1608; - add.rn.f32 %f1610, %f1602, %f1609; - add.rn.f32 %f1611, %f1610, %f1607; - mov.b32 %r193, %f1608; - setp.eq.s32 %p154, %r193, 1118925336; - add.s32 %r194, %r193, -1; - mov.b32 %f1612, %r194; - add.f32 %f1613, %f1611, 0f37000000; - selp.f32 %f1614, %f1612, %f1608, %p154; - selp.f32 %f266, %f1613, %f1611, %p154; - mul.f32 %f1615, %f1614, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1616, %f1615; - fma.rn.f32 %f1617, %f1616, %f3025, %f1614; - fma.rn.f32 %f1618, %f1616, %f3026, %f1617; - mul.f32 %f1619, %f1618, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1620, %f1619; - add.f32 %f1621, %f1616, 0f00000000; - ex2.approx.f32 %f1622, %f1621; - mul.f32 %f1623, %f1620, %f1622; - setp.lt.f32 %p155, %f1614, 0fC2D20000; - selp.f32 %f1624, 0f00000000, %f1623, %p155; - setp.gt.f32 %p156, %f1614, 0f42D20000; - selp.f32 %f3317, 0f7F800000, %f1624, %p156; - setp.eq.f32 %p157, %f3317, 0f7F800000; - @%p157 bra BB7_94; - - fma.rn.f32 %f3317, %f3317, %f266, %f3317; - -BB7_94: - setp.eq.f32 %p347, %f208, 0f00000000; - mov.b32 %r195, %f3317; - xor.b32 %r196, %r195, -2147483648; - mov.b32 %f1625, %r196; - selp.f32 %f3319, %f1625, %f3317, %p5; - @%p347 bra BB7_97; - bra.uni BB7_95; - -BB7_97: - add.f32 %f1628, %f208, %f208; - selp.f32 %f3319, %f1628, 0f00000000, %p55; - bra.uni BB7_98; - -BB7_95: - setp.geu.f32 %p159, %f208, 0f00000000; - @%p159 bra BB7_98; - - cvt.rzi.f32.f32 %f1627, %f908; - setp.neu.f32 %p160, %f1627, 0f40000000; - selp.f32 %f3319, 0f7FFFFFFF, %f3319, %p160; - -BB7_98: - abs.f32 %f3041, %f208; - add.f32 %f3040, %f3041, 0f40000000; - mov.b32 %r296, %f3040; - setp.lt.s32 %p348, %r296, 2139095040; - @%p348 bra BB7_103; - - abs.f32 %f3100, %f208; - setp.gtu.f32 %p163, %f3100, 0f7F800000; - @%p163 bra BB7_102; - bra.uni BB7_100; - -BB7_102: - add.f32 %f3319, %f208, 0f40000000; - bra.uni BB7_103; - -BB7_100: - abs.f32 %f3101, %f208; - setp.neu.f32 %p164, %f3101, 0f7F800000; - @%p164 bra BB7_103; - - selp.f32 %f3319, 0fFF800000, 0f7F800000, %p5; - -BB7_103: - setp.eq.f32 %p349, %f208, 0f3F800000; - mov.f32 %f3047, 0f00000000; - mov.f32 %f3046, 0f3DAAAABD; - mov.f32 %f3045, 0f3C4CAF63; - mov.f32 %f3044, 0f3B18F0FE; - mov.f32 %f3043, 0fB5BFBE8E; - mov.f32 %f3042, 0fBF317200; - mul.f32 %f1631, %f3319, 0fBF000000; - selp.f32 %f1632, 0fBF000000, %f1631, %p349; - mul.f32 %f1633, %f1632, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1634, %f1633; - fma.rn.f32 %f1636, %f1634, %f3042, %f1632; - fma.rn.f32 %f1638, %f1634, %f3043, %f1636; - mul.f32 %f1639, %f1638, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1640, %f1639; - add.f32 %f1641, %f1634, 0f00000000; - ex2.approx.f32 %f1642, %f1641; - mul.f32 %f1643, %f1640, %f1642; - setp.lt.f32 %p166, %f1632, 0fC2D20000; - selp.f32 %f1644, 0f00000000, %f1643, %p166; - setp.gt.f32 %p167, %f1632, 0f42D20000; - selp.f32 %f277, 0f7F800000, %f1644, %p167; - // inline asm - rcp.approx.ftz.f32 %f1629,%f1299; - // inline asm - mul.f32 %f1645, %f1629, %f231; - mul.f32 %f1646, %f1645, %f1645; - fma.rn.f32 %f1649, %f3044, %f1646, %f3045; - fma.rn.f32 %f1651, %f1649, %f1646, %f3046; - mul.rn.f32 %f1652, %f1651, %f1646; - mul.rn.f32 %f1653, %f1652, %f1645; - sub.f32 %f1654, %f229, %f1645; - neg.f32 %f1655, %f1645; - add.f32 %f1656, %f1654, %f1654; - fma.rn.f32 %f1657, %f1655, %f229, %f1656; - mul.rn.f32 %f1658, %f1629, %f1657; - add.f32 %f1659, %f1653, %f1645; - sub.f32 %f1660, %f1645, %f1659; - add.f32 %f1661, %f1653, %f1660; - add.f32 %f1662, %f1658, %f1661; - add.f32 %f1663, %f1659, %f1662; - sub.f32 %f1664, %f1659, %f1663; - add.f32 %f1665, %f1662, %f1664; - add.f32 %f1666, %f232, %f1663; - sub.f32 %f1667, %f232, %f1666; - add.f32 %f1668, %f1663, %f1667; - add.f32 %f1669, %f1665, %f1668; - add.f32 %f1670, %f233, %f1669; - add.f32 %f1671, %f1666, %f1670; - sub.f32 %f1672, %f1666, %f1671; - add.f32 %f1673, %f1670, %f1672; - mul.rn.f32 %f1675, %f908, %f1671; - neg.f32 %f1676, %f1675; - fma.rn.f32 %f1677, %f908, %f1671, %f1676; - fma.rn.f32 %f1678, %f908, %f1673, %f1677; - fma.rn.f32 %f1680, %f3047, %f1671, %f1678; - add.rn.f32 %f1681, %f1675, %f1680; - neg.f32 %f1682, %f1681; - add.rn.f32 %f1683, %f1675, %f1682; - add.rn.f32 %f1684, %f1683, %f1680; - mov.b32 %r197, %f1681; - setp.eq.s32 %p168, %r197, 1118925336; - add.s32 %r198, %r197, -1; - mov.b32 %f1685, %r198; - add.f32 %f1686, %f1684, 0f37000000; - selp.f32 %f1687, %f1685, %f1681, %p168; - selp.f32 %f278, %f1686, %f1684, %p168; - mul.f32 %f1688, %f1687, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1689, %f1688; - fma.rn.f32 %f1690, %f1689, %f3042, %f1687; - fma.rn.f32 %f1691, %f1689, %f3043, %f1690; - mul.f32 %f1692, %f1691, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1693, %f1692; - add.f32 %f1694, %f1689, 0f00000000; - ex2.approx.f32 %f1695, %f1694; - mul.f32 %f1696, %f1693, %f1695; - setp.lt.f32 %p169, %f1687, 0fC2D20000; - selp.f32 %f1697, 0f00000000, %f1696, %p169; - setp.gt.f32 %p170, %f1687, 0f42D20000; - selp.f32 %f3320, 0f7F800000, %f1697, %p170; - setp.eq.f32 %p171, %f3320, 0f7F800000; - @%p171 bra BB7_105; - - fma.rn.f32 %f3320, %f3320, %f278, %f3320; - -BB7_105: - setp.eq.f32 %p352, %f227, 0f00000000; - mov.b32 %r199, %f3320; - xor.b32 %r200, %r199, -2147483648; - mov.b32 %f1698, %r200; - selp.f32 %f3322, %f1698, %f3320, %p6; - @%p352 bra BB7_108; - bra.uni BB7_106; - -BB7_108: - add.f32 %f1701, %f227, %f227; - selp.f32 %f3322, %f1701, 0f00000000, %p55; - bra.uni BB7_109; - -BB7_106: - setp.geu.f32 %p173, %f227, 0f00000000; - @%p173 bra BB7_109; - - cvt.rzi.f32.f32 %f1700, %f908; - setp.neu.f32 %p174, %f1700, 0f40000000; - selp.f32 %f3322, 0f7FFFFFFF, %f3322, %p174; - -BB7_109: - abs.f32 %f3114, %f227; - add.f32 %f3113, %f3114, 0f40000000; - mov.b32 %r301, %f3113; - setp.lt.s32 %p353, %r301, 2139095040; - @%p353 bra BB7_114; - - abs.f32 %f3098, %f227; - setp.gtu.f32 %p177, %f3098, 0f7F800000; - @%p177 bra BB7_113; - bra.uni BB7_111; - -BB7_113: - add.f32 %f3322, %f227, 0f40000000; - bra.uni BB7_114; - -BB7_111: - abs.f32 %f3099, %f227; - setp.neu.f32 %p178, %f3099, 0f7F800000; - @%p178 bra BB7_114; - - selp.f32 %f3322, 0fFF800000, 0f7F800000, %p6; - -BB7_114: - setp.eq.f32 %p354, %f227, 0f3F800000; - cvt.rn.f32.s32 %f3053, %r329; - sub.f32 %f3052, %f3053, %f3342; - add.f32 %f3051, %f3052, 0f3F800000; - mov.f32 %f3323, 0f00000000; - mov.f32 %f3049, 0fB5BFBE8E; - mov.f32 %f3048, 0fBF317200; - mul.f32 %f1703, %f3322, 0fBF000000; - selp.f32 %f1704, 0fBF000000, %f1703, %p354; - mul.f32 %f1705, %f1704, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1706, %f1705; - fma.rn.f32 %f1708, %f1706, %f3048, %f1704; - fma.rn.f32 %f1710, %f1706, %f3049, %f1708; - mul.f32 %f1711, %f1710, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1712, %f1711; - add.f32 %f1713, %f1706, 0f00000000; - ex2.approx.f32 %f1714, %f1713; - mul.f32 %f1715, %f1712, %f1714; - setp.lt.f32 %p180, %f1704, 0fC2D20000; - selp.f32 %f1716, 0f00000000, %f1715, %p180; - setp.gt.f32 %p181, %f1704, 0f42D20000; - selp.f32 %f1717, 0f7F800000, %f1716, %p181; - mul.f32 %f1718, %f3052, %f1717; - mul.f32 %f1719, %f3051, %f277; - sub.f32 %f1720, %f1719, %f1718; - mul.f32 %f1721, %f102, %f1720; - mul.f32 %f289, %f170, %f1721; - mul.f32 %f1722, %f103, %f289; - mul.f32 %f1723, %f3051, %f3051; - mul.f32 %f1724, %f3051, %f1723; - mul.f32 %f1725, %f3052, %f3052; - mul.f32 %f1726, %f3052, %f1725; - mul.f32 %f1727, %f277, %f1724; - mul.f32 %f1728, %f1717, %f1726; - sub.f32 %f1729, %f1727, %f1728; - mul.f32 %f1730, %f107, %f1729; - mul.f32 %f1731, %f170, %f1730; - sub.f32 %f290, %f1722, %f1731; - mul.f32 %f291, %f170, %f184; - setp.leu.f32 %p182, %f185, 0f3C23D70A; - @%p182 bra BB7_116; - - sub.f32 %f1732, %f186, %f185; - add.f32 %f1733, %f185, %f3337; - div.rn.f32 %f3323, %f1732, %f1733; - -BB7_116: - mov.f32 %f3324, 0f00000000; - @%p182 bra BB7_118; - - add.f32 %f1735, %f185, %f3337; + add.f32 %f1370, %f1364, 0f4B40007F; + mov.b32 %r421, %f1370; + shl.b32 %r422, %r421, 23; + mov.b32 %f1371, %r422; + ex2.approx.ftz.f32 %f1372, %f1369; + mul.f32 %f202, %f1372, %f1371; + setp.eq.f32 %p311, %f202, 0f7F800000; + mov.f32 %f3043, 0f7F800000; + @%p311 bra $L__BB7_170; + + fma.rn.f32 %f3043, %f202, %f201, %f202; + +$L__BB7_170: + setp.lt.f32 %p312, %f199, 0f00000000; + and.pred %p26, %p312, %p110; + setp.eq.f32 %p314, %f199, 0f00000000; + @%p314 bra $L__BB7_174; + bra.uni $L__BB7_171; + +$L__BB7_174: + add.f32 %f1377, %f199, %f199; + selp.f32 %f3045, %f1377, 0f00000000, %p110; + bra.uni $L__BB7_175; + +$L__BB7_171: + mov.b32 %r423, %f3043; + xor.b32 %r424, %r423, -2147483648; + mov.b32 %f1373, %r424; + selp.f32 %f3045, %f1373, %f3043, %p26; + setp.geu.f32 %p315, %f199, 0f00000000; + @%p315 bra $L__BB7_175; + + cvt.rzi.f32.f32 %f1375, %f634; + setp.eq.f32 %p316, %f1375, 0f40000000; + @%p316 bra $L__BB7_175; + + mov.f32 %f3045, 0f7FFFFFFF; + +$L__BB7_175: + abs.f32 %f2963, %f199; + add.f32 %f1378, %f2963, 0f40000000; + mov.b32 %r425, %f1378; + setp.lt.s32 %p318, %r425, 2139095040; + @%p318 bra $L__BB7_180; + + abs.f32 %f2964, %f199; + setp.gtu.f32 %p319, %f2964, 0f7F800000; + @%p319 bra $L__BB7_179; + bra.uni $L__BB7_177; + +$L__BB7_179: + add.f32 %f3045, %f199, 0f40000000; + bra.uni $L__BB7_180; + +$L__BB7_177: + abs.f32 %f2965, %f199; + setp.neu.f32 %p320, %f2965, 0f7F800000; + @%p320 bra $L__BB7_180; + + selp.f32 %f3045, 0fFF800000, 0f7F800000, %p26; + +$L__BB7_180: + mov.f32 %f2946, 0f00000000; + mov.f32 %f2945, 0f3102E308; + mov.f32 %f2944, 0fBF317218; + mov.f32 %f2943, 0f35BFBE8E; + mov.f32 %f2942, 0f3F317200; + mov.f32 %f2941, 0f3DAAAABD; + mov.f32 %f2940, 0f3C4CAF63; + mov.f32 %f2939, 0f3B18F0FE; + mov.f32 %f2938, 0f32A57060; + mov.f32 %f2937, 0f4B400001; + mov.f32 %f2936, 0f437C0000; + mov.f32 %f2935, 0f3BBB989D; + mov.f32 %f2934, 0f3FB8AA3B; + mov.f32 %f2933, 0f3F000000; + cvt.rn.f32.s32 %f2932, %r847; + sub.f32 %f2931, %f2932, %f3068; + mul.f32 %f1380, %f3045, 0fBF000000; + setp.eq.f32 %p321, %f199, 0f3F800000; + selp.f32 %f1381, 0fBF000000, %f1380, %p321; + fma.rn.f32 %f1384, %f1381, %f2935, %f2933; + cvt.sat.f32.f32 %f1387, %f1384; + fma.rm.f32 %f1389, %f1387, %f2936, %f2937; + add.f32 %f1390, %f1389, 0fCB40007F; + neg.f32 %f1391, %f1390; + fma.rn.f32 %f1392, %f1381, %f2934, %f1391; + fma.rn.f32 %f1394, %f1381, %f2938, %f1392; + mov.b32 %r426, %f1389; + shl.b32 %r427, %r426, 23; + mov.b32 %f1395, %r427; + ex2.approx.ftz.f32 %f1396, %f1394; + mul.f32 %f211, %f1396, %f1395; + div.rn.f32 %f212, %f2931, %f3064; + abs.f32 %f213, %f212; + setp.lt.f32 %p322, %f213, 0f00800000; + mul.f32 %f1397, %f213, 0f4B800000; + selp.f32 %f1398, %f1397, %f213, %p322; + selp.f32 %f1399, 0fC3170000, 0fC2FE0000, %p322; + mov.b32 %r428, %f1398; + and.b32 %r429, %r428, 8388607; + or.b32 %r430, %r429, 1065353216; + mov.b32 %f1400, %r430; + shr.u32 %r431, %r428, 23; + cvt.rn.f32.u32 %f1401, %r431; + add.f32 %f1402, %f1399, %f1401; + setp.gt.f32 %p323, %f1400, 0f3FB504F3; + mul.f32 %f1403, %f1400, 0f3F000000; + add.f32 %f1404, %f1402, 0f3F800000; + selp.f32 %f1405, %f1404, %f1402, %p323; + selp.f32 %f1406, %f1403, %f1400, %p323; + add.f32 %f1407, %f1406, 0fBF800000; + add.f32 %f1408, %f1406, 0f3F800000; + rcp.approx.ftz.f32 %f1409, %f1408; + add.f32 %f1410, %f1407, %f1407; + mul.f32 %f1412, %f1410, %f1409; + mul.f32 %f1413, %f1412, %f1412; + fma.rn.f32 %f1416, %f2939, %f1413, %f2940; + fma.rn.f32 %f1418, %f1416, %f1413, %f2941; + mul.rn.f32 %f1419, %f1418, %f1413; + mul.rn.f32 %f1420, %f1419, %f1412; + sub.f32 %f1421, %f1407, %f1412; + add.f32 %f1422, %f1421, %f1421; + neg.f32 %f1423, %f1412; + fma.rn.f32 %f1424, %f1423, %f1407, %f1422; + mul.rn.f32 %f1425, %f1409, %f1424; + add.f32 %f1426, %f1420, %f1412; + sub.f32 %f1427, %f1412, %f1426; + add.f32 %f1428, %f1420, %f1427; + add.f32 %f1429, %f1425, %f1428; + add.f32 %f1430, %f1426, %f1429; + sub.f32 %f1431, %f1426, %f1430; + add.f32 %f1432, %f1429, %f1431; + mul.rn.f32 %f1434, %f1405, %f2942; + mul.rn.f32 %f1436, %f1405, %f2943; + add.f32 %f1437, %f1434, %f1430; + sub.f32 %f1438, %f1434, %f1437; + add.f32 %f1439, %f1430, %f1438; + add.f32 %f1440, %f1432, %f1439; + add.f32 %f1441, %f1436, %f1440; + add.f32 %f1442, %f1437, %f1441; + sub.f32 %f1443, %f1437, %f1442; + add.f32 %f1444, %f1441, %f1443; + mul.rn.f32 %f1445, %f634, %f1442; + neg.f32 %f1446, %f1445; + fma.rn.f32 %f1447, %f634, %f1442, %f1446; + fma.rn.f32 %f1448, %f634, %f1444, %f1447; + fma.rn.f32 %f1450, %f2946, %f1442, %f1448; + add.rn.f32 %f1451, %f1445, %f1450; + neg.f32 %f1452, %f1451; + add.rn.f32 %f1453, %f1445, %f1452; + add.rn.f32 %f1454, %f1453, %f1450; + mov.b32 %r432, %f1451; + setp.eq.s32 %p324, %r432, 1118925336; + add.s32 %r433, %r432, -1; + mov.b32 %f1455, %r433; + add.f32 %f1456, %f1454, 0f37000000; + selp.f32 %f214, %f1456, %f1454, %p324; + selp.f32 %f1457, %f1455, %f1451, %p324; + mul.rn.f32 %f1458, %f1457, %f2934; + cvt.rzi.f32.f32 %f1459, %f1458; + abs.f32 %f1460, %f1459; + setp.gt.f32 %p325, %f1460, 0f42FC0000; + mov.b32 %r434, %f1459; + and.b32 %r435, %r434, -2147483648; + or.b32 %r436, %r435, 1123811328; + mov.b32 %f1461, %r436; + selp.f32 %f1462, %f1461, %f1459, %p325; + fma.rn.f32 %f1464, %f1462, %f2944, %f1457; + fma.rn.f32 %f1466, %f1462, %f2945, %f1464; + mul.f32 %f1467, %f1466, 0f3FB8AA3B; + add.f32 %f1468, %f1462, 0f4B40007F; + mov.b32 %r437, %f1468; + shl.b32 %r438, %r437, 23; + mov.b32 %f1469, %r438; + ex2.approx.ftz.f32 %f1470, %f1467; + mul.f32 %f215, %f1470, %f1469; + setp.eq.f32 %p326, %f215, 0f7F800000; + mov.f32 %f3046, 0f7F800000; + @%p326 bra $L__BB7_182; + + fma.rn.f32 %f3046, %f215, %f214, %f215; + +$L__BB7_182: + setp.lt.f32 %p327, %f212, 0f00000000; + and.pred %p27, %p327, %p110; + setp.eq.f32 %p329, %f212, 0f00000000; + @%p329 bra $L__BB7_186; + bra.uni $L__BB7_183; + +$L__BB7_186: + add.f32 %f1475, %f212, %f212; + selp.f32 %f3048, %f1475, 0f00000000, %p110; + bra.uni $L__BB7_187; + +$L__BB7_183: + mov.b32 %r439, %f3046; + xor.b32 %r440, %r439, -2147483648; + mov.b32 %f1471, %r440; + selp.f32 %f3048, %f1471, %f3046, %p27; + setp.geu.f32 %p330, %f212, 0f00000000; + @%p330 bra $L__BB7_187; + + cvt.rzi.f32.f32 %f1473, %f634; + setp.eq.f32 %p331, %f1473, 0f40000000; + @%p331 bra $L__BB7_187; + + mov.f32 %f3048, 0f7FFFFFFF; + +$L__BB7_187: + abs.f32 %f2827, %f212; + add.f32 %f1476, %f2827, 0f40000000; + mov.b32 %r441, %f1476; + setp.lt.s32 %p333, %r441, 2139095040; + @%p333 bra $L__BB7_192; + + abs.f32 %f2951, %f212; + setp.gtu.f32 %p334, %f2951, 0f7F800000; + @%p334 bra $L__BB7_191; + bra.uni $L__BB7_189; + +$L__BB7_191: + add.f32 %f3048, %f212, 0f40000000; + bra.uni $L__BB7_192; + +$L__BB7_189: + abs.f32 %f2952, %f212; + setp.neu.f32 %p335, %f2952, 0f7F800000; + @%p335 bra $L__BB7_192; + + selp.f32 %f3048, 0fFF800000, 0f7F800000, %p27; + +$L__BB7_192: + mov.f32 %f2835, 0f32A57060; + mov.f32 %f2834, 0f4B400001; + mov.f32 %f2833, 0f437C0000; + mov.f32 %f2832, 0f3BBB989D; + mov.f32 %f2831, 0f3FB8AA3B; + mov.f32 %f2830, 0f3F000000; + cvt.rn.f32.s32 %f2829, %r847; + sub.f32 %f2828, %f2829, %f3068; + mul.f32 %f1477, %f3048, 0fBF000000; + setp.eq.f32 %p336, %f212, 0f3F800000; + selp.f32 %f1478, 0fBF000000, %f1477, %p336; + fma.rn.f32 %f1481, %f1478, %f2832, %f2830; + cvt.sat.f32.f32 %f1484, %f1481; + fma.rm.f32 %f1486, %f1484, %f2833, %f2834; + add.f32 %f1487, %f1486, 0fCB40007F; + neg.f32 %f1488, %f1487; + fma.rn.f32 %f1489, %f1478, %f2831, %f1488; + fma.rn.f32 %f1491, %f1478, %f2835, %f1489; + mov.b32 %r442, %f1486; + shl.b32 %r443, %r442, 23; + mov.b32 %f1492, %r443; + ex2.approx.ftz.f32 %f1493, %f1491; + mul.f32 %f224, %f1493, %f1492; + add.f32 %f1494, %f2828, 0f3F800000; + mul.f32 %f1495, %f1494, %f211; + mul.f32 %f1496, %f2828, %f224; + sub.f32 %f1497, %f1495, %f1496; + mul.f32 %f1498, %f66, %f1497; + mul.f32 %f225, %f125, %f1498; + not.pred %p337, %p13; + mov.f64 %fd585, %fd45; + @%p337 bra $L__BB7_194; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r444}, %fd45; + } + xor.b32 %r445, %r444, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r446, %temp}, %fd45; + } + mov.b64 %fd585, {%r446, %r445}; + +$L__BB7_194: + setp.eq.f32 %p726, %f3064, 0f00000000; + @%p726 bra $L__BB7_198; + bra.uni $L__BB7_195; + +$L__BB7_198: + mov.u32 %r447, 0; + mov.b64 %fd585, {%r447, %r70}; + bra.uni $L__BB7_199; + +$L__BB7_195: + setp.gt.s32 %p339, %r56, -1; + @%p339 bra $L__BB7_199; + + cvt.rzi.f64.f64 %fd404, %fd345; + setp.eq.f64 %p340, %fd404, 0d4014000000000000; + @%p340 bra $L__BB7_199; + + mov.f64 %fd585, 0dFFF8000000000000; + +$L__BB7_199: + cvt.f64.f32 %fd546, %f3064; + add.f64 %fd545, %fd546, 0d4014000000000000; + selp.f64 %fd586, %fd585, %fd545, %p160; + @%p23 bra $L__BB7_204; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r831}, %fd345; + } + and.b32 %r830, %r831, 2147483647; + setp.eq.s32 %p342, %r830, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r448, %temp}, %fd345; + } + setp.eq.s32 %p343, %r448, 0; + and.pred %p344, %p342, %p343; + @%p344 bra $L__BB7_203; + bra.uni $L__BB7_201; + +$L__BB7_203: + mov.u32 %r452, 0; + mov.b64 %fd586, {%r452, %r72}; + bra.uni $L__BB7_204; + +$L__BB7_201: + cvt.f64.f32 %fd547, %f3064; + and.b32 %r449, %r56, 2147483647; + setp.ne.s32 %p345, %r449, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r450, %temp}, %fd547; + } + setp.ne.s32 %p346, %r450, 0; + or.pred %p347, %p345, %p346; + mov.f64 %fd586, %fd585; + @%p347 bra $L__BB7_204; + + mov.u32 %r451, 0; + mov.b64 %fd586, {%r451, %r73}; + +$L__BB7_204: + cvt.f64.f32 %fd410, %f128; + { + .reg .b32 %temp; + mov.b64 {%temp, %r79}, %fd410; + } + abs.f64 %fd96, %fd410; + { // callseq 162, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd96; + .param .b64 param1; + st.param.f64 [param1+0], %fd339; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd588, [retval0+0]; + } // callseq 162 + setp.lt.s32 %p348, %r79, 0; + and.pred %p28, %p348, %p121; + not.pred %p350, %p28; + @%p350 bra $L__BB7_206; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r453}, %fd588; + } + xor.b32 %r454, %r453, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r455, %temp}, %fd588; + } + mov.b64 %fd588, {%r455, %r454}; + +$L__BB7_206: + setp.eq.f32 %p351, %f128, 0f00000000; + @%p351 bra $L__BB7_210; + bra.uni $L__BB7_207; + +$L__BB7_210: + mov.u32 %r456, 0; + selp.b32 %r457, %r79, 0, %p121; + or.b32 %r458, %r457, 2146435072; + selp.b32 %r459, %r458, %r457, %p123; + mov.b64 %fd588, {%r456, %r459}; + bra.uni $L__BB7_211; + +$L__BB7_207: + setp.gt.s32 %p352, %r79, -1; + @%p352 bra $L__BB7_211; + + cvt.rzi.f64.f64 %fd413, %fd339; + setp.eq.f64 %p353, %fd413, 0d4008000000000000; + @%p353 bra $L__BB7_211; + + mov.f64 %fd588, 0dFFF8000000000000; + +$L__BB7_211: + add.f64 %fd102, %fd410, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r460}, %fd102; + } + and.b32 %r461, %r460, 2146435072; + setp.ne.s32 %p356, %r461, 2146435072; + mov.f64 %fd589, %fd588; + @%p356 bra $L__BB7_217; + + setp.gtu.f64 %p357, %fd96, 0d7FF0000000000000; + mov.f64 %fd589, %fd102; + @%p357 bra $L__BB7_217; + + setp.eq.s32 %p358, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r462, %temp}, %fd339; + } + setp.eq.s32 %p359, %r462, 0; + and.pred %p360, %p358, %p359; + @%p360 bra $L__BB7_216; + bra.uni $L__BB7_214; + +$L__BB7_216: + mov.u32 %r469, 0; + setp.gt.f64 %p368, %fd96, 0d3FF0000000000000; + selp.b32 %r470, 2146435072, 0, %p368; + xor.b32 %r471, %r470, 2146435072; + selp.b32 %r472, %r471, %r470, %p123; + setp.eq.f32 %p369, %f128, 0fBF800000; + selp.b32 %r473, 1072693248, %r472, %p369; + mov.b64 %fd589, {%r469, %r473}; + bra.uni $L__BB7_217; + +$L__BB7_214: + { + .reg .b32 %temp; + mov.b64 {%r463, %temp}, %fd410; + } + and.b32 %r464, %r79, 2147483647; + setp.ne.s32 %p361, %r464, 2146435072; + setp.ne.s32 %p362, %r463, 0; + or.pred %p363, %p361, %p362; + mov.f64 %fd589, %fd588; + @%p363 bra $L__BB7_217; + + setp.ne.s32 %p364, %r53, 1071644672; + and.pred %p365, %p364, %p28; + mov.u32 %r466, 0; + or.b32 %r467, %r55, -2147483648; + selp.b32 %r468, %r467, %r55, %p365; + mov.b64 %fd589, {%r466, %r468}; + +$L__BB7_217: + setp.eq.f32 %p370, %f128, 0f3F800000; + selp.f64 %fd418, 0d3FF0000000000000, %fd589, %p370; + cvt.f64.f32 %fd419, %f211; + mul.f64 %fd106, %fd418, %fd419; + cvt.f64.f32 %fd107, %f133; + { + .reg .b32 %temp; + mov.b64 {%temp, %r80}, %fd107; + } + abs.f64 %fd108, %fd107; + { // callseq 163, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd108; + .param .b64 param1; + st.param.f64 [param1+0], %fd339; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd591, [retval0+0]; + } // callseq 163 + setp.lt.s32 %p371, %r80, 0; + and.pred %p29, %p371, %p121; + not.pred %p373, %p29; + @%p373 bra $L__BB7_219; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r474}, %fd591; + } + xor.b32 %r475, %r474, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r476, %temp}, %fd591; + } + mov.b64 %fd591, {%r476, %r475}; + +$L__BB7_219: + setp.eq.f32 %p374, %f133, 0f00000000; + @%p374 bra $L__BB7_223; + bra.uni $L__BB7_220; + +$L__BB7_223: + mov.u32 %r477, 0; + selp.b32 %r478, %r80, 0, %p121; + or.b32 %r479, %r478, 2146435072; + selp.b32 %r480, %r479, %r478, %p123; + mov.b64 %fd591, {%r477, %r480}; + bra.uni $L__BB7_224; + +$L__BB7_220: + setp.gt.s32 %p375, %r80, -1; + @%p375 bra $L__BB7_224; + + cvt.rzi.f64.f64 %fd422, %fd339; + setp.eq.f64 %p376, %fd422, 0d4008000000000000; + @%p376 bra $L__BB7_224; + + mov.f64 %fd591, 0dFFF8000000000000; + +$L__BB7_224: + add.f64 %fd114, %fd107, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r481}, %fd114; + } + and.b32 %r482, %r481, 2146435072; + setp.ne.s32 %p379, %r482, 2146435072; + mov.f64 %fd592, %fd591; + @%p379 bra $L__BB7_230; + + setp.gtu.f64 %p380, %fd108, 0d7FF0000000000000; + mov.f64 %fd592, %fd114; + @%p380 bra $L__BB7_230; + + setp.eq.s32 %p381, %r53, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r483, %temp}, %fd339; + } + setp.eq.s32 %p382, %r483, 0; + and.pred %p383, %p381, %p382; + @%p383 bra $L__BB7_229; + bra.uni $L__BB7_227; + +$L__BB7_229: + mov.u32 %r490, 0; + setp.gt.f64 %p391, %fd108, 0d3FF0000000000000; + selp.b32 %r491, 2146435072, 0, %p391; + xor.b32 %r492, %r491, 2146435072; + selp.b32 %r493, %r492, %r491, %p123; + setp.eq.f32 %p392, %f133, 0fBF800000; + selp.b32 %r494, 1072693248, %r493, %p392; + mov.b64 %fd592, {%r490, %r494}; + bra.uni $L__BB7_230; + +$L__BB7_227: + { + .reg .b32 %temp; + mov.b64 {%r484, %temp}, %fd107; + } + and.b32 %r485, %r80, 2147483647; + setp.ne.s32 %p384, %r485, 2146435072; + setp.ne.s32 %p385, %r484, 0; + or.pred %p386, %p384, %p385; + mov.f64 %fd592, %fd591; + @%p386 bra $L__BB7_230; + + setp.ne.s32 %p387, %r53, 1071644672; + and.pred %p388, %p387, %p29; + mov.u32 %r487, 0; + or.b32 %r488, %r55, -2147483648; + selp.b32 %r489, %r488, %r55, %p388; + mov.b64 %fd592, {%r487, %r489}; + +$L__BB7_230: + cvt.f64.f32 %fd548, %f125; + setp.eq.f32 %p727, %f3064, 0f3F800000; + mov.f32 %f3049, 0f00000000; + setp.eq.f32 %p393, %f133, 0f3F800000; + selp.f64 %fd425, 0d3FF0000000000000, %fd592, %p393; + cvt.f64.f32 %fd426, %f224; + mul.f64 %fd427, %fd425, %fd426; + sub.f64 %fd428, %fd106, %fd427; + selp.f64 %fd429, 0d3FF0000000000000, %fd586, %p727; + div.rn.f64 %fd430, %fd35, %fd429; + mul.f64 %fd431, %fd430, %fd428; + mul.f64 %fd433, %fd431, %fd548; + mul.f32 %f1500, %f67, %f225; + cvt.f64.f32 %fd434, %f1500; + sub.f64 %fd435, %fd434, %fd433; + cvt.rn.f32.f64 %f226, %fd435; + mul.f32 %f227, %f125, %f138; + setp.leu.f32 %p395, %f139, 0f3C23D70A; + @%p395 bra $L__BB7_232; + + sub.f32 %f1501, %f140, %f139; + add.f32 %f1502, %f139, %f3063; + div.rn.f32 %f3049, %f1501, %f1502; + +$L__BB7_232: + mov.f32 %f3050, 0f00000000; + @%p395 bra $L__BB7_247; + + and.b32 %r495, %r74, 2146435072; + setp.eq.s32 %p397, %r495, 1062207488; + add.f32 %f230, %f139, %f3063; + cvt.f64.f32 %fd118, %f230; + { + .reg .b32 %temp; + mov.b64 {%temp, %r81}, %fd118; + } + abs.f64 %fd119, %fd118; + { // callseq 164, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd119; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd594, [retval0+0]; + } // callseq 164 + setp.lt.s32 %p398, %r81, 0; + and.pred %p30, %p398, %p397; + not.pred %p399, %p30; + @%p399 bra $L__BB7_235; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r496}, %fd594; + } + xor.b32 %r497, %r496, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r498, %temp}, %fd594; + } + mov.b64 %fd594, {%r498, %r497}; + +$L__BB7_235: + setp.eq.f32 %p400, %f230, 0f00000000; + @%p400 bra $L__BB7_239; + bra.uni $L__BB7_236; + +$L__BB7_239: + setp.lt.s32 %p403, %r74, 0; + mov.u32 %r499, 0; + selp.b32 %r501, %r81, 0, %p397; + or.b32 %r502, %r501, 2146435072; + selp.b32 %r503, %r502, %r501, %p403; + mov.b64 %fd594, {%r499, %r503}; + bra.uni $L__BB7_240; + +$L__BB7_236: + setp.gt.s32 %p401, %r81, -1; + @%p401 bra $L__BB7_240; + + cvt.rzi.f64.f64 %fd438, %fd350; + setp.eq.f64 %p402, %fd438, 0d4000000000000000; + @%p402 bra $L__BB7_240; + + mov.f64 %fd594, 0dFFF8000000000000; + +$L__BB7_240: + add.f64 %fd125, %fd118, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r504}, %fd125; + } + and.b32 %r505, %r504, 2146435072; + setp.ne.s32 %p405, %r505, 2146435072; + mov.f64 %fd595, %fd594; + @%p405 bra $L__BB7_246; + + setp.gtu.f64 %p406, %fd119, 0d7FF0000000000000; + mov.f64 %fd595, %fd125; + @%p406 bra $L__BB7_246; + + setp.eq.s32 %p407, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r506, %temp}, %fd350; + } + setp.eq.s32 %p408, %r506, 0; + and.pred %p409, %p407, %p408; + @%p409 bra $L__BB7_245; + bra.uni $L__BB7_243; + +$L__BB7_245: + setp.lt.s32 %p415, %r74, 0; + mov.u32 %r512, 0; + setp.gt.f64 %p416, %fd119, 0d3FF0000000000000; + selp.b32 %r513, 2146435072, 0, %p416; + xor.b32 %r514, %r513, 2146435072; + selp.b32 %r515, %r514, %r513, %p415; + setp.eq.f32 %p417, %f230, 0fBF800000; + selp.b32 %r516, 1072693248, %r515, %p417; + mov.b64 %fd595, {%r512, %r516}; + bra.uni $L__BB7_246; + +$L__BB7_243: + { + .reg .b32 %temp; + mov.b64 {%r507, %temp}, %fd118; + } + and.b32 %r508, %r81, 2147483647; + setp.ne.s32 %p410, %r508, 2146435072; + setp.ne.s32 %p411, %r507, 0; + or.pred %p412, %p410, %p411; + mov.f64 %fd595, %fd594; + @%p412 bra $L__BB7_246; + + setp.ne.s32 %p413, %r75, 1071644672; + and.pred %p414, %p413, %p30; + or.b32 %r509, %r76, -2147483648; + selp.b32 %r510, %r509, %r76, %p414; + mov.u32 %r511, 0; + mov.b64 %fd595, {%r511, %r510}; + +$L__BB7_246: + setp.eq.f32 %p418, %f230, 0f3F800000; + selp.f64 %fd441, 0d3FF0000000000000, %fd595, %p418; + add.f32 %f1504, %f140, %f3063; + cvt.f64.f32 %fd442, %f1504; + div.rn.f64 %fd443, %fd442, %fd441; + cvt.rn.f32.f64 %f3050, %fd443; + +$L__BB7_247: + and.b32 %r517, %r74, 2146435072; + setp.eq.s32 %p419, %r517, 1062207488; + mov.f32 %f1505, 0f47C35000; + min.f32 %f1506, %f3050, %f1505; + cvt.f64.f32 %fd129, %f1506; + min.f32 %f233, %f3049, %f1505; + fma.rn.f32 %f3018, %f233, %f154, %f3018; + mul.f32 %f1507, %f233, %f155; + cvt.f64.f32 %fd130, %f1507; + cvt.f64.f32 %fd131, %f154; + { + .reg .b32 %temp; + mov.b64 {%temp, %r82}, %fd131; + } + abs.f64 %fd132, %fd131; + { // callseq 165, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd132; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd596, [retval0+0]; + } // callseq 165 + @%p419 bra $L__BB7_304; + bra.uni $L__BB7_248; + +$L__BB7_304: + setp.gt.s32 %p495, %r82, -1; + @%p495 bra $L__BB7_306; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r578}, %fd596; + } + xor.b32 %r579, %r578, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r580, %temp}, %fd596; + } + mov.b64 %fd596, {%r580, %r579}; + +$L__BB7_306: + setp.eq.f32 %p496, %f154, 0f00000000; + @%p496 bra $L__BB7_310; + bra.uni $L__BB7_307; + +$L__BB7_310: + setp.lt.s32 %p499, %r74, 0; + mov.u32 %r581, 0; + or.b32 %r582, %r82, 2146435072; + selp.b32 %r583, %r582, %r82, %p499; + mov.b64 %fd596, {%r581, %r583}; + bra.uni $L__BB7_311; + +$L__BB7_248: + setp.eq.f32 %p420, %f154, 0f00000000; + @%p420 bra $L__BB7_252; + bra.uni $L__BB7_249; + +$L__BB7_252: + mov.u32 %r518, 0; + mov.b64 %fd596, {%r518, %r77}; + bra.uni $L__BB7_253; + +$L__BB7_307: + @%p495 bra $L__BB7_311; + + cvt.rzi.f64.f64 %fd496, %fd350; + setp.eq.f64 %p498, %fd496, 0d4000000000000000; + @%p498 bra $L__BB7_311; + + mov.f64 %fd596, 0dFFF8000000000000; + +$L__BB7_311: + add.f64 %fd186, %fd131, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r584}, %fd186; + } + and.b32 %r585, %r584, 2146435072; + setp.ne.s32 %p500, %r585, 2146435072; + mov.f64 %fd608, %fd596; + @%p500 bra $L__BB7_317; + + setp.gtu.f64 %p501, %fd132, 0d7FF0000000000000; + mov.f64 %fd608, %fd186; + @%p501 bra $L__BB7_317; + + setp.eq.s32 %p502, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r586, %temp}, %fd350; + } + setp.eq.s32 %p503, %r586, 0; + and.pred %p504, %p502, %p503; + @%p504 bra $L__BB7_316; + bra.uni $L__BB7_314; + +$L__BB7_316: + setp.lt.s32 %p511, %r74, 0; + mov.u32 %r592, 0; + setp.gt.f64 %p512, %fd132, 0d3FF0000000000000; + selp.b32 %r593, 2146435072, 0, %p512; + xor.b32 %r594, %r593, 2146435072; + selp.b32 %r595, %r594, %r593, %p511; + setp.eq.f32 %p513, %f154, 0fBF800000; + selp.b32 %r596, 1072693248, %r595, %p513; + mov.b64 %fd608, {%r592, %r596}; + bra.uni $L__BB7_317; + +$L__BB7_249: + setp.gt.s32 %p421, %r82, -1; + @%p421 bra $L__BB7_253; + + cvt.rzi.f64.f64 %fd446, %fd350; + setp.eq.f64 %p422, %fd446, 0d4000000000000000; + @%p422 bra $L__BB7_253; + + mov.f64 %fd596, 0dFFF8000000000000; + +$L__BB7_253: + add.f64 %fd136, %fd131, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r519}, %fd136; + } + and.b32 %r520, %r519, 2146435072; + setp.ne.s32 %p423, %r520, 2146435072; + mov.f64 %fd597, %fd596; + @%p423 bra $L__BB7_259; + + setp.gtu.f64 %p424, %fd132, 0d7FF0000000000000; + mov.f64 %fd597, %fd136; + @%p424 bra $L__BB7_259; + + setp.eq.s32 %p425, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r521, %temp}, %fd350; + } + setp.eq.s32 %p426, %r521, 0; + and.pred %p427, %p425, %p426; + @%p427 bra $L__BB7_258; + bra.uni $L__BB7_256; + +$L__BB7_258: + setp.lt.s32 %p431, %r74, 0; + mov.u32 %r525, 0; + setp.gt.f64 %p432, %fd132, 0d3FF0000000000000; + selp.b32 %r526, 2146435072, 0, %p432; + xor.b32 %r527, %r526, 2146435072; + selp.b32 %r528, %r527, %r526, %p431; + setp.eq.f32 %p433, %f154, 0fBF800000; + selp.b32 %r529, 1072693248, %r528, %p433; + mov.b64 %fd597, {%r525, %r529}; + bra.uni $L__BB7_259; + +$L__BB7_314: + { + .reg .b32 %temp; + mov.b64 {%r587, %temp}, %fd131; + } + and.b32 %r588, %r82, 2147483647; + setp.ne.s32 %p505, %r588, 2146435072; + setp.ne.s32 %p506, %r587, 0; + or.pred %p507, %p505, %p506; + mov.f64 %fd608, %fd596; + @%p507 bra $L__BB7_317; + + setp.lt.s32 %p508, %r82, 0; + mov.u32 %r589, 0; + setp.ne.s32 %p509, %r75, 1071644672; + and.pred %p510, %p509, %p508; + or.b32 %r590, %r76, -2147483648; + selp.b32 %r591, %r590, %r76, %p510; + mov.b64 %fd608, {%r589, %r591}; + +$L__BB7_317: + setp.eq.f32 %p514, %f154, 0f3F800000; + selp.f64 %fd499, 0d3FF0000000000000, %fd608, %p514; + mul.f64 %fd500, %fd499, %fd129; + sub.f64 %fd501, %fd130, %fd500; + cvt.f64.f32 %fd502, %f3024; + add.f64 %fd626, %fd501, %fd502; + cvt.f64.f32 %fd191, %f183; + { + .reg .b32 %temp; + mov.b64 {%temp, %r87}, %fd191; + } + abs.f64 %fd192, %fd191; + { // callseq 170, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd192; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd610, [retval0+0]; + } // callseq 170 + setp.gt.s32 %p515, %r87, -1; + @%p515 bra $L__BB7_319; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r597}, %fd610; + } + xor.b32 %r598, %r597, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r599, %temp}, %fd610; + } + mov.b64 %fd610, {%r599, %r598}; + +$L__BB7_319: + setp.eq.f32 %p516, %f183, 0f00000000; + @%p516 bra $L__BB7_323; + bra.uni $L__BB7_320; + +$L__BB7_323: + setp.lt.s32 %p519, %r74, 0; + mov.u32 %r600, 0; + or.b32 %r601, %r87, 2146435072; + selp.b32 %r602, %r601, %r87, %p519; + mov.b64 %fd610, {%r600, %r602}; + bra.uni $L__BB7_324; + +$L__BB7_320: + @%p515 bra $L__BB7_324; + + cvt.rzi.f64.f64 %fd505, %fd350; + setp.eq.f64 %p518, %fd505, 0d4000000000000000; + @%p518 bra $L__BB7_324; + + mov.f64 %fd610, 0dFFF8000000000000; + +$L__BB7_324: + add.f64 %fd198, %fd191, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r603}, %fd198; + } + and.b32 %r604, %r603, 2146435072; + setp.ne.s32 %p520, %r604, 2146435072; + mov.f64 %fd611, %fd610; + @%p520 bra $L__BB7_330; + + setp.gtu.f64 %p521, %fd192, 0d7FF0000000000000; + mov.f64 %fd611, %fd198; + @%p521 bra $L__BB7_330; + + setp.eq.s32 %p522, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r605, %temp}, %fd350; + } + setp.eq.s32 %p523, %r605, 0; + and.pred %p524, %p522, %p523; + @%p524 bra $L__BB7_329; + bra.uni $L__BB7_327; + +$L__BB7_329: + setp.lt.s32 %p531, %r74, 0; + mov.u32 %r611, 0; + setp.gt.f64 %p532, %fd192, 0d3FF0000000000000; + selp.b32 %r612, 2146435072, 0, %p532; + xor.b32 %r613, %r612, 2146435072; + selp.b32 %r614, %r613, %r612, %p531; + setp.eq.f32 %p533, %f183, 0fBF800000; + selp.b32 %r615, 1072693248, %r614, %p533; + mov.b64 %fd611, {%r611, %r615}; + bra.uni $L__BB7_330; + +$L__BB7_256: + { + .reg .b32 %temp; + mov.b64 {%r522, %temp}, %fd131; + } + and.b32 %r523, %r82, 2147483647; + setp.ne.s32 %p428, %r523, 2146435072; + setp.ne.s32 %p429, %r522, 0; + or.pred %p430, %p428, %p429; + mov.f64 %fd597, %fd596; + @%p430 bra $L__BB7_259; + + mov.u32 %r524, 0; + mov.b64 %fd597, {%r524, %r76}; + +$L__BB7_259: + setp.eq.f32 %p434, %f154, 0f3F800000; + selp.f64 %fd449, 0d3FF0000000000000, %fd597, %p434; + mul.f64 %fd450, %fd449, %fd129; + sub.f64 %fd451, %fd130, %fd450; + cvt.f64.f32 %fd452, %f3024; + add.f64 %fd626, %fd451, %fd452; + cvt.f64.f32 %fd141, %f183; + { + .reg .b32 %temp; + mov.b64 {%temp, %r83}, %fd141; + } + abs.f64 %fd142, %fd141; + setp.eq.f32 %p435, %f183, 0f00000000; + @%p435 bra $L__BB7_263; + bra.uni $L__BB7_260; + +$L__BB7_263: + mov.u32 %r530, 0; + mov.b64 %fd598, {%r530, %r77}; + bra.uni $L__BB7_264; + +$L__BB7_260: + { // callseq 166, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd142; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd598, [retval0+0]; + } // callseq 166 + setp.gt.s32 %p436, %r83, -1; + @%p436 bra $L__BB7_264; + + cvt.rzi.f64.f64 %fd455, %fd350; + setp.eq.f64 %p437, %fd455, 0d4000000000000000; + @%p437 bra $L__BB7_264; + + mov.f64 %fd598, 0dFFF8000000000000; + +$L__BB7_264: + add.f64 %fd146, %fd141, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r531}, %fd146; + } + and.b32 %r532, %r531, 2146435072; + setp.ne.s32 %p438, %r532, 2146435072; + mov.f64 %fd599, %fd598; + @%p438 bra $L__BB7_270; + + setp.gtu.f64 %p439, %fd142, 0d7FF0000000000000; + mov.f64 %fd599, %fd146; + @%p439 bra $L__BB7_270; + + setp.eq.s32 %p440, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r533, %temp}, %fd350; + } + setp.eq.s32 %p441, %r533, 0; + and.pred %p442, %p440, %p441; + @%p442 bra $L__BB7_269; + bra.uni $L__BB7_267; + +$L__BB7_269: + setp.lt.s32 %p446, %r74, 0; + mov.u32 %r537, 0; + setp.gt.f64 %p447, %fd142, 0d3FF0000000000000; + selp.b32 %r538, 2146435072, 0, %p447; + xor.b32 %r539, %r538, 2146435072; + selp.b32 %r540, %r539, %r538, %p446; + setp.eq.f32 %p448, %f183, 0fBF800000; + selp.b32 %r541, 1072693248, %r540, %p448; + mov.b64 %fd599, {%r537, %r541}; + bra.uni $L__BB7_270; + +$L__BB7_327: + { + .reg .b32 %temp; + mov.b64 {%r606, %temp}, %fd191; + } + and.b32 %r607, %r87, 2147483647; + setp.ne.s32 %p525, %r607, 2146435072; + setp.ne.s32 %p526, %r606, 0; + or.pred %p527, %p525, %p526; + mov.f64 %fd611, %fd610; + @%p527 bra $L__BB7_330; + + setp.lt.s32 %p528, %r87, 0; + mov.u32 %r608, 0; + setp.ne.s32 %p529, %r75, 1071644672; + and.pred %p530, %p529, %p528; + or.b32 %r609, %r76, -2147483648; + selp.b32 %r610, %r609, %r76, %p530; + mov.b64 %fd611, {%r608, %r610}; + +$L__BB7_330: + setp.eq.f32 %p534, %f183, 0f3F800000; + selp.f64 %fd508, 0d3FF0000000000000, %fd611, %p534; + mul.f64 %fd509, %fd508, %fd129; + mul.f32 %f1512, %f233, %f184; + cvt.f64.f32 %fd510, %f1512; + sub.f64 %fd511, %fd510, %fd509; + cvt.f64.f32 %fd512, %f3023; + add.f64 %fd625, %fd511, %fd512; + cvt.f64.f32 %fd203, %f227; + { + .reg .b32 %temp; + mov.b64 {%temp, %r88}, %fd203; + } + abs.f64 %fd204, %fd203; + { // callseq 171, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd204; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd613, [retval0+0]; + } // callseq 171 + setp.gt.s32 %p535, %r88, -1; + @%p535 bra $L__BB7_332; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r616}, %fd613; + } + xor.b32 %r617, %r616, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r618, %temp}, %fd613; + } + mov.b64 %fd613, {%r618, %r617}; + +$L__BB7_332: + setp.eq.f32 %p536, %f227, 0f00000000; + @%p536 bra $L__BB7_336; + bra.uni $L__BB7_333; + +$L__BB7_336: + setp.lt.s32 %p539, %r74, 0; + mov.u32 %r619, 0; + or.b32 %r620, %r88, 2146435072; + selp.b32 %r621, %r620, %r88, %p539; + mov.b64 %fd613, {%r619, %r621}; + bra.uni $L__BB7_337; + +$L__BB7_333: + @%p535 bra $L__BB7_337; + + cvt.rzi.f64.f64 %fd515, %fd350; + setp.eq.f64 %p538, %fd515, 0d4000000000000000; + @%p538 bra $L__BB7_337; + + mov.f64 %fd613, 0dFFF8000000000000; + +$L__BB7_337: + add.f64 %fd210, %fd203, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r622}, %fd210; + } + and.b32 %r623, %r622, 2146435072; + setp.ne.s32 %p540, %r623, 2146435072; + mov.f64 %fd614, %fd613; + @%p540 bra $L__BB7_343; + + setp.gtu.f64 %p541, %fd204, 0d7FF0000000000000; + mov.f64 %fd614, %fd210; + @%p541 bra $L__BB7_343; + + setp.eq.s32 %p542, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r624, %temp}, %fd350; + } + setp.eq.s32 %p543, %r624, 0; + and.pred %p544, %p542, %p543; + @%p544 bra $L__BB7_342; + bra.uni $L__BB7_340; + +$L__BB7_342: + setp.lt.s32 %p551, %r74, 0; + mov.u32 %r630, 0; + setp.gt.f64 %p552, %fd204, 0d3FF0000000000000; + selp.b32 %r631, 2146435072, 0, %p552; + xor.b32 %r632, %r631, 2146435072; + selp.b32 %r633, %r632, %r631, %p551; + setp.eq.f32 %p553, %f227, 0fBF800000; + selp.b32 %r634, 1072693248, %r633, %p553; + mov.b64 %fd614, {%r630, %r634}; + bra.uni $L__BB7_343; + +$L__BB7_267: + { + .reg .b32 %temp; + mov.b64 {%r534, %temp}, %fd141; + } + and.b32 %r535, %r83, 2147483647; + setp.ne.s32 %p443, %r535, 2146435072; + setp.ne.s32 %p444, %r534, 0; + or.pred %p445, %p443, %p444; + mov.f64 %fd599, %fd598; + @%p445 bra $L__BB7_270; + + mov.u32 %r536, 0; + mov.b64 %fd599, {%r536, %r76}; + +$L__BB7_270: + setp.eq.f32 %p449, %f183, 0f3F800000; + selp.f64 %fd458, 0d3FF0000000000000, %fd599, %p449; + mul.f64 %fd459, %fd458, %fd129; + mul.f32 %f1508, %f233, %f184; + cvt.f64.f32 %fd460, %f1508; + sub.f64 %fd461, %fd460, %fd459; + cvt.f64.f32 %fd462, %f3023; + add.f64 %fd625, %fd461, %fd462; + cvt.f64.f32 %fd151, %f227; + { + .reg .b32 %temp; + mov.b64 {%temp, %r84}, %fd151; + } + abs.f64 %fd152, %fd151; + setp.eq.f32 %p450, %f227, 0f00000000; + @%p450 bra $L__BB7_274; + bra.uni $L__BB7_271; + +$L__BB7_274: + mov.u32 %r542, 0; + mov.b64 %fd600, {%r542, %r77}; + bra.uni $L__BB7_275; + +$L__BB7_271: + { // callseq 167, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd152; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd600, [retval0+0]; + } // callseq 167 + setp.gt.s32 %p451, %r84, -1; + @%p451 bra $L__BB7_275; + + cvt.rzi.f64.f64 %fd465, %fd350; + setp.eq.f64 %p452, %fd465, 0d4000000000000000; + @%p452 bra $L__BB7_275; + + mov.f64 %fd600, 0dFFF8000000000000; + +$L__BB7_275: + add.f64 %fd156, %fd151, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r543}, %fd156; + } + and.b32 %r544, %r543, 2146435072; + setp.ne.s32 %p453, %r544, 2146435072; + mov.f64 %fd601, %fd600; + @%p453 bra $L__BB7_281; + + setp.gtu.f64 %p454, %fd152, 0d7FF0000000000000; + mov.f64 %fd601, %fd156; + @%p454 bra $L__BB7_281; + + setp.eq.s32 %p455, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r545, %temp}, %fd350; + } + setp.eq.s32 %p456, %r545, 0; + and.pred %p457, %p455, %p456; + @%p457 bra $L__BB7_280; + bra.uni $L__BB7_278; + +$L__BB7_280: + setp.lt.s32 %p461, %r74, 0; + mov.u32 %r549, 0; + setp.gt.f64 %p462, %fd152, 0d3FF0000000000000; + selp.b32 %r550, 2146435072, 0, %p462; + xor.b32 %r551, %r550, 2146435072; + selp.b32 %r552, %r551, %r550, %p461; + setp.eq.f32 %p463, %f227, 0fBF800000; + selp.b32 %r553, 1072693248, %r552, %p463; + mov.b64 %fd601, {%r549, %r553}; + bra.uni $L__BB7_281; + +$L__BB7_340: + { + .reg .b32 %temp; + mov.b64 {%r625, %temp}, %fd203; + } + and.b32 %r626, %r88, 2147483647; + setp.ne.s32 %p545, %r626, 2146435072; + setp.ne.s32 %p546, %r625, 0; + or.pred %p547, %p545, %p546; + mov.f64 %fd614, %fd613; + @%p547 bra $L__BB7_343; + + setp.lt.s32 %p548, %r88, 0; + mov.u32 %r627, 0; + setp.ne.s32 %p549, %r75, 1071644672; + and.pred %p550, %p549, %p548; + or.b32 %r628, %r76, -2147483648; + selp.b32 %r629, %r628, %r76, %p550; + mov.b64 %fd614, {%r627, %r629}; + +$L__BB7_343: + mul.f32 %f1513, %f233, 0f00000000; + cvt.f64.f32 %fd518, %f1513; + setp.eq.f32 %p554, %f227, 0f3F800000; + selp.f64 %fd519, 0d3FF0000000000000, %fd614, %p554; + mul.f64 %fd520, %fd519, %fd129; + sub.f64 %fd521, %fd518, %fd520; + cvt.f64.f32 %fd522, %f3022; + add.f64 %fd624, %fd521, %fd522; + cvt.f64.f32 %fd523, %f3021; + sub.f64 %fd524, %fd518, %fd129; + add.f64 %fd623, %fd524, %fd523; + cvt.f64.f32 %fd216, %f197; + { + .reg .b32 %temp; + mov.b64 {%temp, %r89}, %fd216; + } + abs.f64 %fd217, %fd216; + { // callseq 172, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd217; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd616, [retval0+0]; + } // callseq 172 + setp.gt.s32 %p555, %r89, -1; + @%p555 bra $L__BB7_345; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r635}, %fd616; + } + xor.b32 %r636, %r635, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r637, %temp}, %fd616; + } + mov.b64 %fd616, {%r637, %r636}; + +$L__BB7_345: + setp.eq.f32 %p556, %f197, 0f00000000; + @%p556 bra $L__BB7_349; + bra.uni $L__BB7_346; + +$L__BB7_349: + setp.lt.s32 %p559, %r74, 0; + mov.u32 %r638, 0; + or.b32 %r639, %r89, 2146435072; + selp.b32 %r640, %r639, %r89, %p559; + mov.b64 %fd616, {%r638, %r640}; + bra.uni $L__BB7_350; + +$L__BB7_346: + @%p555 bra $L__BB7_350; + + cvt.rzi.f64.f64 %fd527, %fd350; + setp.eq.f64 %p558, %fd527, 0d4000000000000000; + @%p558 bra $L__BB7_350; + + mov.f64 %fd616, 0dFFF8000000000000; + +$L__BB7_350: + add.f64 %fd223, %fd216, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r641}, %fd223; + } + and.b32 %r642, %r641, 2146435072; + setp.ne.s32 %p560, %r642, 2146435072; + mov.f64 %fd617, %fd616; + @%p560 bra $L__BB7_356; + + setp.gtu.f64 %p561, %fd217, 0d7FF0000000000000; + mov.f64 %fd617, %fd223; + @%p561 bra $L__BB7_356; + + setp.eq.s32 %p562, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r643, %temp}, %fd350; + } + setp.eq.s32 %p563, %r643, 0; + and.pred %p564, %p562, %p563; + @%p564 bra $L__BB7_355; + bra.uni $L__BB7_353; + +$L__BB7_355: + setp.lt.s32 %p571, %r74, 0; + mov.u32 %r649, 0; + setp.gt.f64 %p572, %fd217, 0d3FF0000000000000; + selp.b32 %r650, 2146435072, 0, %p572; + xor.b32 %r651, %r650, 2146435072; + selp.b32 %r652, %r651, %r650, %p571; + setp.eq.f32 %p573, %f197, 0fBF800000; + selp.b32 %r653, 1072693248, %r652, %p573; + mov.b64 %fd617, {%r649, %r653}; + bra.uni $L__BB7_356; + +$L__BB7_278: + { + .reg .b32 %temp; + mov.b64 {%r546, %temp}, %fd151; + } + and.b32 %r547, %r84, 2147483647; + setp.ne.s32 %p458, %r547, 2146435072; + setp.ne.s32 %p459, %r546, 0; + or.pred %p460, %p458, %p459; + mov.f64 %fd601, %fd600; + @%p460 bra $L__BB7_281; + + mov.u32 %r548, 0; + mov.b64 %fd601, {%r548, %r76}; + +$L__BB7_281: + mul.f32 %f1509, %f233, 0f00000000; + cvt.f64.f32 %fd468, %f1509; + setp.eq.f32 %p464, %f227, 0f3F800000; + selp.f64 %fd469, 0d3FF0000000000000, %fd601, %p464; + mul.f64 %fd470, %fd469, %fd129; + sub.f64 %fd471, %fd468, %fd470; + cvt.f64.f32 %fd472, %f3022; + add.f64 %fd624, %fd471, %fd472; + cvt.f64.f32 %fd473, %f3021; + sub.f64 %fd474, %fd468, %fd129; + add.f64 %fd623, %fd474, %fd473; + cvt.f64.f32 %fd162, %f197; + { + .reg .b32 %temp; + mov.b64 {%temp, %r85}, %fd162; + } + abs.f64 %fd163, %fd162; + setp.eq.f32 %p465, %f197, 0f00000000; + @%p465 bra $L__BB7_285; + bra.uni $L__BB7_282; + +$L__BB7_285: + mov.u32 %r554, 0; + mov.b64 %fd602, {%r554, %r77}; + bra.uni $L__BB7_286; + +$L__BB7_282: + { // callseq 168, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd163; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd602, [retval0+0]; + } // callseq 168 + setp.gt.s32 %p466, %r85, -1; + @%p466 bra $L__BB7_286; + + cvt.rzi.f64.f64 %fd477, %fd350; + setp.eq.f64 %p467, %fd477, 0d4000000000000000; + @%p467 bra $L__BB7_286; + + mov.f64 %fd602, 0dFFF8000000000000; + +$L__BB7_286: + add.f64 %fd167, %fd162, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r555}, %fd167; + } + and.b32 %r556, %r555, 2146435072; + setp.ne.s32 %p468, %r556, 2146435072; + mov.f64 %fd603, %fd602; + @%p468 bra $L__BB7_292; + + setp.gtu.f64 %p469, %fd163, 0d7FF0000000000000; + mov.f64 %fd603, %fd167; + @%p469 bra $L__BB7_292; + + setp.eq.s32 %p470, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r557, %temp}, %fd350; + } + setp.eq.s32 %p471, %r557, 0; + and.pred %p472, %p470, %p471; + @%p472 bra $L__BB7_291; + bra.uni $L__BB7_289; + +$L__BB7_291: + setp.lt.s32 %p476, %r74, 0; + mov.u32 %r561, 0; + setp.gt.f64 %p477, %fd163, 0d3FF0000000000000; + selp.b32 %r562, 2146435072, 0, %p477; + xor.b32 %r563, %r562, 2146435072; + selp.b32 %r564, %r563, %r562, %p476; + setp.eq.f32 %p478, %f197, 0fBF800000; + selp.b32 %r565, 1072693248, %r564, %p478; + mov.b64 %fd603, {%r561, %r565}; + bra.uni $L__BB7_292; + +$L__BB7_353: + { + .reg .b32 %temp; + mov.b64 {%r644, %temp}, %fd216; + } + and.b32 %r645, %r89, 2147483647; + setp.ne.s32 %p565, %r645, 2146435072; + setp.ne.s32 %p566, %r644, 0; + or.pred %p567, %p565, %p566; + mov.f64 %fd617, %fd616; + @%p567 bra $L__BB7_356; + + setp.lt.s32 %p568, %r89, 0; + mov.u32 %r646, 0; + setp.ne.s32 %p569, %r75, 1071644672; + and.pred %p570, %p569, %p568; + or.b32 %r647, %r76, -2147483648; + selp.b32 %r648, %r647, %r76, %p570; + mov.b64 %fd617, {%r646, %r648}; + +$L__BB7_356: + setp.eq.f32 %p574, %f197, 0f3F800000; + selp.f64 %fd530, 0d3FF0000000000000, %fd617, %p574; + mul.f64 %fd531, %fd530, %fd129; + mul.f32 %f1514, %f233, %f198; + cvt.f64.f32 %fd532, %f1514; + sub.f64 %fd533, %fd532, %fd531; + cvt.f64.f32 %fd534, %f3020; + add.f64 %fd622, %fd533, %fd534; + cvt.f64.f32 %fd228, %f225; + { + .reg .b32 %temp; + mov.b64 {%temp, %r90}, %fd228; + } + abs.f64 %fd229, %fd228; + { // callseq 173, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd229; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd619, [retval0+0]; + } // callseq 173 + setp.gt.s32 %p575, %r90, -1; + @%p575 bra $L__BB7_358; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r654}, %fd619; + } + xor.b32 %r655, %r654, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r656, %temp}, %fd619; + } + mov.b64 %fd619, {%r656, %r655}; + +$L__BB7_358: + setp.eq.f32 %p576, %f225, 0f00000000; + @%p576 bra $L__BB7_362; + bra.uni $L__BB7_359; + +$L__BB7_362: + setp.lt.s32 %p579, %r74, 0; + mov.u32 %r657, 0; + or.b32 %r658, %r90, 2146435072; + selp.b32 %r659, %r658, %r90, %p579; + mov.b64 %fd619, {%r657, %r659}; + bra.uni $L__BB7_363; + +$L__BB7_359: + @%p575 bra $L__BB7_363; + + cvt.rzi.f64.f64 %fd537, %fd350; + setp.eq.f64 %p578, %fd537, 0d4000000000000000; + @%p578 bra $L__BB7_363; + + mov.f64 %fd619, 0dFFF8000000000000; + +$L__BB7_363: + add.f64 %fd235, %fd228, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r660}, %fd235; + } + and.b32 %r661, %r660, 2146435072; + setp.ne.s32 %p580, %r661, 2146435072; + mov.f64 %fd620, %fd619; + @%p580 bra $L__BB7_369; + + setp.gtu.f64 %p581, %fd229, 0d7FF0000000000000; + mov.f64 %fd620, %fd235; + @%p581 bra $L__BB7_369; + + setp.eq.s32 %p582, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r662, %temp}, %fd350; + } + setp.eq.s32 %p583, %r662, 0; + and.pred %p584, %p582, %p583; + @%p584 bra $L__BB7_368; + bra.uni $L__BB7_366; + +$L__BB7_368: + setp.lt.s32 %p591, %r74, 0; + mov.u32 %r668, 0; + setp.gt.f64 %p592, %fd229, 0d3FF0000000000000; + selp.b32 %r669, 2146435072, 0, %p592; + xor.b32 %r670, %r669, 2146435072; + selp.b32 %r671, %r670, %r669, %p591; + setp.eq.f32 %p593, %f225, 0fBF800000; + selp.b32 %r672, 1072693248, %r671, %p593; + mov.b64 %fd620, {%r668, %r672}; + bra.uni $L__BB7_369; + +$L__BB7_289: + { + .reg .b32 %temp; + mov.b64 {%r558, %temp}, %fd162; + } + and.b32 %r559, %r85, 2147483647; + setp.ne.s32 %p473, %r559, 2146435072; + setp.ne.s32 %p474, %r558, 0; + or.pred %p475, %p473, %p474; + mov.f64 %fd603, %fd602; + @%p475 bra $L__BB7_292; + + mov.u32 %r560, 0; + mov.b64 %fd603, {%r560, %r76}; + +$L__BB7_292: + setp.eq.f32 %p479, %f197, 0f3F800000; + selp.f64 %fd480, 0d3FF0000000000000, %fd603, %p479; + mul.f64 %fd481, %fd480, %fd129; + mul.f32 %f1510, %f233, %f198; + cvt.f64.f32 %fd482, %f1510; + sub.f64 %fd483, %fd482, %fd481; + cvt.f64.f32 %fd484, %f3020; + add.f64 %fd622, %fd483, %fd484; + cvt.f64.f32 %fd172, %f225; + { + .reg .b32 %temp; + mov.b64 {%temp, %r86}, %fd172; + } + abs.f64 %fd173, %fd172; + setp.eq.f32 %p480, %f225, 0f00000000; + @%p480 bra $L__BB7_296; + bra.uni $L__BB7_293; + +$L__BB7_296: + mov.u32 %r566, 0; + mov.b64 %fd604, {%r566, %r77}; + bra.uni $L__BB7_297; + +$L__BB7_293: + { // callseq 169, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd173; + .param .b64 param1; + st.param.f64 [param1+0], %fd350; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd604, [retval0+0]; + } // callseq 169 + setp.gt.s32 %p481, %r86, -1; + @%p481 bra $L__BB7_297; + + cvt.rzi.f64.f64 %fd487, %fd350; + setp.eq.f64 %p482, %fd487, 0d4000000000000000; + @%p482 bra $L__BB7_297; + + mov.f64 %fd604, 0dFFF8000000000000; + +$L__BB7_297: + add.f64 %fd177, %fd172, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r567}, %fd177; + } + and.b32 %r568, %r567, 2146435072; + setp.ne.s32 %p483, %r568, 2146435072; + mov.f64 %fd605, %fd604; + @%p483 bra $L__BB7_303; + + setp.gtu.f64 %p484, %fd173, 0d7FF0000000000000; + mov.f64 %fd605, %fd177; + @%p484 bra $L__BB7_303; + + setp.eq.s32 %p485, %r75, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r569, %temp}, %fd350; + } + setp.eq.s32 %p486, %r569, 0; + and.pred %p487, %p485, %p486; + @%p487 bra $L__BB7_302; + bra.uni $L__BB7_300; + +$L__BB7_302: + setp.lt.s32 %p491, %r74, 0; + mov.u32 %r573, 0; + setp.gt.f64 %p492, %fd173, 0d3FF0000000000000; + selp.b32 %r574, 2146435072, 0, %p492; + xor.b32 %r575, %r574, 2146435072; + selp.b32 %r576, %r575, %r574, %p491; + setp.eq.f32 %p493, %f225, 0fBF800000; + selp.b32 %r577, 1072693248, %r576, %p493; + mov.b64 %fd605, {%r573, %r577}; + bra.uni $L__BB7_303; + +$L__BB7_366: + { + .reg .b32 %temp; + mov.b64 {%r663, %temp}, %fd228; + } + and.b32 %r664, %r90, 2147483647; + setp.ne.s32 %p585, %r664, 2146435072; + setp.ne.s32 %p586, %r663, 0; + or.pred %p587, %p585, %p586; + mov.f64 %fd620, %fd619; + @%p587 bra $L__BB7_369; + + setp.lt.s32 %p588, %r90, 0; + mov.u32 %r665, 0; + setp.ne.s32 %p589, %r75, 1071644672; + and.pred %p590, %p589, %p588; + or.b32 %r666, %r76, -2147483648; + selp.b32 %r667, %r666, %r76, %p590; + mov.b64 %fd620, {%r665, %r667}; + +$L__BB7_369: + setp.eq.f32 %p594, %f225, 0f3F800000; + selp.f64 %fd540, 0d3FF0000000000000, %fd620, %p594; + mul.f64 %fd541, %fd540, %fd129; + mul.f32 %f1515, %f233, %f226; + cvt.f64.f32 %fd542, %f1515; + sub.f64 %fd543, %fd542, %fd541; + cvt.f64.f32 %fd544, %f3019; + add.f64 %fd621, %fd543, %fd544; + bra.uni $L__BB7_370; + +$L__BB7_300: + { + .reg .b32 %temp; + mov.b64 {%r570, %temp}, %fd172; + } + and.b32 %r571, %r86, 2147483647; + setp.ne.s32 %p488, %r571, 2146435072; + setp.ne.s32 %p489, %r570, 0; + or.pred %p490, %p488, %p489; + mov.f64 %fd605, %fd604; + @%p490 bra $L__BB7_303; + + mov.u32 %r572, 0; + mov.b64 %fd605, {%r572, %r76}; + +$L__BB7_303: + setp.eq.f32 %p494, %f225, 0f3F800000; + selp.f64 %fd490, 0d3FF0000000000000, %fd605, %p494; + mul.f64 %fd491, %fd490, %fd129; + mul.f32 %f1511, %f233, %f226; + cvt.f64.f32 %fd492, %f1511; + sub.f64 %fd493, %fd492, %fd491; + cvt.f64.f32 %fd494, %f3019; + add.f64 %fd621, %fd493, %fd494; + +$L__BB7_370: + cvt.rn.f32.f64 %f3024, %fd626; + cvt.rn.f32.f64 %f3023, %fd625; + cvt.rn.f32.f64 %f3022, %fd624; + cvt.rn.f32.f64 %f3021, %fd623; + cvt.rn.f32.f64 %f3020, %fd622; + cvt.rn.f32.f64 %f3019, %fd621; + fma.rn.f32 %f3017, %f233, %f183, %f3017; + fma.rn.f32 %f3016, %f233, %f227, %f3016; + add.f32 %f3015, %f3015, %f233; + fma.rn.f32 %f3014, %f233, %f197, %f3014; + fma.rn.f32 %f3013, %f233, %f225, %f3013; + add.s32 %r847, %r847, 1; + setp.lt.s32 %p595, %r847, %r104; + @%p595 bra $L__BB7_56; + + add.s32 %r846, %r846, 1; + setp.lt.s32 %p596, %r846, %r104; + @%p596 bra $L__BB7_55; + +$L__BB7_372: + ld.param.u32 %r832, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_6]; + div.rn.f32 %f1516, %f3018, %f3024; + mov.f32 %f1517, 0fBF800000; + max.f32 %f1518, %f1516, %f1517; + mov.f32 %f1519, 0f3F800000; + min.f32 %f1520, %f1518, %f1519; + sub.f32 %f3069, %f3069, %f1520; + div.rn.f32 %f1521, %f3017, %f3023; + max.f32 %f1522, %f1521, %f1517; + min.f32 %f1523, %f1522, %f1519; + sub.f32 %f3068, %f3068, %f1523; + neg.f32 %f1524, %f3067; + div.rn.f32 %f1525, %f3016, %f3022; + max.f32 %f1526, %f1525, %f1524; + min.f32 %f1527, %f1526, %f3067; + sub.f32 %f1528, %f3067, %f1527; + neg.f32 %f1529, %f3066; + div.rn.f32 %f1530, %f3015, %f3021; + max.f32 %f1531, %f1530, %f1529; + min.f32 %f1532, %f1531, %f3066; + sub.f32 %f1533, %f3066, %f1532; + neg.f32 %f1534, %f3065; + div.rn.f32 %f1535, %f3014, %f3020; + max.f32 %f1536, %f1535, %f1534; + min.f32 %f1537, %f1536, %f3065; + sub.f32 %f1538, %f3065, %f1537; + neg.f32 %f1539, %f3064; + div.rn.f32 %f1540, %f3013, %f3019; + max.f32 %f1541, %f1540, %f1539; + min.f32 %f1542, %f1541, %f3064; + sub.f32 %f1543, %f3064, %f1542; + max.f32 %f3067, %f1528, %f1519; + mov.f32 %f1544, 0f3C23D70A; + max.f32 %f3066, %f1533, %f1544; + mov.f32 %f1545, 0f3F000000; + max.f32 %f1546, %f1538, %f1545; + min.f32 %f3065, %f1546, %f51; + max.f32 %f1547, %f1543, %f1545; + min.f32 %f3064, %f1547, %f51; + add.s32 %r845, %r845, 1; + setp.lt.s32 %p597, %r845, %r832; + @%p597 bra $L__BB7_53; + +$L__BB7_373: + mov.f32 %f1569, 0f00000000; + mov.f32 %f3093, %f1569; + mov.f32 %f3094, %f1569; + mov.f32 %f3095, %f1569; + mov.f32 %f3098, %f1569; + mov.f32 %f3102, %f1569; + mov.f32 %f3107, %f1569; + mov.f32 %f3096, %f1569; + mov.f32 %f3097, %f1569; + mov.f32 %f3099, %f1569; + mov.f32 %f3103, %f1569; + mov.f32 %f3108, %f1569; + mov.f32 %f3100, %f1569; + mov.f32 %f3101, %f1569; + mov.f32 %f3104, %f1569; + mov.f32 %f3109, %f1569; + mov.f32 %f3105, %f1569; + mov.f32 %f3106, %f1569; + mov.f32 %f3110, %f1569; + mov.f32 %f3111, %f1569; + mov.f32 %f3112, %f1569; + mov.f32 %f3113, %f1569; + mov.f32 %f3141, %f1569; + @%p44 bra $L__BB7_462; + + mov.f32 %f1592, 0f3F000000; + div.rn.f32 %f1593, %f1592, %f3065; + div.rn.f32 %f1594, %f1593, %f3065; + div.rn.f32 %f1595, %f1592, %f3064; + div.rn.f32 %f1596, %f1595, %f3064; + div.rn.f32 %f1597, %f3067, 0fC0206C98; + div.rn.f32 %f272, %f1597, %f3065; + div.rn.f32 %f273, %f1597, %f3064; + div.rn.f32 %f274, %f272, %f3065; + div.rn.f32 %f275, %f273, %f3064; + sqrt.rn.f32 %f276, %f1594; + sqrt.rn.f32 %f277, %f1596; + mov.f32 %f1598, 0f3F800000; + cvt.rzi.f32.f32 %f1599, %f1598; + add.f32 %f1600, %f1599, %f1599; + mov.f32 %f1601, 0f40000000; + sub.f32 %f1602, %f1601, %f1600; + abs.f32 %f278, %f1602; + mov.u32 %r673, 0; + setp.eq.f32 %p606, %f278, 0f3F800000; + mov.u32 %r848, %r673; + +$L__BB7_375: + cvt.rn.f32.s32 %f1603, %r848; + sub.f32 %f301, %f1603, %f3069; + add.f32 %f1604, %f301, 0f3F000000; + mul.f32 %f1605, %f1604, %f276; + abs.f32 %f302, %f1605; + setp.ge.f32 %p599, %f302, 0f3F8060FE; + mul.f32 %f1606, %f1605, %f1605; + selp.f32 %f1607, %f302, %f1606, %p599; + selp.f32 %f1608, 0f3789CA3C, 0f38B1E96A, %p599; + selp.f32 %f1609, 0fB9F560B9, 0fBA574D20, %p599; + fma.rn.f32 %f1610, %f1608, %f1607, %f1609; + selp.f32 %f1611, 0f3BAC840B, 0f3BAAD5EA, %p599; + fma.rn.f32 %f1612, %f1610, %f1607, %f1611; + selp.f32 %f1613, 0fBD0C8162, 0fBCDC1BE7, %p599; + fma.rn.f32 %f1614, %f1612, %f1607, %f1613; + selp.f32 %f1615, 0f3E1CF906, 0f3DE718AF, %p599; + fma.rn.f32 %f1616, %f1614, %f1607, %f1615; + selp.f32 %f1617, 0f3F6A937E, 0fBEC093AC, %p599; + fma.rn.f32 %f1618, %f1616, %f1607, %f1617; + selp.f32 %f1619, 0f3F20D842, 0f3E0375D3, %p599; + fma.rn.f32 %f1620, %f1618, %f1607, %f1619; + neg.f32 %f1621, %f302; + selp.f32 %f1622, %f1621, %f1605, %p599; + fma.rn.f32 %f303, %f1620, %f1622, %f1622; + mov.b32 %r675, %f1605; + and.b32 %r95, %r675, -2147483648; + add.f32 %f1623, %f301, 0fBF000000; + mul.f32 %f1624, %f1623, %f276; + abs.f32 %f304, %f1624; + setp.ge.f32 %p600, %f304, 0f3F8060FE; + mul.f32 %f1625, %f1624, %f1624; + selp.f32 %f1626, %f304, %f1625, %p600; + selp.f32 %f1627, 0f3789CA3C, 0f38B1E96A, %p600; + selp.f32 %f1628, 0fB9F560B9, 0fBA574D20, %p600; + fma.rn.f32 %f1629, %f1627, %f1626, %f1628; + selp.f32 %f1630, 0f3BAC840B, 0f3BAAD5EA, %p600; + fma.rn.f32 %f1631, %f1629, %f1626, %f1630; + selp.f32 %f1632, 0fBD0C8162, 0fBCDC1BE7, %p600; + fma.rn.f32 %f1633, %f1631, %f1626, %f1632; + selp.f32 %f1634, 0f3E1CF906, 0f3DE718AF, %p600; + fma.rn.f32 %f1635, %f1633, %f1626, %f1634; + selp.f32 %f1636, 0f3F6A937E, 0fBEC093AC, %p600; + fma.rn.f32 %f1637, %f1635, %f1626, %f1636; + selp.f32 %f1638, 0f3F20D842, 0f3E0375D3, %p600; + fma.rn.f32 %f1639, %f1637, %f1626, %f1638; + neg.f32 %f1640, %f304; + selp.f32 %f1641, %f1640, %f1624, %p600; + fma.rn.f32 %f305, %f1639, %f1641, %f1641; + mov.b32 %r676, %f1624; + and.b32 %r96, %r676, -2147483648; + add.f32 %f1642, %f1603, 0f3F000000; + sub.f32 %f1643, %f1642, %f3069; + div.rn.f32 %f306, %f1643, %f3065; + abs.f32 %f307, %f306; + setp.lt.f32 %p601, %f307, 0f00800000; + mul.f32 %f1644, %f307, 0f4B800000; + selp.f32 %f1645, %f1644, %f307, %p601; + selp.f32 %f1646, 0fC3170000, 0fC2FE0000, %p601; + mov.b32 %r677, %f1645; + and.b32 %r678, %r677, 8388607; + or.b32 %r679, %r678, 1065353216; + mov.b32 %f1647, %r679; + shr.u32 %r680, %r677, 23; + cvt.rn.f32.u32 %f1648, %r680; + add.f32 %f1649, %f1646, %f1648; + setp.gt.f32 %p602, %f1647, 0f3FB504F3; + mul.f32 %f1650, %f1647, 0f3F000000; + add.f32 %f1651, %f1649, 0f3F800000; + selp.f32 %f1652, %f1651, %f1649, %p602; + selp.f32 %f1653, %f1650, %f1647, %p602; + add.f32 %f1654, %f1653, 0fBF800000; + add.f32 %f1655, %f1653, 0f3F800000; + rcp.approx.ftz.f32 %f1656, %f1655; + add.f32 %f1657, %f1654, %f1654; + mul.f32 %f1659, %f1657, %f1656; + mul.f32 %f1660, %f1659, %f1659; + mov.f32 %f1661, 0f3C4CAF63; + mov.f32 %f1662, 0f3B18F0FE; + fma.rn.f32 %f1663, %f1662, %f1660, %f1661; + mov.f32 %f1664, 0f3DAAAABD; + fma.rn.f32 %f1665, %f1663, %f1660, %f1664; + mul.rn.f32 %f1666, %f1665, %f1660; + mul.rn.f32 %f1667, %f1666, %f1659; + sub.f32 %f1668, %f1654, %f1659; + add.f32 %f1669, %f1668, %f1668; + neg.f32 %f1670, %f1659; + fma.rn.f32 %f1671, %f1670, %f1654, %f1669; + mul.rn.f32 %f1672, %f1656, %f1671; + add.f32 %f1673, %f1667, %f1659; + sub.f32 %f1674, %f1659, %f1673; + add.f32 %f1675, %f1667, %f1674; + add.f32 %f1676, %f1672, %f1675; + add.f32 %f1677, %f1673, %f1676; + sub.f32 %f1678, %f1673, %f1677; + add.f32 %f1679, %f1676, %f1678; + mov.f32 %f1680, 0f3F317200; + mul.rn.f32 %f1681, %f1652, %f1680; + mov.f32 %f1682, 0f35BFBE8E; + mul.rn.f32 %f1683, %f1652, %f1682; + add.f32 %f1684, %f1681, %f1677; + sub.f32 %f1685, %f1681, %f1684; + add.f32 %f1686, %f1677, %f1685; + add.f32 %f1687, %f1679, %f1686; + add.f32 %f1688, %f1683, %f1687; + add.f32 %f1689, %f1684, %f1688; + sub.f32 %f1690, %f1684, %f1689; + add.f32 %f1691, %f1688, %f1690; + mul.rn.f32 %f1692, %f1601, %f1689; + neg.f32 %f1693, %f1692; + fma.rn.f32 %f1694, %f1601, %f1689, %f1693; + fma.rn.f32 %f1695, %f1601, %f1691, %f1694; + fma.rn.f32 %f1697, %f1569, %f1689, %f1695; + add.rn.f32 %f1698, %f1692, %f1697; + neg.f32 %f1699, %f1698; + add.rn.f32 %f1700, %f1692, %f1699; + add.rn.f32 %f1701, %f1700, %f1697; + mov.b32 %r681, %f1698; + setp.eq.s32 %p603, %r681, 1118925336; + add.s32 %r682, %r681, -1; + mov.b32 %f1702, %r682; + add.f32 %f1703, %f1701, 0f37000000; + selp.f32 %f308, %f1703, %f1701, %p603; + selp.f32 %f1704, %f1702, %f1698, %p603; + mov.f32 %f1705, 0f3FB8AA3B; + mul.rn.f32 %f1706, %f1704, %f1705; + cvt.rzi.f32.f32 %f1707, %f1706; + abs.f32 %f1708, %f1707; + setp.gt.f32 %p604, %f1708, 0f42FC0000; + mov.b32 %r683, %f1707; + and.b32 %r684, %r683, -2147483648; + or.b32 %r685, %r684, 1123811328; + mov.b32 %f1709, %r685; + selp.f32 %f1710, %f1709, %f1707, %p604; + mov.f32 %f1711, 0fBF317218; + fma.rn.f32 %f1712, %f1710, %f1711, %f1704; + mov.f32 %f1713, 0f3102E308; + fma.rn.f32 %f1714, %f1710, %f1713, %f1712; + mul.f32 %f1715, %f1714, 0f3FB8AA3B; + add.f32 %f1716, %f1710, 0f4B40007F; + mov.b32 %r686, %f1716; + shl.b32 %r687, %r686, 23; + mov.b32 %f1717, %r687; + ex2.approx.ftz.f32 %f1718, %f1715; + mul.f32 %f309, %f1718, %f1717; + setp.lt.f32 %p605, %f306, 0f00000000; + and.pred %p31, %p605, %p606; + add.f32 %f1719, %f306, %f306; + selp.f32 %f310, %f1719, 0f00000000, %p606; + add.f32 %f1720, %f307, 0f40000000; + mov.b32 %r97, %f1720; + div.rn.f32 %f311, %f1623, %f3065; + abs.f32 %f312, %f311; + setp.lt.f32 %p607, %f312, 0f00800000; + mul.f32 %f1721, %f312, 0f4B800000; + selp.f32 %f1722, %f1721, %f312, %p607; + selp.f32 %f1723, 0fC3170000, 0fC2FE0000, %p607; + mov.b32 %r688, %f1722; + and.b32 %r689, %r688, 8388607; + or.b32 %r690, %r689, 1065353216; + mov.b32 %f1724, %r690; + shr.u32 %r691, %r688, 23; + cvt.rn.f32.u32 %f1725, %r691; + add.f32 %f1726, %f1723, %f1725; + setp.gt.f32 %p608, %f1724, 0f3FB504F3; + mul.f32 %f1727, %f1724, 0f3F000000; + add.f32 %f1728, %f1726, 0f3F800000; + selp.f32 %f1729, %f1728, %f1726, %p608; + selp.f32 %f1730, %f1727, %f1724, %p608; + add.f32 %f1731, %f1730, 0fBF800000; + add.f32 %f1732, %f1730, 0f3F800000; + rcp.approx.ftz.f32 %f1733, %f1732; + add.f32 %f1734, %f1731, %f1731; + mul.f32 %f1735, %f1734, %f1733; mul.f32 %f1736, %f1735, %f1735; - add.f32 %f1737, %f186, %f3337; - div.rn.f32 %f3324, %f1737, %f1736; - -BB7_118: - mov.f32 %f1738, 0f47C35000; - min.f32 %f1739, %f3323, %f1738; - fma.rn.f32 %f3292, %f1739, %f205, %f3292; - mul.f32 %f1740, %f1739, %f206; - mul.f32 %f1741, %f205, %f205; - min.f32 %f1742, %f3324, %f1738; - mul.f32 %f1743, %f1742, %f1741; - sub.f32 %f1744, %f1740, %f1743; - add.f32 %f3298, %f1744, %f3298; - fma.rn.f32 %f3291, %f1739, %f245, %f3291; - mul.f32 %f1745, %f1739, %f246; - mul.f32 %f1746, %f245, %f245; - mul.f32 %f1747, %f1742, %f1746; - sub.f32 %f1748, %f1745, %f1747; - add.f32 %f3297, %f1748, %f3297; - fma.rn.f32 %f3290, %f1739, %f291, %f3290; - mul.f32 %f1749, %f1739, 0f00000000; - mul.f32 %f1750, %f291, %f291; - mul.f32 %f1751, %f1742, %f1750; - sub.f32 %f1752, %f1749, %f1751; - add.f32 %f3296, %f1752, %f3296; - add.f32 %f3289, %f3289, %f1739; - sub.f32 %f1753, %f1749, %f1742; - add.f32 %f3295, %f1753, %f3295; - fma.rn.f32 %f3288, %f1739, %f264, %f3288; - mul.f32 %f1754, %f1739, %f265; - mul.f32 %f1755, %f264, %f264; - mul.f32 %f1756, %f1742, %f1755; - sub.f32 %f1757, %f1754, %f1756; - add.f32 %f3294, %f1757, %f3294; - fma.rn.f32 %f3287, %f1739, %f289, %f3287; - mul.f32 %f1758, %f1739, %f290; - mul.f32 %f1759, %f289, %f289; - mul.f32 %f1760, %f1742, %f1759; - sub.f32 %f1761, %f1758, %f1760; - add.f32 %f3293, %f1761, %f3293; - add.s32 %r329, %r329, 1; - setp.lt.s32 %p184, %r329, %r71; - @%p184 bra BB7_42; - - add.s32 %r328, %r328, 1; - setp.lt.s32 %p185, %r328, %r71; - @%p185 bra BB7_41; - -BB7_120: - div.rn.f32 %f1762, %f3292, %f3298; - mov.f32 %f1763, 0fBF800000; - max.f32 %f1764, %f1762, %f1763; - mov.f32 %f1765, 0f3F800000; - min.f32 %f1766, %f1764, %f1765; - sub.f32 %f3343, %f3343, %f1766; - div.rn.f32 %f1767, %f3291, %f3297; - max.f32 %f1768, %f1767, %f1763; - min.f32 %f1769, %f1768, %f1765; - sub.f32 %f3342, %f3342, %f1769; - neg.f32 %f1770, %f3341; - div.rn.f32 %f1771, %f3290, %f3296; - max.f32 %f1772, %f1771, %f1770; - min.f32 %f1773, %f1772, %f3341; - sub.f32 %f1774, %f3341, %f1773; - neg.f32 %f1775, %f3246; - div.rn.f32 %f1776, %f3289, %f3295; - max.f32 %f1777, %f1776, %f1775; - min.f32 %f1778, %f1777, %f3246; - sub.f32 %f1779, %f3246, %f1778; - neg.f32 %f1780, %f3339; - div.rn.f32 %f1781, %f3288, %f3294; - max.f32 %f1782, %f1781, %f1780; - min.f32 %f1783, %f1782, %f3339; - sub.f32 %f1784, %f3339, %f1783; - neg.f32 %f1785, %f3338; - div.rn.f32 %f1786, %f3287, %f3293; - max.f32 %f1787, %f1786, %f1785; - min.f32 %f1788, %f1787, %f3338; - sub.f32 %f1789, %f3338, %f1788; - max.f32 %f3341, %f1774, %f1765; - mov.f32 %f1790, 0f3C23D70A; - max.f32 %f3246, %f1779, %f1790; - max.f32 %f1792, %f1784, %f660; - min.f32 %f3339, %f1792, %f87; - max.f32 %f1793, %f1789, %f660; - min.f32 %f3338, %f1793, %f87; - add.s32 %r327, %r327, 1; - setp.lt.s32 %p186, %r327, %r73; - @%p186 bra BB7_39; - bra.uni BB7_121; - -BB7_37: - mov.f32 %f3339, %f3338; - -BB7_121: - mov.f32 %f3367, 0f00000000; - @%p15 bra BB7_207; - - div.rn.f32 %f1797, %f660, %f3339; - div.rn.f32 %f334, %f1797, %f3339; - div.rn.f32 %f1798, %f660, %f3338; - div.rn.f32 %f335, %f1798, %f3338; - div.rn.f32 %f1799, %f3341, 0fC0206C98; - div.rn.f32 %f336, %f1799, %f3339; - div.rn.f32 %f337, %f1799, %f3338; - div.rn.f32 %f338, %f336, %f3339; - div.rn.f32 %f339, %f337, %f3338; - mov.u32 %r201, 0; - mov.f32 %f3367, 0f00000000; - sqrt.rn.f32 %f1801, %f334; - sqrt.rn.f32 %f349, %f335; - mov.u32 %r330, %r201; - -BB7_123: - cvt.rn.f32.s32 %f1800, %r330; - sub.f32 %f341, %f1800, %f3343; - add.f32 %f342, %f341, 0f3F800000; - mul.f32 %f343, %f342, %f1801; - abs.f32 %f344, %f343; - mul.f32 %f345, %f343, %f343; - mul.f32 %f346, %f341, %f1801; - abs.f32 %f347, %f346; - add.f32 %f1802, %f1800, 0f3F800000; - sub.f32 %f1803, %f1802, %f3343; - div.rn.f32 %f350, %f1803, %f3339; - mov.f32 %f1804, 0f3F800000; - cvt.rzi.f32.f32 %f1805, %f1804; - add.f32 %f1806, %f1805, %f1805; - mov.f32 %f1807, 0f40000000; - sub.f32 %f1808, %f1807, %f1806; - abs.f32 %f351, %f1808; - setp.eq.f32 %p188, %f351, 0f3F800000; - abs.f32 %f352, %f350; - setp.lt.f32 %p189, %f352, 0f00800000; - mul.f32 %f1809, %f352, 0f4B800000; - selp.f32 %f1810, 0fC3170000, 0fC2FE0000, %p189; - selp.f32 %f1811, %f1809, %f352, %p189; - mov.b32 %r203, %f1811; - and.b32 %r204, %r203, 8388607; - or.b32 %r205, %r204, 1065353216; - mov.b32 %f1812, %r205; - shr.u32 %r206, %r203, 23; - cvt.rn.f32.u32 %f1813, %r206; - add.f32 %f1814, %f1810, %f1813; - setp.gt.f32 %p190, %f1812, 0f3FB504F3; - mul.f32 %f1815, %f1812, 0f3F000000; - add.f32 %f1816, %f1814, 0f3F800000; - selp.f32 %f1817, %f1815, %f1812, %p190; - selp.f32 %f1818, %f1816, %f1814, %p190; - add.f32 %f353, %f1817, 0fBF800000; - add.f32 %f354, %f1817, 0f3F800000; - add.f32 %f355, %f353, %f353; - mov.f32 %f1819, 0f3F317200; - mul.rn.f32 %f356, %f1818, %f1819; - mov.f32 %f1820, 0f35BFBE8E; - mul.rn.f32 %f357, %f1818, %f1820; - setp.lt.f32 %p191, %f350, 0f00000000; - and.pred %p7, %p191, %p188; - add.f32 %f1821, %f350, %f350; - selp.f32 %f358, %f1821, 0f00000000, %p188; - div.rn.f32 %f361, %f341, %f3339; - abs.f32 %f362, %f361; - setp.lt.f32 %p192, %f362, 0f00800000; - mul.f32 %f1823, %f362, 0f4B800000; - selp.f32 %f1824, 0fC3170000, 0fC2FE0000, %p192; - selp.f32 %f1825, %f1823, %f362, %p192; - mov.b32 %r207, %f1825; - and.b32 %r208, %r207, 8388607; - or.b32 %r209, %r208, 1065353216; - mov.b32 %f1826, %r209; - shr.u32 %r210, %r207, 23; - cvt.rn.f32.u32 %f1827, %r210; - add.f32 %f1828, %f1824, %f1827; - setp.gt.f32 %p193, %f1826, 0f3FB504F3; - mul.f32 %f1829, %f1826, 0f3F000000; - add.f32 %f1830, %f1828, 0f3F800000; - selp.f32 %f1831, %f1829, %f1826, %p193; - selp.f32 %f1832, %f1830, %f1828, %p193; - add.f32 %f363, %f1831, 0fBF800000; - add.f32 %f364, %f1831, 0f3F800000; - add.f32 %f365, %f363, %f363; - mul.rn.f32 %f366, %f1832, %f1819; - mul.rn.f32 %f367, %f1832, %f1820; - setp.lt.f32 %p194, %f361, 0f00000000; - and.pred %p8, %p194, %p188; - add.f32 %f1833, %f361, %f361; - selp.f32 %f368, %f1833, 0f00000000, %p188; - mov.b32 %r212, %f343; - and.b32 %r45, %r212, -2147483648; - ld.local.v4.f32 {%f3366, %f3365, %f3364, %f3363}, [%rd2]; - ld.local.v4.f32 {%f3362, %f3361, %f1841, %f3360}, [%rd2+16]; - ld.local.v4.f32 {%f3359, %f3358, %f3357, %f3356}, [%rd2+32]; - ld.local.v2.f32 {%f3355, %f3354}, [%rd2+56]; - ld.local.v2.f32 {%f3353, %f3352}, [%rd2+64]; - ld.local.f32 %f3351, [%rd2+84]; - ld.local.v2.f32 {%f3350, %f3349}, [%rd2+88]; - ld.local.v2.f32 {%f3348, %f3347}, [%rd2+112]; - ld.local.f32 %f3346, [%rd2+140]; - mov.u32 %r331, %r201; - -BB7_124: - setp.ltu.f32 %p195, %f344, 0f3F800000; - @%p195 bra BB7_126; - bra.uni BB7_125; - -BB7_126: - cvt.rn.f32.s32 %f3211, %r330; - sub.f32 %f3210, %f3211, %f3343; - add.f32 %f3209, %f3210, 0f3F800000; - mul.f32 %f3208, %f3209, %f1801; - mov.f32 %f1873, 0f3BA0C9F8; - mov.f32 %f1874, 0fBA1268FB; - fma.rn.f32 %f1875, %f1874, %f345, %f1873; - mov.f32 %f1876, 0fBCDABFD4; - fma.rn.f32 %f1877, %f1875, %f345, %f1876; - mov.f32 %f1878, 0f3DE70331; - fma.rn.f32 %f1879, %f1877, %f345, %f1878; - mov.f32 %f1880, 0fBEC09330; - fma.rn.f32 %f1881, %f1879, %f345, %f1880; - mov.f32 %f1882, 0f3F906EBA; - fma.rn.f32 %f1883, %f1881, %f345, %f1882; - mul.f32 %f3368, %f3208, %f1883; - bra.uni BB7_127; - -BB7_125: - mov.f32 %f3159, 0f3F800000; - setp.ltu.f32 %p196, %f344, 0f407AD445; - mov.f32 %f1855, 0f3A03BB71; - mov.f32 %f1856, 0fB7B730FB; - fma.rn.f32 %f1857, %f1856, %f344, %f1855; - mov.f32 %f1858, 0fBBACA3B3; - fma.rn.f32 %f1859, %f1857, %f344, %f1858; - mov.f32 %f1860, 0f3D0A7445; - fma.rn.f32 %f1861, %f1859, %f344, %f1860; - mov.f32 %f1862, 0fBE1B3B75; - fma.rn.f32 %f1863, %f1861, %f344, %f1862; - mov.f32 %f1864, 0fBF6B385A; - fma.rn.f32 %f1865, %f1863, %f344, %f1864; - mov.f32 %f1866, 0fBFD0316E; - fma.rn.f32 %f1867, %f1865, %f344, %f1866; - mov.f32 %f1868, 0fBA031CCE; - fma.rn.f32 %f1869, %f1867, %f344, %f1868; - ex2.approx.ftz.f32 %f1870, %f1869; - sub.f32 %f1872, %f3159, %f1870; - mov.b32 %r213, %f1872; - selp.b32 %r214, %r213, 1065353216, %p196; - or.b32 %r215, %r214, %r45; - mov.b32 %f3368, %r215; - -BB7_127: - setp.ltu.f32 %p197, %f347, 0f3F800000; - @%p197 bra BB7_129; - bra.uni BB7_128; - -BB7_129: - cvt.rn.f32.s32 %f3206, %r330; - sub.f32 %f3205, %f3206, %f3343; - mul.f32 %f3204, %f3205, %f1801; - mul.f32 %f3203, %f3204, %f3204; - mov.f32 %f1902, 0f3BA0C9F8; - mov.f32 %f1903, 0fBA1268FB; - fma.rn.f32 %f1904, %f1903, %f3203, %f1902; - mov.f32 %f1905, 0fBCDABFD4; - fma.rn.f32 %f1906, %f1904, %f3203, %f1905; - mov.f32 %f1907, 0f3DE70331; - fma.rn.f32 %f1908, %f1906, %f3203, %f1907; - mov.f32 %f1909, 0fBEC09330; - fma.rn.f32 %f1910, %f1908, %f3203, %f1909; - mov.f32 %f1911, 0f3F906EBA; - fma.rn.f32 %f1912, %f1910, %f3203, %f1911; - mul.f32 %f3369, %f3204, %f1912; - bra.uni BB7_130; - -BB7_128: - cvt.rn.f32.s32 %f3163, %r330; - sub.f32 %f3162, %f3163, %f3343; - mul.f32 %f3161, %f3162, %f1801; - mov.b32 %r310, %f3161; - and.b32 %r309, %r310, -2147483648; - mov.f32 %f3160, 0f3F800000; - setp.ltu.f32 %p198, %f347, 0f407AD445; - mov.f32 %f1884, 0f3A03BB71; - mov.f32 %f1885, 0fB7B730FB; - fma.rn.f32 %f1886, %f1885, %f347, %f1884; - mov.f32 %f1887, 0fBBACA3B3; - fma.rn.f32 %f1888, %f1886, %f347, %f1887; - mov.f32 %f1889, 0f3D0A7445; - fma.rn.f32 %f1890, %f1888, %f347, %f1889; - mov.f32 %f1891, 0fBE1B3B75; - fma.rn.f32 %f1892, %f1890, %f347, %f1891; - mov.f32 %f1893, 0fBF6B385A; - fma.rn.f32 %f1894, %f1892, %f347, %f1893; - mov.f32 %f1895, 0fBFD0316E; - fma.rn.f32 %f1896, %f1894, %f347, %f1895; - mov.f32 %f1897, 0fBA031CCE; - fma.rn.f32 %f1898, %f1896, %f347, %f1897; - ex2.approx.ftz.f32 %f1899, %f1898; - sub.f32 %f1901, %f3160, %f1899; - mov.b32 %r216, %f1901; - selp.b32 %r217, %r216, 1065353216, %p198; - or.b32 %r218, %r217, %r309; - mov.b32 %f3369, %r218; - -BB7_130: - sub.f32 %f1913, %f3368, %f3369; - mul.f32 %f420, %f1913, 0f3F000000; - cvt.rn.f32.s32 %f421, %r331; - sub.f32 %f422, %f421, %f3342; - add.f32 %f423, %f422, 0f3F800000; - mul.f32 %f424, %f423, %f349; - abs.f32 %f425, %f424; - setp.ltu.f32 %p199, %f425, 0f3F800000; - @%p199 bra BB7_132; - bra.uni BB7_131; - -BB7_132: - mul.f32 %f1932, %f424, %f424; - mov.f32 %f1933, 0f3BA0C9F8; - mov.f32 %f1934, 0fBA1268FB; - fma.rn.f32 %f1935, %f1934, %f1932, %f1933; - mov.f32 %f1936, 0fBCDABFD4; - fma.rn.f32 %f1937, %f1935, %f1932, %f1936; - mov.f32 %f1938, 0f3DE70331; - fma.rn.f32 %f1939, %f1937, %f1932, %f1938; - mov.f32 %f1940, 0fBEC09330; - fma.rn.f32 %f1941, %f1939, %f1932, %f1940; - mov.f32 %f1942, 0f3F906EBA; - fma.rn.f32 %f1943, %f1941, %f1932, %f1942; - mul.f32 %f3370, %f424, %f1943; - bra.uni BB7_133; - -BB7_131: - mov.f32 %f3164, 0f3F800000; - mov.f32 %f1914, 0f3A03BB71; - mov.f32 %f1915, 0fB7B730FB; - fma.rn.f32 %f1916, %f1915, %f425, %f1914; - mov.f32 %f1917, 0fBBACA3B3; - fma.rn.f32 %f1918, %f1916, %f425, %f1917; - mov.f32 %f1919, 0f3D0A7445; - fma.rn.f32 %f1920, %f1918, %f425, %f1919; - mov.f32 %f1921, 0fBE1B3B75; - fma.rn.f32 %f1922, %f1920, %f425, %f1921; - mov.f32 %f1923, 0fBF6B385A; - fma.rn.f32 %f1924, %f1922, %f425, %f1923; - mov.f32 %f1925, 0fBFD0316E; - fma.rn.f32 %f1926, %f1924, %f425, %f1925; - mov.f32 %f1927, 0fBA031CCE; - fma.rn.f32 %f1928, %f1926, %f425, %f1927; - ex2.approx.ftz.f32 %f1929, %f1928; - sub.f32 %f1931, %f3164, %f1929; - mov.b32 %r219, %f1931; - setp.ltu.f32 %p200, %f425, 0f407AD445; - selp.b32 %r220, %r219, 1065353216, %p200; - mov.b32 %r221, %f424; - and.b32 %r222, %r221, -2147483648; - or.b32 %r223, %r220, %r222; - mov.b32 %f3370, %r223; - -BB7_133: - cvt.rn.f32.s32 %f3166, %r331; - sub.f32 %f3165, %f3166, %f3342; - mul.f32 %f429, %f3165, %f349; - abs.f32 %f430, %f429; - setp.ltu.f32 %p201, %f430, 0f3F800000; - @%p201 bra BB7_135; - bra.uni BB7_134; - -BB7_135: - mul.f32 %f1962, %f429, %f429; - mov.f32 %f1963, 0f3BA0C9F8; - mov.f32 %f1964, 0fBA1268FB; - fma.rn.f32 %f1965, %f1964, %f1962, %f1963; - mov.f32 %f1966, 0fBCDABFD4; - fma.rn.f32 %f1967, %f1965, %f1962, %f1966; - mov.f32 %f1968, 0f3DE70331; - fma.rn.f32 %f1969, %f1967, %f1962, %f1968; - mov.f32 %f1970, 0fBEC09330; - fma.rn.f32 %f1971, %f1969, %f1962, %f1970; - mov.f32 %f1972, 0f3F906EBA; - fma.rn.f32 %f1973, %f1971, %f1962, %f1972; - mul.f32 %f3371, %f429, %f1973; - bra.uni BB7_136; - -BB7_134: - mov.f32 %f3167, 0f3F800000; - mov.f32 %f1944, 0f3A03BB71; - mov.f32 %f1945, 0fB7B730FB; - fma.rn.f32 %f1946, %f1945, %f430, %f1944; - mov.f32 %f1947, 0fBBACA3B3; - fma.rn.f32 %f1948, %f1946, %f430, %f1947; - mov.f32 %f1949, 0f3D0A7445; - fma.rn.f32 %f1950, %f1948, %f430, %f1949; - mov.f32 %f1951, 0fBE1B3B75; - fma.rn.f32 %f1952, %f1950, %f430, %f1951; - mov.f32 %f1953, 0fBF6B385A; - fma.rn.f32 %f1954, %f1952, %f430, %f1953; - mov.f32 %f1955, 0fBFD0316E; - fma.rn.f32 %f1956, %f1954, %f430, %f1955; - mov.f32 %f1957, 0fBA031CCE; - fma.rn.f32 %f1958, %f1956, %f430, %f1957; - ex2.approx.ftz.f32 %f1959, %f1958; - sub.f32 %f1961, %f3167, %f1959; - mov.b32 %r224, %f1961; - setp.ltu.f32 %p202, %f430, 0f407AD445; - selp.b32 %r225, %r224, 1065353216, %p202; - mov.b32 %r226, %f429; - and.b32 %r227, %r226, -2147483648; - or.b32 %r228, %r225, %r227; - mov.b32 %f3371, %r228; - -BB7_136: - sub.f32 %f1976, %f3370, %f3371; - mul.f32 %f434, %f1976, 0f3F000000; - mul.f32 %f1977, %f420, %f3341; - fma.rn.f32 %f435, %f434, %f1977, %f3246; - mad.lo.s32 %r229, %r331, %r71, %r330; - add.s32 %r230, %r229, %r4; - mul.wide.s32 %rd108, %r230, 4; - add.s64 %rd109, %rd1, %rd108; - ld.global.f32 %f436, [%rd109]; - // inline asm - rcp.approx.ftz.f32 %f1974,%f354; - // inline asm - mul.f32 %f1978, %f1974, %f355; - mul.f32 %f1979, %f1978, %f1978; - mov.f32 %f1980, 0f3C4CAF63; - mov.f32 %f1981, 0f3B18F0FE; - fma.rn.f32 %f1982, %f1981, %f1979, %f1980; - mov.f32 %f1983, 0f3DAAAABD; - fma.rn.f32 %f1984, %f1982, %f1979, %f1983; - mul.rn.f32 %f1985, %f1984, %f1979; - mul.rn.f32 %f1986, %f1985, %f1978; - sub.f32 %f1987, %f353, %f1978; - neg.f32 %f1988, %f1978; - add.f32 %f1989, %f1987, %f1987; - fma.rn.f32 %f1990, %f1988, %f353, %f1989; - mul.rn.f32 %f1991, %f1974, %f1990; - add.f32 %f1992, %f1986, %f1978; - sub.f32 %f1993, %f1978, %f1992; - add.f32 %f1994, %f1986, %f1993; - add.f32 %f1995, %f1991, %f1994; - add.f32 %f1996, %f1992, %f1995; - sub.f32 %f1997, %f1992, %f1996; - add.f32 %f1998, %f1995, %f1997; - add.f32 %f1999, %f356, %f1996; - sub.f32 %f2000, %f356, %f1999; - add.f32 %f2001, %f1996, %f2000; - add.f32 %f2002, %f1998, %f2001; - add.f32 %f2003, %f357, %f2002; - add.f32 %f2004, %f1999, %f2003; - sub.f32 %f2005, %f1999, %f2004; - add.f32 %f2006, %f2003, %f2005; - mul.rn.f32 %f2008, %f1807, %f2004; + fma.rn.f32 %f1737, %f1662, %f1736, %f1661; + fma.rn.f32 %f1738, %f1737, %f1736, %f1664; + mul.rn.f32 %f1739, %f1738, %f1736; + mul.rn.f32 %f1740, %f1739, %f1735; + sub.f32 %f1741, %f1731, %f1735; + add.f32 %f1742, %f1741, %f1741; + neg.f32 %f1743, %f1735; + fma.rn.f32 %f1744, %f1743, %f1731, %f1742; + mul.rn.f32 %f1745, %f1733, %f1744; + add.f32 %f1746, %f1740, %f1735; + sub.f32 %f1747, %f1735, %f1746; + add.f32 %f1748, %f1740, %f1747; + add.f32 %f1749, %f1745, %f1748; + add.f32 %f1750, %f1746, %f1749; + sub.f32 %f1751, %f1746, %f1750; + add.f32 %f1752, %f1749, %f1751; + mul.rn.f32 %f1753, %f1729, %f1680; + mul.rn.f32 %f1754, %f1729, %f1682; + add.f32 %f1755, %f1753, %f1750; + sub.f32 %f1756, %f1753, %f1755; + add.f32 %f1757, %f1750, %f1756; + add.f32 %f1758, %f1752, %f1757; + add.f32 %f1759, %f1754, %f1758; + add.f32 %f1760, %f1755, %f1759; + sub.f32 %f1761, %f1755, %f1760; + add.f32 %f1762, %f1759, %f1761; + mul.rn.f32 %f1763, %f1601, %f1760; + neg.f32 %f1764, %f1763; + fma.rn.f32 %f1765, %f1601, %f1760, %f1764; + fma.rn.f32 %f1766, %f1601, %f1762, %f1765; + fma.rn.f32 %f1767, %f1569, %f1760, %f1766; + add.rn.f32 %f1768, %f1763, %f1767; + neg.f32 %f1769, %f1768; + add.rn.f32 %f1770, %f1763, %f1769; + add.rn.f32 %f1771, %f1770, %f1767; + mov.b32 %r692, %f1768; + setp.eq.s32 %p609, %r692, 1118925336; + add.s32 %r693, %r692, -1; + mov.b32 %f1772, %r693; + add.f32 %f1773, %f1771, 0f37000000; + selp.f32 %f313, %f1773, %f1771, %p609; + selp.f32 %f1774, %f1772, %f1768, %p609; + mul.rn.f32 %f1775, %f1774, %f1705; + cvt.rzi.f32.f32 %f1776, %f1775; + abs.f32 %f1777, %f1776; + setp.gt.f32 %p610, %f1777, 0f42FC0000; + mov.b32 %r694, %f1776; + and.b32 %r695, %r694, -2147483648; + or.b32 %r696, %r695, 1123811328; + mov.b32 %f1778, %r696; + selp.f32 %f1779, %f1778, %f1776, %p610; + fma.rn.f32 %f1780, %f1779, %f1711, %f1774; + fma.rn.f32 %f1781, %f1779, %f1713, %f1780; + mul.f32 %f1782, %f1781, 0f3FB8AA3B; + add.f32 %f1783, %f1779, 0f4B40007F; + mov.b32 %r697, %f1783; + shl.b32 %r698, %r697, 23; + mov.b32 %f1784, %r698; + ex2.approx.ftz.f32 %f1785, %f1782; + mul.f32 %f314, %f1785, %f1784; + add.f32 %f315, %f306, 0f40000000; + setp.lt.f32 %p611, %f311, 0f00000000; + and.pred %p32, %p611, %p606; + selp.f32 %f316, 0fFF800000, 0f7F800000, %p31; + add.f32 %f1786, %f311, %f311; + selp.f32 %f317, %f1786, 0f00000000, %p606; + add.f32 %f1787, %f312, 0f40000000; + mov.b32 %r98, %f1787; + add.f32 %f318, %f311, 0f40000000; + selp.f32 %f319, 0fFF800000, 0f7F800000, %p32; + add.f32 %f1788, %f1603, 0f3F800000; + sub.f32 %f1789, %f1788, %f3069; + div.rn.f32 %f320, %f1789, %f3065; + abs.f32 %f321, %f320; + setp.lt.f32 %p612, %f321, 0f00800000; + mul.f32 %f1790, %f321, 0f4B800000; + selp.f32 %f1791, %f1790, %f321, %p612; + selp.f32 %f1792, 0fC3170000, 0fC2FE0000, %p612; + mov.b32 %r699, %f1791; + and.b32 %r700, %r699, 8388607; + or.b32 %r701, %r700, 1065353216; + mov.b32 %f1793, %r701; + shr.u32 %r702, %r699, 23; + cvt.rn.f32.u32 %f1794, %r702; + add.f32 %f1795, %f1792, %f1794; + setp.gt.f32 %p613, %f1793, 0f3FB504F3; + mul.f32 %f1796, %f1793, 0f3F000000; + add.f32 %f1797, %f1795, 0f3F800000; + selp.f32 %f1798, %f1797, %f1795, %p613; + selp.f32 %f1799, %f1796, %f1793, %p613; + add.f32 %f1800, %f1799, 0fBF800000; + add.f32 %f1801, %f1799, 0f3F800000; + rcp.approx.ftz.f32 %f1802, %f1801; + add.f32 %f1803, %f1800, %f1800; + mul.f32 %f1804, %f1803, %f1802; + mul.f32 %f1805, %f1804, %f1804; + fma.rn.f32 %f1806, %f1662, %f1805, %f1661; + fma.rn.f32 %f1807, %f1806, %f1805, %f1664; + mul.rn.f32 %f1808, %f1807, %f1805; + mul.rn.f32 %f1809, %f1808, %f1804; + sub.f32 %f1810, %f1800, %f1804; + add.f32 %f1811, %f1810, %f1810; + neg.f32 %f1812, %f1804; + fma.rn.f32 %f1813, %f1812, %f1800, %f1811; + mul.rn.f32 %f1814, %f1802, %f1813; + add.f32 %f1815, %f1809, %f1804; + sub.f32 %f1816, %f1804, %f1815; + add.f32 %f1817, %f1809, %f1816; + add.f32 %f1818, %f1814, %f1817; + add.f32 %f1819, %f1815, %f1818; + sub.f32 %f1820, %f1815, %f1819; + add.f32 %f1821, %f1818, %f1820; + mul.rn.f32 %f1822, %f1798, %f1680; + mul.rn.f32 %f1823, %f1798, %f1682; + add.f32 %f1824, %f1822, %f1819; + sub.f32 %f1825, %f1822, %f1824; + add.f32 %f1826, %f1819, %f1825; + add.f32 %f1827, %f1821, %f1826; + add.f32 %f1828, %f1823, %f1827; + add.f32 %f1829, %f1824, %f1828; + sub.f32 %f1830, %f1824, %f1829; + add.f32 %f1831, %f1828, %f1830; + mul.rn.f32 %f1832, %f1601, %f1829; + neg.f32 %f1833, %f1832; + fma.rn.f32 %f1834, %f1601, %f1829, %f1833; + fma.rn.f32 %f1835, %f1601, %f1831, %f1834; + fma.rn.f32 %f1836, %f1569, %f1829, %f1835; + add.rn.f32 %f1837, %f1832, %f1836; + neg.f32 %f1838, %f1837; + add.rn.f32 %f1839, %f1832, %f1838; + add.rn.f32 %f1840, %f1839, %f1836; + mov.b32 %r703, %f1837; + setp.eq.s32 %p614, %r703, 1118925336; + add.s32 %r704, %r703, -1; + mov.b32 %f1841, %r704; + add.f32 %f1842, %f1840, 0f37000000; + selp.f32 %f322, %f1842, %f1840, %p614; + selp.f32 %f1843, %f1841, %f1837, %p614; + mul.rn.f32 %f1844, %f1843, %f1705; + cvt.rzi.f32.f32 %f1845, %f1844; + abs.f32 %f1846, %f1845; + setp.gt.f32 %p615, %f1846, 0f42FC0000; + mov.b32 %r705, %f1845; + and.b32 %r706, %r705, -2147483648; + or.b32 %r707, %r706, 1123811328; + mov.b32 %f1847, %r707; + selp.f32 %f1848, %f1847, %f1845, %p615; + fma.rn.f32 %f1849, %f1848, %f1711, %f1843; + fma.rn.f32 %f1850, %f1848, %f1713, %f1849; + mul.f32 %f1851, %f1850, 0f3FB8AA3B; + add.f32 %f1852, %f1848, 0f4B40007F; + mov.b32 %r708, %f1852; + shl.b32 %r709, %r708, 23; + mov.b32 %f1853, %r709; + ex2.approx.ftz.f32 %f1854, %f1851; + mul.f32 %f323, %f1854, %f1853; + setp.lt.f32 %p616, %f320, 0f00000000; + and.pred %p33, %p616, %p606; + add.f32 %f1855, %f320, %f320; + selp.f32 %f324, %f1855, 0f00000000, %p606; + add.f32 %f1856, %f321, 0f40000000; + mov.b32 %r99, %f1856; + div.rn.f32 %f325, %f301, %f3065; + abs.f32 %f326, %f325; + setp.lt.f32 %p617, %f326, 0f00800000; + mul.f32 %f1857, %f326, 0f4B800000; + selp.f32 %f1858, %f1857, %f326, %p617; + selp.f32 %f1859, 0fC3170000, 0fC2FE0000, %p617; + mov.b32 %r710, %f1858; + and.b32 %r711, %r710, 8388607; + or.b32 %r712, %r711, 1065353216; + mov.b32 %f1860, %r712; + shr.u32 %r713, %r710, 23; + cvt.rn.f32.u32 %f1861, %r713; + add.f32 %f1862, %f1859, %f1861; + setp.gt.f32 %p618, %f1860, 0f3FB504F3; + mul.f32 %f1863, %f1860, 0f3F000000; + add.f32 %f1864, %f1862, 0f3F800000; + selp.f32 %f1865, %f1864, %f1862, %p618; + selp.f32 %f1866, %f1863, %f1860, %p618; + add.f32 %f1867, %f1866, 0fBF800000; + add.f32 %f1868, %f1866, 0f3F800000; + rcp.approx.ftz.f32 %f1869, %f1868; + add.f32 %f1870, %f1867, %f1867; + mul.f32 %f1871, %f1870, %f1869; + mul.f32 %f1872, %f1871, %f1871; + fma.rn.f32 %f1873, %f1662, %f1872, %f1661; + fma.rn.f32 %f1874, %f1873, %f1872, %f1664; + mul.rn.f32 %f1875, %f1874, %f1872; + mul.rn.f32 %f1876, %f1875, %f1871; + sub.f32 %f1877, %f1867, %f1871; + add.f32 %f1878, %f1877, %f1877; + neg.f32 %f1879, %f1871; + fma.rn.f32 %f1880, %f1879, %f1867, %f1878; + mul.rn.f32 %f1881, %f1869, %f1880; + add.f32 %f1882, %f1876, %f1871; + sub.f32 %f1883, %f1871, %f1882; + add.f32 %f1884, %f1876, %f1883; + add.f32 %f1885, %f1881, %f1884; + add.f32 %f1886, %f1882, %f1885; + sub.f32 %f1887, %f1882, %f1886; + add.f32 %f1888, %f1885, %f1887; + mul.rn.f32 %f1889, %f1865, %f1680; + mul.rn.f32 %f1890, %f1865, %f1682; + add.f32 %f1891, %f1889, %f1886; + sub.f32 %f1892, %f1889, %f1891; + add.f32 %f1893, %f1886, %f1892; + add.f32 %f1894, %f1888, %f1893; + add.f32 %f1895, %f1890, %f1894; + add.f32 %f1896, %f1891, %f1895; + sub.f32 %f1897, %f1891, %f1896; + add.f32 %f1898, %f1895, %f1897; + mul.rn.f32 %f1899, %f1601, %f1896; + neg.f32 %f1900, %f1899; + fma.rn.f32 %f1901, %f1601, %f1896, %f1900; + fma.rn.f32 %f1902, %f1601, %f1898, %f1901; + fma.rn.f32 %f1903, %f1569, %f1896, %f1902; + add.rn.f32 %f1904, %f1899, %f1903; + neg.f32 %f1905, %f1904; + add.rn.f32 %f1906, %f1899, %f1905; + add.rn.f32 %f1907, %f1906, %f1903; + mov.b32 %r714, %f1904; + setp.eq.s32 %p619, %r714, 1118925336; + add.s32 %r715, %r714, -1; + mov.b32 %f1908, %r715; + add.f32 %f1909, %f1907, 0f37000000; + selp.f32 %f327, %f1909, %f1907, %p619; + selp.f32 %f1910, %f1908, %f1904, %p619; + mul.rn.f32 %f1911, %f1910, %f1705; + cvt.rzi.f32.f32 %f1912, %f1911; + abs.f32 %f1913, %f1912; + setp.gt.f32 %p620, %f1913, 0f42FC0000; + mov.b32 %r716, %f1912; + and.b32 %r717, %r716, -2147483648; + or.b32 %r718, %r717, 1123811328; + mov.b32 %f1914, %r718; + selp.f32 %f1915, %f1914, %f1912, %p620; + fma.rn.f32 %f1916, %f1915, %f1711, %f1910; + fma.rn.f32 %f1917, %f1915, %f1713, %f1916; + mul.f32 %f1918, %f1917, 0f3FB8AA3B; + add.f32 %f1919, %f1915, 0f4B40007F; + mov.b32 %r719, %f1919; + shl.b32 %r720, %r719, 23; + mov.b32 %f1920, %r720; + ex2.approx.ftz.f32 %f1921, %f1918; + mul.f32 %f328, %f1921, %f1920; + add.f32 %f329, %f320, 0f40000000; + setp.lt.f32 %p621, %f325, 0f00000000; + and.pred %p34, %p621, %p606; + selp.f32 %f330, 0fFF800000, 0f7F800000, %p33; + add.f32 %f1922, %f325, %f325; + selp.f32 %f331, %f1922, 0f00000000, %p606; + add.f32 %f1923, %f326, 0f40000000; + mov.b32 %r100, %f1923; + add.f32 %f332, %f301, 0f3F800000; + add.f32 %f333, %f325, 0f40000000; + selp.f32 %f334, 0fFF800000, 0f7F800000, %p34; + setp.geu.f32 %p35, %f306, 0f00000000; + setp.geu.f32 %p36, %f311, 0f00000000; + setp.geu.f32 %p37, %f320, 0f00000000; + setp.geu.f32 %p38, %f325, 0f00000000; + mov.u32 %r849, %r673; + +$L__BB7_376: + setp.ltu.f32 %p622, %f302, 0f3F8060FE; + mov.f32 %f3115, %f303; + @%p622 bra $L__BB7_378; + + ex2.approx.ftz.f32 %f1924, %f303; + sub.f32 %f1926, %f1598, %f1924; + mov.b32 %r721, %f1926; + or.b32 %r722, %r95, %r721; + mov.b32 %f3115, %r722; + +$L__BB7_378: + setp.ltu.f32 %p623, %f304, 0f3F8060FE; + mov.f32 %f3116, %f305; + @%p623 bra $L__BB7_380; + + ex2.approx.ftz.f32 %f1927, %f305; + sub.f32 %f1929, %f1598, %f1927; + mov.b32 %r723, %f1929; + or.b32 %r724, %r96, %r723; + mov.b32 %f3116, %r724; + +$L__BB7_380: + sub.f32 %f1930, %f3115, %f3116; + mul.f32 %f361, %f1930, 0f3F000000; + cvt.rn.f32.s32 %f362, %r849; + sub.f32 %f363, %f362, %f3068; + add.f32 %f1931, %f363, 0f3F000000; + mul.f32 %f364, %f1931, %f277; + abs.f32 %f1932, %f364; + setp.ltu.f32 %p624, %f1932, 0f3F8060FE; + setp.ge.f32 %p625, %f1932, 0f3F8060FE; + mul.f32 %f1933, %f364, %f364; + selp.f32 %f1934, %f1932, %f1933, %p625; + selp.f32 %f1935, 0f3789CA3C, 0f38B1E96A, %p625; + selp.f32 %f1936, 0fB9F560B9, 0fBA574D20, %p625; + fma.rn.f32 %f1937, %f1935, %f1934, %f1936; + selp.f32 %f1938, 0f3BAC840B, 0f3BAAD5EA, %p625; + fma.rn.f32 %f1939, %f1937, %f1934, %f1938; + selp.f32 %f1940, 0fBD0C8162, 0fBCDC1BE7, %p625; + fma.rn.f32 %f1941, %f1939, %f1934, %f1940; + selp.f32 %f1942, 0f3E1CF906, 0f3DE718AF, %p625; + fma.rn.f32 %f1943, %f1941, %f1934, %f1942; + selp.f32 %f1944, 0f3F6A937E, 0fBEC093AC, %p625; + fma.rn.f32 %f1945, %f1943, %f1934, %f1944; + selp.f32 %f1946, 0f3F20D842, 0f3E0375D3, %p625; + fma.rn.f32 %f1947, %f1945, %f1934, %f1946; + neg.f32 %f1948, %f1932; + selp.f32 %f1949, %f1948, %f364, %p625; + fma.rn.f32 %f3117, %f1947, %f1949, %f1949; + @%p624 bra $L__BB7_382; + + ex2.approx.ftz.f32 %f1950, %f3117; + sub.f32 %f1952, %f1598, %f1950; + mov.b32 %r725, %f1952; + mov.b32 %r726, %f364; + and.b32 %r727, %r726, -2147483648; + or.b32 %r728, %r727, %r725; + mov.b32 %f3117, %r728; + +$L__BB7_382: + add.f32 %f368, %f363, 0fBF000000; + mul.f32 %f369, %f368, %f277; + abs.f32 %f1953, %f369; + setp.ltu.f32 %p626, %f1953, 0f3F8060FE; + setp.ge.f32 %p627, %f1953, 0f3F8060FE; + mul.f32 %f1954, %f369, %f369; + selp.f32 %f1955, %f1953, %f1954, %p627; + selp.f32 %f1956, 0f3789CA3C, 0f38B1E96A, %p627; + selp.f32 %f1957, 0fB9F560B9, 0fBA574D20, %p627; + fma.rn.f32 %f1958, %f1956, %f1955, %f1957; + selp.f32 %f1959, 0f3BAC840B, 0f3BAAD5EA, %p627; + fma.rn.f32 %f1960, %f1958, %f1955, %f1959; + selp.f32 %f1961, 0fBD0C8162, 0fBCDC1BE7, %p627; + fma.rn.f32 %f1962, %f1960, %f1955, %f1961; + selp.f32 %f1963, 0f3E1CF906, 0f3DE718AF, %p627; + fma.rn.f32 %f1964, %f1962, %f1955, %f1963; + selp.f32 %f1965, 0f3F6A937E, 0fBEC093AC, %p627; + fma.rn.f32 %f1966, %f1964, %f1955, %f1965; + selp.f32 %f1967, 0f3F20D842, 0f3E0375D3, %p627; + fma.rn.f32 %f1968, %f1966, %f1955, %f1967; + neg.f32 %f1969, %f1953; + selp.f32 %f1970, %f1969, %f369, %p627; + fma.rn.f32 %f3118, %f1968, %f1970, %f1970; + @%p626 bra $L__BB7_384; + + ex2.approx.ftz.f32 %f1971, %f3118; + sub.f32 %f1973, %f1598, %f1971; + mov.b32 %r729, %f1973; + mov.b32 %r730, %f369; + and.b32 %r731, %r730, -2147483648; + or.b32 %r732, %r731, %r729; + mov.b32 %f3118, %r732; + +$L__BB7_384: + sub.f32 %f1975, %f3117, %f3118; + mul.f32 %f373, %f1975, 0f3F000000; + mul.f32 %f1976, %f361, %f3067; + fma.rn.f32 %f374, %f373, %f1976, %f3066; + mad.lo.s32 %r733, %r849, %r104, %r848; + add.s32 %r734, %r733, %r2; + mul.wide.s32 %rd32, %r734, 4; + add.s64 %rd33, %rd1, %rd32; + ld.global.f32 %f375, [%rd33]; + setp.eq.f32 %p628, %f309, 0f7F800000; + mov.f32 %f3119, 0f7F800000; + @%p628 bra $L__BB7_386; + + fma.rn.f32 %f3119, %f309, %f308, %f309; + +$L__BB7_386: + mov.b32 %r735, %f3119; + xor.b32 %r736, %r735, -2147483648; + mov.b32 %f1977, %r736; + selp.f32 %f378, %f1977, %f3119, %p31; + setp.eq.f32 %p629, %f306, 0f00000000; + selp.f32 %f3120, %f310, %f378, %p629; + @%p35 bra $L__BB7_389; + + cvt.rzi.f32.f32 %f1979, %f1601; + setp.eq.f32 %p630, %f1979, 0f40000000; + mov.f32 %f3120, %f378; + @%p630 bra $L__BB7_389; + + mov.f32 %f3120, 0f7FFFFFFF; + +$L__BB7_389: + setp.eq.f32 %p631, %f314, 0f7F800000; + mov.f32 %f3121, 0f7F800000; + @%p631 bra $L__BB7_391; + + fma.rn.f32 %f3121, %f314, %f313, %f314; + +$L__BB7_391: + mov.b32 %r737, %f3121; + xor.b32 %r738, %r737, -2147483648; + mov.b32 %f1982, %r738; + selp.f32 %f383, %f1982, %f3121, %p32; + setp.eq.f32 %p632, %f311, 0f00000000; + selp.f32 %f3122, %f317, %f383, %p632; + @%p36 bra $L__BB7_394; + + cvt.rzi.f32.f32 %f1984, %f1601; + setp.eq.f32 %p633, %f1984, 0f40000000; + mov.f32 %f3122, %f383; + @%p633 bra $L__BB7_394; + + mov.f32 %f3122, 0f7FFFFFFF; + +$L__BB7_394: + setp.gtu.f32 %p634, %f307, 0f7F800000; + mov.f32 %f3123, 0f7F800000; + selp.f32 %f1987, %f315, %f3120, %p634; + setp.neu.f32 %p635, %f307, 0f7F800000; + selp.f32 %f1988, %f1987, %f316, %p635; + setp.gt.s32 %p636, %r97, 2139095039; + selp.f32 %f1989, %f1988, %f3120, %p636; + mul.f32 %f1990, %f1989, 0fBF000000; + setp.eq.f32 %p637, %f306, 0f3F800000; + selp.f32 %f1991, 0fBF000000, %f1990, %p637; + mov.f32 %f1993, 0f3BBB989D; + fma.rn.f32 %f1994, %f1991, %f1993, %f1592; + mov.f32 %f1996, 0f437C0000; + cvt.sat.f32.f32 %f1997, %f1994; + mov.f32 %f1998, 0f4B400001; + fma.rm.f32 %f1999, %f1997, %f1996, %f1998; + setp.gtu.f32 %p638, %f312, 0f7F800000; + selp.f32 %f2000, %f318, %f3122, %p638; + setp.neu.f32 %p639, %f312, 0f7F800000; + selp.f32 %f2001, %f2000, %f319, %p639; + setp.gt.s32 %p640, %r98, 2139095039; + selp.f32 %f2002, %f2001, %f3122, %p640; + mul.f32 %f2003, %f2002, 0fBF000000; + setp.eq.f32 %p641, %f311, 0f3F800000; + selp.f32 %f2004, 0fBF000000, %f2003, %p641; + fma.rn.f32 %f2005, %f2004, %f1993, %f1592; + cvt.sat.f32.f32 %f2006, %f2005; + fma.rm.f32 %f2007, %f2006, %f1996, %f1998; + add.f32 %f2008, %f2007, 0fCB40007F; neg.f32 %f2009, %f2008; - fma.rn.f32 %f2010, %f1807, %f2004, %f2009; - fma.rn.f32 %f2011, %f1807, %f2006, %f2010; - mov.f32 %f2012, 0f00000000; - fma.rn.f32 %f2013, %f2012, %f2004, %f2011; - add.rn.f32 %f2014, %f2008, %f2013; - neg.f32 %f2015, %f2014; - add.rn.f32 %f2016, %f2008, %f2015; - add.rn.f32 %f2017, %f2016, %f2013; - mov.b32 %r231, %f2014; - setp.eq.s32 %p203, %r231, 1118925336; - add.s32 %r232, %r231, -1; - mov.b32 %f2018, %r232; - add.f32 %f2019, %f2017, 0f37000000; - selp.f32 %f2020, %f2018, %f2014, %p203; - selp.f32 %f437, %f2019, %f2017, %p203; - mul.f32 %f2021, %f2020, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2022, %f2021; - mov.f32 %f2023, 0fBF317200; - fma.rn.f32 %f2024, %f2022, %f2023, %f2020; - mov.f32 %f2025, 0fB5BFBE8E; - fma.rn.f32 %f2026, %f2022, %f2025, %f2024; - mul.f32 %f2027, %f2026, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2028, %f2027; - add.f32 %f2029, %f2022, 0f00000000; - ex2.approx.f32 %f2030, %f2029; - mul.f32 %f2031, %f2028, %f2030; - setp.lt.f32 %p204, %f2020, 0fC2D20000; - selp.f32 %f2032, 0f00000000, %f2031, %p204; - setp.gt.f32 %p205, %f2020, 0f42D20000; - selp.f32 %f3372, 0f7F800000, %f2032, %p205; - setp.eq.f32 %p206, %f3372, 0f7F800000; - @%p206 bra BB7_138; - - fma.rn.f32 %f3372, %f3372, %f437, %f3372; - -BB7_138: - setp.geu.f32 %p370, %f350, 0f00000000; - mov.b32 %r233, %f3372; - xor.b32 %r234, %r233, -2147483648; - mov.b32 %f2033, %r234; - selp.f32 %f441, %f2033, %f3372, %p7; - setp.eq.f32 %p207, %f350, 0f00000000; - selp.f32 %f3373, %f358, %f441, %p207; - @%p370 bra BB7_140; - - cvt.rzi.f32.f32 %f2035, %f1807; - setp.neu.f32 %p208, %f2035, 0f40000000; - selp.f32 %f3373, 0f7FFFFFFF, %f441, %p208; - -BB7_140: - abs.f32 %f3177, %f350; - add.f32 %f3176, %f3177, 0f40000000; - mov.b32 %r311, %f3176; - mov.f32 %f3175, 0f00000000; - mov.f32 %f3174, 0f3DAAAABD; - mov.f32 %f3173, 0f3C4CAF63; - mov.f32 %f3172, 0f3B18F0FE; - mov.f32 %f3171, 0fB5BFBE8E; - mov.f32 %f3170, 0fBF317200; - selp.f32 %f3169, 0fFF800000, 0f7F800000, %p7; - add.f32 %f3168, %f350, 0f40000000; - setp.gtu.f32 %p209, %f3177, 0f7F800000; - selp.f32 %f2038, %f3168, %f3373, %p209; - setp.neu.f32 %p210, %f3177, 0f7F800000; - selp.f32 %f2039, %f2038, %f3169, %p210; - setp.gt.s32 %p211, %r311, 2139095039; - selp.f32 %f2040, %f2039, %f3373, %p211; - mul.f32 %f2041, %f2040, 0fBF000000; - setp.eq.f32 %p212, %f350, 0f3F800000; - selp.f32 %f2042, 0fBF000000, %f2041, %p212; - mul.f32 %f2043, %f2042, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2044, %f2043; - fma.rn.f32 %f2046, %f2044, %f3170, %f2042; - fma.rn.f32 %f2048, %f2044, %f3171, %f2046; - mul.f32 %f2049, %f2048, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2050, %f2049; - add.f32 %f2051, %f2044, 0f00000000; - ex2.approx.f32 %f2052, %f2051; - mul.f32 %f2053, %f2050, %f2052; - setp.lt.f32 %p213, %f2042, 0fC2D20000; - selp.f32 %f2054, 0f00000000, %f2053, %p213; - setp.gt.f32 %p214, %f2042, 0f42D20000; - selp.f32 %f445, 0f7F800000, %f2054, %p214; - // inline asm - rcp.approx.ftz.f32 %f2036,%f364; - // inline asm - mul.f32 %f2055, %f2036, %f365; - mul.f32 %f2056, %f2055, %f2055; - fma.rn.f32 %f2059, %f3172, %f2056, %f3173; - fma.rn.f32 %f2061, %f2059, %f2056, %f3174; - mul.rn.f32 %f2062, %f2061, %f2056; - mul.rn.f32 %f2063, %f2062, %f2055; - sub.f32 %f2064, %f363, %f2055; - neg.f32 %f2065, %f2055; - add.f32 %f2066, %f2064, %f2064; - fma.rn.f32 %f2067, %f2065, %f363, %f2066; - mul.rn.f32 %f2068, %f2036, %f2067; - add.f32 %f2069, %f2063, %f2055; - sub.f32 %f2070, %f2055, %f2069; - add.f32 %f2071, %f2063, %f2070; - add.f32 %f2072, %f2068, %f2071; - add.f32 %f2073, %f2069, %f2072; - sub.f32 %f2074, %f2069, %f2073; - add.f32 %f2075, %f2072, %f2074; - add.f32 %f2076, %f366, %f2073; - sub.f32 %f2077, %f366, %f2076; - add.f32 %f2078, %f2073, %f2077; - add.f32 %f2079, %f2075, %f2078; - add.f32 %f2080, %f367, %f2079; - add.f32 %f2081, %f2076, %f2080; - sub.f32 %f2082, %f2076, %f2081; - add.f32 %f2083, %f2080, %f2082; - mul.rn.f32 %f2085, %f1807, %f2081; - neg.f32 %f2086, %f2085; - fma.rn.f32 %f2087, %f1807, %f2081, %f2086; - fma.rn.f32 %f2088, %f1807, %f2083, %f2087; - fma.rn.f32 %f2090, %f3175, %f2081, %f2088; - add.rn.f32 %f2091, %f2085, %f2090; - neg.f32 %f2092, %f2091; - add.rn.f32 %f2093, %f2085, %f2092; - add.rn.f32 %f2094, %f2093, %f2090; - mov.b32 %r235, %f2091; - setp.eq.s32 %p215, %r235, 1118925336; - add.s32 %r236, %r235, -1; - mov.b32 %f2095, %r236; - add.f32 %f2096, %f2094, 0f37000000; - selp.f32 %f2097, %f2095, %f2091, %p215; - selp.f32 %f446, %f2096, %f2094, %p215; - mul.f32 %f2098, %f2097, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2099, %f2098; - fma.rn.f32 %f2100, %f2099, %f3170, %f2097; - fma.rn.f32 %f2101, %f2099, %f3171, %f2100; - mul.f32 %f2102, %f2101, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2103, %f2102; - add.f32 %f2104, %f2099, 0f00000000; - ex2.approx.f32 %f2105, %f2104; - mul.f32 %f2106, %f2103, %f2105; - setp.lt.f32 %p216, %f2097, 0fC2D20000; - selp.f32 %f2107, 0f00000000, %f2106, %p216; - setp.gt.f32 %p217, %f2097, 0f42D20000; - selp.f32 %f3374, 0f7F800000, %f2107, %p217; - setp.eq.f32 %p218, %f3374, 0f7F800000; - @%p218 bra BB7_142; - - fma.rn.f32 %f3374, %f3374, %f446, %f3374; - -BB7_142: - setp.geu.f32 %p371, %f361, 0f00000000; - mov.b32 %r237, %f3374; - xor.b32 %r238, %r237, -2147483648; - mov.b32 %f2108, %r238; - selp.f32 %f450, %f2108, %f3374, %p8; - setp.eq.f32 %p219, %f361, 0f00000000; - selp.f32 %f3375, %f368, %f450, %p219; - @%p371 bra BB7_144; - - cvt.rzi.f32.f32 %f2110, %f1807; - setp.neu.f32 %p220, %f2110, 0f40000000; - selp.f32 %f3375, 0f7FFFFFFF, %f450, %p220; - -BB7_144: - abs.f32 %f3189, %f361; - add.f32 %f3188, %f3189, 0f40000000; - mov.b32 %r312, %f3188; - mov.f32 %f3187, 0f35BFBE8E; - mov.f32 %f3186, 0f3F317200; - selp.f32 %f3185, 0fFF800000, 0f7F800000, %p8; - add.f32 %f3184, %f361, 0f40000000; - mov.f32 %f3183, 0f00000000; - mov.f32 %f3182, 0f3DAAAABD; - mov.f32 %f3181, 0f3C4CAF63; - mov.f32 %f3180, 0f3B18F0FE; - mov.f32 %f3179, 0fB5BFBE8E; - mov.f32 %f3178, 0fBF317200; - setp.gtu.f32 %p221, %f3189, 0f7F800000; - selp.f32 %f2113, %f3184, %f3375, %p221; - setp.neu.f32 %p222, %f3189, 0f7F800000; - selp.f32 %f2114, %f2113, %f3185, %p222; - setp.gt.s32 %p223, %r312, 2139095039; - selp.f32 %f2115, %f2114, %f3375, %p223; - mul.f32 %f2116, %f2115, 0fBF000000; - setp.eq.f32 %p224, %f361, 0f3F800000; - selp.f32 %f2117, 0fBF000000, %f2116, %p224; - mul.f32 %f2118, %f2117, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2119, %f2118; - fma.rn.f32 %f2121, %f2119, %f3178, %f2117; - fma.rn.f32 %f2123, %f2119, %f3179, %f2121; - mul.f32 %f2124, %f2123, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2125, %f2124; - add.f32 %f2126, %f2119, 0f00000000; - ex2.approx.f32 %f2127, %f2126; - mul.f32 %f2128, %f2125, %f2127; - setp.lt.f32 %p225, %f2117, 0fC2D20000; - selp.f32 %f2129, 0f00000000, %f2128, %p225; - setp.gt.f32 %p226, %f2117, 0f42D20000; - selp.f32 %f2130, 0f7F800000, %f2129, %p226; - sub.f32 %f2131, %f445, %f2130; - mul.f32 %f2132, %f336, %f2131; - mul.f32 %f454, %f434, %f2132; - add.f32 %f2133, %f421, 0f3F800000; - sub.f32 %f2134, %f2133, %f3342; - div.rn.f32 %f455, %f2134, %f3338; - abs.f32 %f456, %f455; - setp.lt.f32 %p227, %f456, 0f00800000; - mul.f32 %f2135, %f456, 0f4B800000; - selp.f32 %f2136, 0fC3170000, 0fC2FE0000, %p227; - selp.f32 %f2137, %f2135, %f456, %p227; - mov.b32 %r239, %f2137; - and.b32 %r240, %r239, 8388607; - or.b32 %r241, %r240, 1065353216; - mov.b32 %f2138, %r241; - shr.u32 %r242, %r239, 23; - cvt.rn.f32.u32 %f2139, %r242; - add.f32 %f2140, %f2136, %f2139; - setp.gt.f32 %p228, %f2138, 0f3FB504F3; - mul.f32 %f2141, %f2138, 0f3F000000; - add.f32 %f2142, %f2140, 0f3F800000; - selp.f32 %f2143, %f2141, %f2138, %p228; - selp.f32 %f2144, %f2142, %f2140, %p228; - add.f32 %f457, %f2143, 0fBF800000; - add.f32 %f2112, %f2143, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f2111,%f2112; - // inline asm - add.f32 %f459, %f457, %f457; - mul.f32 %f2145, %f2111, %f459; - mul.f32 %f2146, %f2145, %f2145; - fma.rn.f32 %f2149, %f3180, %f2146, %f3181; - fma.rn.f32 %f2151, %f2149, %f2146, %f3182; - mul.rn.f32 %f2152, %f2151, %f2146; - mul.rn.f32 %f2153, %f2152, %f2145; - sub.f32 %f2154, %f457, %f2145; - neg.f32 %f2155, %f2145; - add.f32 %f2156, %f2154, %f2154; - fma.rn.f32 %f2157, %f2155, %f457, %f2156; - mul.rn.f32 %f2158, %f2111, %f2157; - add.f32 %f2159, %f2153, %f2145; - sub.f32 %f2160, %f2145, %f2159; - add.f32 %f2161, %f2153, %f2160; - add.f32 %f2162, %f2158, %f2161; - add.f32 %f2163, %f2159, %f2162; - sub.f32 %f2164, %f2159, %f2163; - add.f32 %f2165, %f2162, %f2164; - mul.rn.f32 %f460, %f2144, %f3186; - mul.rn.f32 %f461, %f2144, %f3187; - add.f32 %f2168, %f460, %f2163; - sub.f32 %f2169, %f460, %f2168; - add.f32 %f2170, %f2163, %f2169; - add.f32 %f2171, %f2165, %f2170; - add.f32 %f2172, %f461, %f2171; - add.f32 %f2173, %f2168, %f2172; - sub.f32 %f2174, %f2168, %f2173; - add.f32 %f2175, %f2172, %f2174; - mul.rn.f32 %f2177, %f1807, %f2173; - neg.f32 %f2178, %f2177; - fma.rn.f32 %f2179, %f1807, %f2173, %f2178; - fma.rn.f32 %f2180, %f1807, %f2175, %f2179; - fma.rn.f32 %f2182, %f3183, %f2173, %f2180; - add.rn.f32 %f2183, %f2177, %f2182; - neg.f32 %f2184, %f2183; - add.rn.f32 %f2185, %f2177, %f2184; - add.rn.f32 %f2186, %f2185, %f2182; - mov.b32 %r243, %f2183; - setp.eq.s32 %p229, %r243, 1118925336; - add.s32 %r244, %r243, -1; - mov.b32 %f2187, %r244; - add.f32 %f2188, %f2186, 0f37000000; - selp.f32 %f2189, %f2187, %f2183, %p229; - selp.f32 %f462, %f2188, %f2186, %p229; - mul.f32 %f2190, %f2189, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2191, %f2190; - fma.rn.f32 %f2192, %f2191, %f3178, %f2189; - fma.rn.f32 %f2193, %f2191, %f3179, %f2192; - mul.f32 %f2194, %f2193, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2195, %f2194; - add.f32 %f2196, %f2191, 0f00000000; - ex2.approx.f32 %f2197, %f2196; - mul.f32 %f2198, %f2195, %f2197; - setp.lt.f32 %p230, %f2189, 0fC2D20000; - selp.f32 %f2199, 0f00000000, %f2198, %p230; - setp.gt.f32 %p231, %f2189, 0f42D20000; - selp.f32 %f3376, 0f7F800000, %f2199, %p231; - setp.eq.f32 %p232, %f3376, 0f7F800000; - @%p232 bra BB7_146; - - fma.rn.f32 %f3376, %f3376, %f462, %f3376; - -BB7_146: - setp.lt.f32 %p233, %f455, 0f00000000; - and.pred %p11, %p233, %p188; - mov.b32 %r245, %f3376; - xor.b32 %r246, %r245, -2147483648; - mov.b32 %f2200, %r246; - selp.f32 %f3378, %f2200, %f3376, %p11; - setp.eq.f32 %p235, %f455, 0f00000000; - @%p235 bra BB7_149; - bra.uni BB7_147; - -BB7_149: - add.f32 %f2203, %f455, %f455; - selp.f32 %f3378, %f2203, 0f00000000, %p188; - bra.uni BB7_150; - -BB7_147: - setp.geu.f32 %p236, %f455, 0f00000000; - @%p236 bra BB7_150; - - cvt.rzi.f32.f32 %f2202, %f1807; - setp.neu.f32 %p237, %f2202, 0f40000000; - selp.f32 %f3378, 0f7FFFFFFF, %f3378, %p237; - -BB7_150: - abs.f32 %f3190, %f455; - add.f32 %f2204, %f3190, 0f40000000; - mov.b32 %r47, %f2204; - setp.lt.s32 %p239, %r47, 2139095040; - @%p239 bra BB7_155; - - abs.f32 %f3201, %f455; - setp.gtu.f32 %p240, %f3201, 0f7F800000; - @%p240 bra BB7_154; - bra.uni BB7_152; - -BB7_154: - add.f32 %f3378, %f455, 0f40000000; - bra.uni BB7_155; - -BB7_152: - abs.f32 %f3202, %f455; - setp.neu.f32 %p241, %f3202, 0f7F800000; - @%p241 bra BB7_155; - - selp.f32 %f3378, 0fFF800000, 0f7F800000, %p11; - -BB7_155: - mov.f32 %f3200, 0f35BFBE8E; - mov.f32 %f3199, 0f3F317200; - mov.f32 %f3198, 0f00000000; - mov.f32 %f3197, 0f3DAAAABD; - mov.f32 %f3196, 0f3C4CAF63; - mov.f32 %f3195, 0f3B18F0FE; - mov.f32 %f3194, 0fB5BFBE8E; - mov.f32 %f3193, 0fBF317200; - cvt.rn.f32.s32 %f3192, %r331; - sub.f32 %f3191, %f3192, %f3342; - mul.f32 %f2207, %f3378, 0fBF000000; - setp.eq.f32 %p242, %f455, 0f3F800000; - selp.f32 %f2208, 0fBF000000, %f2207, %p242; - mul.f32 %f2209, %f2208, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2210, %f2209; - fma.rn.f32 %f2212, %f2210, %f3193, %f2208; - fma.rn.f32 %f2214, %f2210, %f3194, %f2212; - mul.f32 %f2215, %f2214, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2216, %f2215; - add.f32 %f2217, %f2210, 0f00000000; - ex2.approx.f32 %f2218, %f2217; - mul.f32 %f2219, %f2216, %f2218; - setp.lt.f32 %p243, %f2208, 0fC2D20000; - selp.f32 %f2220, 0f00000000, %f2219, %p243; - setp.gt.f32 %p244, %f2208, 0f42D20000; - selp.f32 %f473, 0f7F800000, %f2220, %p244; - div.rn.f32 %f474, %f3191, %f3338; - abs.f32 %f475, %f474; - setp.lt.f32 %p245, %f475, 0f00800000; - mul.f32 %f2221, %f475, 0f4B800000; - selp.f32 %f2222, 0fC3170000, 0fC2FE0000, %p245; - selp.f32 %f2223, %f2221, %f475, %p245; - mov.b32 %r247, %f2223; - and.b32 %r248, %r247, 8388607; - or.b32 %r249, %r248, 1065353216; - mov.b32 %f2224, %r249; - shr.u32 %r250, %r247, 23; - cvt.rn.f32.u32 %f2225, %r250; - add.f32 %f2226, %f2222, %f2225; - setp.gt.f32 %p246, %f2224, 0f3FB504F3; - mul.f32 %f2227, %f2224, 0f3F000000; - add.f32 %f2228, %f2226, 0f3F800000; - selp.f32 %f2229, %f2227, %f2224, %p246; - selp.f32 %f2230, %f2228, %f2226, %p246; - add.f32 %f476, %f2229, 0fBF800000; - add.f32 %f2206, %f2229, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f2205,%f2206; - // inline asm - add.f32 %f478, %f476, %f476; - mul.f32 %f2231, %f2205, %f478; - mul.f32 %f2232, %f2231, %f2231; - fma.rn.f32 %f2235, %f3195, %f2232, %f3196; - fma.rn.f32 %f2237, %f2235, %f2232, %f3197; - mul.rn.f32 %f2238, %f2237, %f2232; - mul.rn.f32 %f2239, %f2238, %f2231; - sub.f32 %f2240, %f476, %f2231; - neg.f32 %f2241, %f2231; - add.f32 %f2242, %f2240, %f2240; - fma.rn.f32 %f2243, %f2241, %f476, %f2242; - mul.rn.f32 %f2244, %f2205, %f2243; - add.f32 %f2245, %f2239, %f2231; - sub.f32 %f2246, %f2231, %f2245; - add.f32 %f2247, %f2239, %f2246; - add.f32 %f2248, %f2244, %f2247; - add.f32 %f2249, %f2245, %f2248; - sub.f32 %f2250, %f2245, %f2249; - add.f32 %f2251, %f2248, %f2250; - mul.rn.f32 %f479, %f2230, %f3199; - mul.rn.f32 %f480, %f2230, %f3200; - add.f32 %f2254, %f479, %f2249; - sub.f32 %f2255, %f479, %f2254; - add.f32 %f2256, %f2249, %f2255; - add.f32 %f2257, %f2251, %f2256; - add.f32 %f2258, %f480, %f2257; - add.f32 %f2259, %f2254, %f2258; - sub.f32 %f2260, %f2254, %f2259; - add.f32 %f2261, %f2258, %f2260; - mul.rn.f32 %f2263, %f1807, %f2259; - neg.f32 %f2264, %f2263; - fma.rn.f32 %f2265, %f1807, %f2259, %f2264; - fma.rn.f32 %f2266, %f1807, %f2261, %f2265; - fma.rn.f32 %f2268, %f3198, %f2259, %f2266; - add.rn.f32 %f2269, %f2263, %f2268; - neg.f32 %f2270, %f2269; - add.rn.f32 %f2271, %f2263, %f2270; - add.rn.f32 %f2272, %f2271, %f2268; - mov.b32 %r251, %f2269; - setp.eq.s32 %p247, %r251, 1118925336; - add.s32 %r252, %r251, -1; - mov.b32 %f2273, %r252; - add.f32 %f2274, %f2272, 0f37000000; - selp.f32 %f2275, %f2273, %f2269, %p247; - selp.f32 %f481, %f2274, %f2272, %p247; - mul.f32 %f2276, %f2275, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2277, %f2276; - fma.rn.f32 %f2278, %f2277, %f3193, %f2275; - fma.rn.f32 %f2279, %f2277, %f3194, %f2278; - mul.f32 %f2280, %f2279, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2281, %f2280; - add.f32 %f2282, %f2277, 0f00000000; - ex2.approx.f32 %f2283, %f2282; - mul.f32 %f2284, %f2281, %f2283; - setp.lt.f32 %p248, %f2275, 0fC2D20000; - selp.f32 %f2285, 0f00000000, %f2284, %p248; - setp.gt.f32 %p249, %f2275, 0f42D20000; - selp.f32 %f3379, 0f7F800000, %f2285, %p249; - setp.eq.f32 %p250, %f3379, 0f7F800000; - @%p250 bra BB7_157; - - fma.rn.f32 %f3379, %f3379, %f481, %f3379; - -BB7_157: - setp.lt.f32 %p251, %f474, 0f00000000; - and.pred %p12, %p251, %p188; - mov.b32 %r253, %f3379; - xor.b32 %r254, %r253, -2147483648; - mov.b32 %f2286, %r254; - selp.f32 %f3381, %f2286, %f3379, %p12; - setp.eq.f32 %p253, %f474, 0f00000000; - @%p253 bra BB7_160; - bra.uni BB7_158; - -BB7_160: - add.f32 %f2289, %f474, %f474; - selp.f32 %f3381, %f2289, 0f00000000, %p188; - bra.uni BB7_161; - -BB7_158: - setp.geu.f32 %p254, %f474, 0f00000000; - @%p254 bra BB7_161; - - cvt.rzi.f32.f32 %f2288, %f1807; - setp.neu.f32 %p255, %f2288, 0f40000000; - selp.f32 %f3381, 0f7FFFFFFF, %f3381, %p255; - -BB7_161: - abs.f32 %f3115, %f474; - add.f32 %f2290, %f3115, 0f40000000; - mov.b32 %r48, %f2290; - setp.lt.s32 %p257, %r48, 2139095040; - @%p257 bra BB7_166; - - abs.f32 %f3216, %f474; - setp.gtu.f32 %p258, %f3216, 0f7F800000; - @%p258 bra BB7_165; - bra.uni BB7_163; - -BB7_165: - add.f32 %f3381, %f474, 0f40000000; - bra.uni BB7_166; - -BB7_163: - abs.f32 %f3217, %f474; - setp.neu.f32 %p259, %f3217, 0f7F800000; - @%p259 bra BB7_166; - - selp.f32 %f3381, 0fFF800000, 0f7F800000, %p12; - -BB7_166: - mov.f32 %f3121, 0f00000000; - mov.f32 %f3120, 0f3DAAAABD; - mov.f32 %f3119, 0f3C4CAF63; - mov.f32 %f3118, 0f3B18F0FE; - mov.f32 %f3117, 0fB5BFBE8E; - mov.f32 %f3116, 0fBF317200; - mul.f32 %f2293, %f3381, 0fBF000000; - setp.eq.f32 %p260, %f474, 0f3F800000; - selp.f32 %f2294, 0fBF000000, %f2293, %p260; - mul.f32 %f2295, %f2294, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2296, %f2295; - fma.rn.f32 %f2298, %f2296, %f3116, %f2294; - fma.rn.f32 %f2300, %f2296, %f3117, %f2298; - mul.f32 %f2301, %f2300, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2302, %f2301; - add.f32 %f2303, %f2296, 0f00000000; - ex2.approx.f32 %f2304, %f2303; - mul.f32 %f2305, %f2302, %f2304; - setp.lt.f32 %p261, %f2294, 0fC2D20000; - selp.f32 %f2306, 0f00000000, %f2305, %p261; - setp.gt.f32 %p262, %f2294, 0f42D20000; - selp.f32 %f2307, 0f7F800000, %f2306, %p262; - sub.f32 %f2308, %f473, %f2307; - mul.f32 %f2309, %f337, %f2308; - mul.f32 %f492, %f420, %f2309; - // inline asm - rcp.approx.ftz.f32 %f2291,%f354; - // inline asm - mul.f32 %f2310, %f2291, %f355; - mul.f32 %f2311, %f2310, %f2310; - fma.rn.f32 %f2314, %f3118, %f2311, %f3119; - fma.rn.f32 %f2316, %f2314, %f2311, %f3120; - mul.rn.f32 %f2317, %f2316, %f2311; - mul.rn.f32 %f2318, %f2317, %f2310; - sub.f32 %f2319, %f353, %f2310; - neg.f32 %f2320, %f2310; - add.f32 %f2321, %f2319, %f2319; - fma.rn.f32 %f2322, %f2320, %f353, %f2321; - mul.rn.f32 %f2323, %f2291, %f2322; - add.f32 %f2324, %f2318, %f2310; - sub.f32 %f2325, %f2310, %f2324; - add.f32 %f2326, %f2318, %f2325; - add.f32 %f2327, %f2323, %f2326; - add.f32 %f2328, %f2324, %f2327; - sub.f32 %f2329, %f2324, %f2328; - add.f32 %f2330, %f2327, %f2329; - add.f32 %f2331, %f356, %f2328; - sub.f32 %f2332, %f356, %f2331; - add.f32 %f2333, %f2328, %f2332; - add.f32 %f2334, %f2330, %f2333; - add.f32 %f2335, %f357, %f2334; - add.f32 %f2336, %f2331, %f2335; - sub.f32 %f2337, %f2331, %f2336; - add.f32 %f2338, %f2335, %f2337; - mul.rn.f32 %f2340, %f1807, %f2336; - neg.f32 %f2341, %f2340; - fma.rn.f32 %f2342, %f1807, %f2336, %f2341; - fma.rn.f32 %f2343, %f1807, %f2338, %f2342; - fma.rn.f32 %f2345, %f3121, %f2336, %f2343; - add.rn.f32 %f2346, %f2340, %f2345; - neg.f32 %f2347, %f2346; - add.rn.f32 %f2348, %f2340, %f2347; - add.rn.f32 %f2349, %f2348, %f2345; - mov.b32 %r255, %f2346; - setp.eq.s32 %p263, %r255, 1118925336; - add.s32 %r256, %r255, -1; - mov.b32 %f2350, %r256; - add.f32 %f2351, %f2349, 0f37000000; - selp.f32 %f2352, %f2350, %f2346, %p263; - selp.f32 %f493, %f2351, %f2349, %p263; - mul.f32 %f2353, %f2352, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2354, %f2353; - fma.rn.f32 %f2355, %f2354, %f3116, %f2352; - fma.rn.f32 %f2356, %f2354, %f3117, %f2355; - mul.f32 %f2357, %f2356, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2358, %f2357; - add.f32 %f2359, %f2354, 0f00000000; - ex2.approx.f32 %f2360, %f2359; - mul.f32 %f2361, %f2358, %f2360; - setp.lt.f32 %p264, %f2352, 0fC2D20000; - selp.f32 %f2362, 0f00000000, %f2361, %p264; - setp.gt.f32 %p265, %f2352, 0f42D20000; - selp.f32 %f3382, 0f7F800000, %f2362, %p265; - setp.eq.f32 %p266, %f3382, 0f7F800000; - @%p266 bra BB7_168; - - fma.rn.f32 %f3382, %f3382, %f493, %f3382; - -BB7_168: - setp.eq.f32 %p356, %f350, 0f00000000; - setp.geu.f32 %p355, %f350, 0f00000000; - mov.b32 %r257, %f3382; - xor.b32 %r258, %r257, -2147483648; - mov.b32 %f2363, %r258; - selp.f32 %f497, %f2363, %f3382, %p7; - selp.f32 %f3383, %f358, %f497, %p356; - @%p355 bra BB7_170; - - cvt.rzi.f32.f32 %f2365, %f1807; - setp.neu.f32 %p268, %f2365, 0f40000000; - selp.f32 %f3383, 0f7FFFFFFF, %f497, %p268; - -BB7_170: - abs.f32 %f3131, %f350; - setp.eq.f32 %p360, %f350, 0f3F800000; - add.f32 %f3130, %f3131, 0f40000000; - mov.b32 %r302, %f3130; - setp.gt.s32 %p359, %r302, 2139095039; - setp.neu.f32 %p358, %f3131, 0f7F800000; - setp.gtu.f32 %p357, %f3131, 0f7F800000; - mov.f32 %f3129, 0f00000000; - mov.f32 %f3128, 0f3DAAAABD; - mov.f32 %f3127, 0f3C4CAF63; - mov.f32 %f3126, 0f3B18F0FE; - mov.f32 %f3125, 0fB5BFBE8E; - mov.f32 %f3124, 0fBF317200; - selp.f32 %f3123, 0fFF800000, 0f7F800000, %p7; - add.f32 %f3122, %f350, 0f40000000; - selp.f32 %f2368, %f3122, %f3383, %p357; - selp.f32 %f2369, %f2368, %f3123, %p358; - selp.f32 %f2370, %f2369, %f3383, %p359; - mul.f32 %f2371, %f2370, 0fBF000000; - selp.f32 %f2372, 0fBF000000, %f2371, %p360; - mul.f32 %f2373, %f2372, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2374, %f2373; - fma.rn.f32 %f2376, %f2374, %f3124, %f2372; - fma.rn.f32 %f2378, %f2374, %f3125, %f2376; - mul.f32 %f2379, %f2378, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2380, %f2379; - add.f32 %f2381, %f2374, 0f00000000; - ex2.approx.f32 %f2382, %f2381; - mul.f32 %f2383, %f2380, %f2382; - setp.lt.f32 %p273, %f2372, 0fC2D20000; - selp.f32 %f2384, 0f00000000, %f2383, %p273; - setp.gt.f32 %p274, %f2372, 0f42D20000; - selp.f32 %f501, 0f7F800000, %f2384, %p274; - // inline asm - rcp.approx.ftz.f32 %f2366,%f364; - // inline asm - mul.f32 %f2385, %f2366, %f365; - mul.f32 %f2386, %f2385, %f2385; - fma.rn.f32 %f2389, %f3126, %f2386, %f3127; - fma.rn.f32 %f2391, %f2389, %f2386, %f3128; - mul.rn.f32 %f2392, %f2391, %f2386; - mul.rn.f32 %f2393, %f2392, %f2385; - sub.f32 %f2394, %f363, %f2385; - neg.f32 %f2395, %f2385; - add.f32 %f2396, %f2394, %f2394; - fma.rn.f32 %f2397, %f2395, %f363, %f2396; - mul.rn.f32 %f2398, %f2366, %f2397; - add.f32 %f2399, %f2393, %f2385; - sub.f32 %f2400, %f2385, %f2399; - add.f32 %f2401, %f2393, %f2400; - add.f32 %f2402, %f2398, %f2401; - add.f32 %f2403, %f2399, %f2402; - sub.f32 %f2404, %f2399, %f2403; - add.f32 %f2405, %f2402, %f2404; - add.f32 %f2406, %f366, %f2403; - sub.f32 %f2407, %f366, %f2406; - add.f32 %f2408, %f2403, %f2407; - add.f32 %f2409, %f2405, %f2408; - add.f32 %f2410, %f367, %f2409; - add.f32 %f2411, %f2406, %f2410; - sub.f32 %f2412, %f2406, %f2411; - add.f32 %f2413, %f2410, %f2412; - mul.rn.f32 %f2415, %f1807, %f2411; - neg.f32 %f2416, %f2415; - fma.rn.f32 %f2417, %f1807, %f2411, %f2416; - fma.rn.f32 %f2418, %f1807, %f2413, %f2417; - fma.rn.f32 %f2420, %f3129, %f2411, %f2418; - add.rn.f32 %f2421, %f2415, %f2420; - neg.f32 %f2422, %f2421; - add.rn.f32 %f2423, %f2415, %f2422; - add.rn.f32 %f2424, %f2423, %f2420; - mov.b32 %r259, %f2421; - setp.eq.s32 %p275, %r259, 1118925336; - add.s32 %r260, %r259, -1; - mov.b32 %f2425, %r260; - add.f32 %f2426, %f2424, 0f37000000; - selp.f32 %f2427, %f2425, %f2421, %p275; - selp.f32 %f502, %f2426, %f2424, %p275; - mul.f32 %f2428, %f2427, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2429, %f2428; - fma.rn.f32 %f2430, %f2429, %f3124, %f2427; - fma.rn.f32 %f2431, %f2429, %f3125, %f2430; - mul.f32 %f2432, %f2431, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2433, %f2432; - add.f32 %f2434, %f2429, 0f00000000; - ex2.approx.f32 %f2435, %f2434; - mul.f32 %f2436, %f2433, %f2435; - setp.lt.f32 %p276, %f2427, 0fC2D20000; - selp.f32 %f2437, 0f00000000, %f2436, %p276; - setp.gt.f32 %p277, %f2427, 0f42D20000; - selp.f32 %f3384, 0f7F800000, %f2437, %p277; - setp.eq.f32 %p278, %f3384, 0f7F800000; - @%p278 bra BB7_172; - - fma.rn.f32 %f3384, %f3384, %f502, %f3384; - -BB7_172: - setp.eq.f32 %p362, %f361, 0f00000000; - setp.geu.f32 %p361, %f361, 0f00000000; - mov.b32 %r261, %f3384; - xor.b32 %r262, %r261, -2147483648; - mov.b32 %f2438, %r262; - selp.f32 %f506, %f2438, %f3384, %p8; - selp.f32 %f3385, %f368, %f506, %p362; - @%p361 bra BB7_174; - - cvt.rzi.f32.f32 %f2440, %f1807; - setp.neu.f32 %p280, %f2440, 0f40000000; - selp.f32 %f3385, 0f7FFFFFFF, %f506, %p280; - -BB7_174: - abs.f32 %f3144, %f361; - cvt.rn.f32.s32 %f3143, %r330; - sub.f32 %f3142, %f3143, %f3343; - add.f32 %f3141, %f3142, 0f3F800000; - setp.eq.f32 %p366, %f361, 0f3F800000; - add.f32 %f3140, %f3144, 0f40000000; - mov.b32 %r303, %f3140; - setp.gt.s32 %p365, %r303, 2139095039; - setp.neu.f32 %p364, %f3144, 0f7F800000; - setp.gtu.f32 %p363, %f3144, 0f7F800000; - selp.f32 %f3139, 0fFF800000, 0f7F800000, %p8; - add.f32 %f3138, %f361, 0f40000000; - mov.f32 %f3137, 0f00000000; - mov.f32 %f3136, 0f3DAAAABD; - mov.f32 %f3135, 0f3C4CAF63; - mov.f32 %f3134, 0f3B18F0FE; - mov.f32 %f3133, 0fB5BFBE8E; - mov.f32 %f3132, 0fBF317200; - selp.f32 %f2443, %f3138, %f3385, %p363; - selp.f32 %f2444, %f2443, %f3139, %p364; - selp.f32 %f2445, %f2444, %f3385, %p365; - mul.f32 %f2446, %f2445, 0fBF000000; - selp.f32 %f2447, 0fBF000000, %f2446, %p366; - mul.f32 %f2448, %f2447, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2449, %f2448; - fma.rn.f32 %f2451, %f2449, %f3132, %f2447; - fma.rn.f32 %f2453, %f2449, %f3133, %f2451; - mul.f32 %f2454, %f2453, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2455, %f2454; - add.f32 %f2456, %f2449, 0f00000000; - ex2.approx.f32 %f2457, %f2456; - mul.f32 %f2458, %f2455, %f2457; - setp.lt.f32 %p285, %f2447, 0fC2D20000; - selp.f32 %f2459, 0f00000000, %f2458, %p285; - setp.gt.f32 %p286, %f2447, 0f42D20000; - selp.f32 %f2460, 0f7F800000, %f2459, %p286; - mul.f32 %f2461, %f3142, %f2460; - mul.f32 %f2462, %f3141, %f501; - sub.f32 %f2463, %f2462, %f2461; - mul.f32 %f2464, %f338, %f2463; - mul.f32 %f510, %f434, %f2464; - // inline asm - rcp.approx.ftz.f32 %f2441,%f2112; - // inline asm - mul.f32 %f2465, %f2441, %f459; - mul.f32 %f2466, %f2465, %f2465; - fma.rn.f32 %f2469, %f3134, %f2466, %f3135; - fma.rn.f32 %f2471, %f2469, %f2466, %f3136; - mul.rn.f32 %f2472, %f2471, %f2466; - mul.rn.f32 %f2473, %f2472, %f2465; - sub.f32 %f2474, %f457, %f2465; - neg.f32 %f2475, %f2465; - add.f32 %f2476, %f2474, %f2474; - fma.rn.f32 %f2477, %f2475, %f457, %f2476; - mul.rn.f32 %f2478, %f2441, %f2477; - add.f32 %f2479, %f2473, %f2465; - sub.f32 %f2480, %f2465, %f2479; - add.f32 %f2481, %f2473, %f2480; - add.f32 %f2482, %f2478, %f2481; - add.f32 %f2483, %f2479, %f2482; - sub.f32 %f2484, %f2479, %f2483; - add.f32 %f2485, %f2482, %f2484; - add.f32 %f2486, %f460, %f2483; - sub.f32 %f2487, %f460, %f2486; - add.f32 %f2488, %f2483, %f2487; - add.f32 %f2489, %f2485, %f2488; - add.f32 %f2490, %f461, %f2489; - add.f32 %f2491, %f2486, %f2490; - sub.f32 %f2492, %f2486, %f2491; - add.f32 %f2493, %f2490, %f2492; - mul.rn.f32 %f2495, %f1807, %f2491; - neg.f32 %f2496, %f2495; - fma.rn.f32 %f2497, %f1807, %f2491, %f2496; - fma.rn.f32 %f2498, %f1807, %f2493, %f2497; - fma.rn.f32 %f2500, %f3137, %f2491, %f2498; - add.rn.f32 %f2501, %f2495, %f2500; - neg.f32 %f2502, %f2501; - add.rn.f32 %f2503, %f2495, %f2502; - add.rn.f32 %f2504, %f2503, %f2500; - mov.b32 %r263, %f2501; - setp.eq.s32 %p287, %r263, 1118925336; - add.s32 %r264, %r263, -1; - mov.b32 %f2505, %r264; - add.f32 %f2506, %f2504, 0f37000000; - selp.f32 %f2507, %f2505, %f2501, %p287; - selp.f32 %f511, %f2506, %f2504, %p287; - mul.f32 %f2508, %f2507, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2509, %f2508; - fma.rn.f32 %f2510, %f2509, %f3132, %f2507; - fma.rn.f32 %f2511, %f2509, %f3133, %f2510; - mul.f32 %f2512, %f2511, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2513, %f2512; - add.f32 %f2514, %f2509, 0f00000000; - ex2.approx.f32 %f2515, %f2514; - mul.f32 %f2516, %f2513, %f2515; - setp.lt.f32 %p288, %f2507, 0fC2D20000; - selp.f32 %f2517, 0f00000000, %f2516, %p288; - setp.gt.f32 %p289, %f2507, 0f42D20000; - selp.f32 %f3386, 0f7F800000, %f2517, %p289; - setp.eq.f32 %p290, %f3386, 0f7F800000; - @%p290 bra BB7_176; - - fma.rn.f32 %f3386, %f3386, %f511, %f3386; - -BB7_176: - setp.eq.f32 %p367, %f455, 0f00000000; - mov.b32 %r265, %f3386; - xor.b32 %r266, %r265, -2147483648; - mov.b32 %f2518, %r266; - selp.f32 %f3388, %f2518, %f3386, %p11; - @%p367 bra BB7_179; - bra.uni BB7_177; - -BB7_179: - add.f32 %f2521, %f455, %f455; - selp.f32 %f3388, %f2521, 0f00000000, %p188; - bra.uni BB7_180; - -BB7_177: - setp.geu.f32 %p292, %f455, 0f00000000; - @%p292 bra BB7_180; - - cvt.rzi.f32.f32 %f2520, %f1807; - setp.neu.f32 %p293, %f2520, 0f40000000; - selp.f32 %f3388, 0f7FFFFFFF, %f3388, %p293; - -BB7_180: - abs.f32 %f3146, %f455; - add.f32 %f3145, %f3146, 0f40000000; - mov.b32 %r304, %f3145; - setp.lt.s32 %p368, %r304, 2139095040; - @%p368 bra BB7_185; - - abs.f32 %f3214, %f455; - setp.gtu.f32 %p296, %f3214, 0f7F800000; - @%p296 bra BB7_184; - bra.uni BB7_182; - -BB7_184: - add.f32 %f3388, %f455, 0f40000000; - bra.uni BB7_185; - -BB7_182: - abs.f32 %f3215, %f455; - setp.neu.f32 %p297, %f3215, 0f7F800000; - @%p297 bra BB7_185; - - selp.f32 %f3388, 0fFF800000, 0f7F800000, %p11; - -BB7_185: - setp.eq.f32 %p369, %f455, 0f3F800000; - mov.f32 %f3152, 0f00000000; - mov.f32 %f3151, 0f3DAAAABD; - mov.f32 %f3150, 0f3C4CAF63; - mov.f32 %f3149, 0f3B18F0FE; - mov.f32 %f3148, 0fB5BFBE8E; - mov.f32 %f3147, 0fBF317200; - mul.f32 %f2524, %f3388, 0fBF000000; - selp.f32 %f2525, 0fBF000000, %f2524, %p369; - mul.f32 %f2526, %f2525, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2527, %f2526; - fma.rn.f32 %f2529, %f2527, %f3147, %f2525; - fma.rn.f32 %f2531, %f2527, %f3148, %f2529; - mul.f32 %f2532, %f2531, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2533, %f2532; - add.f32 %f2534, %f2527, 0f00000000; - ex2.approx.f32 %f2535, %f2534; - mul.f32 %f2536, %f2533, %f2535; - setp.lt.f32 %p299, %f2525, 0fC2D20000; - selp.f32 %f2537, 0f00000000, %f2536, %p299; - setp.gt.f32 %p300, %f2525, 0f42D20000; - selp.f32 %f522, 0f7F800000, %f2537, %p300; - // inline asm - rcp.approx.ftz.f32 %f2522,%f2206; - // inline asm - mul.f32 %f2538, %f2522, %f478; - mul.f32 %f2539, %f2538, %f2538; - fma.rn.f32 %f2542, %f3149, %f2539, %f3150; - fma.rn.f32 %f2544, %f2542, %f2539, %f3151; - mul.rn.f32 %f2545, %f2544, %f2539; - mul.rn.f32 %f2546, %f2545, %f2538; - sub.f32 %f2547, %f476, %f2538; - neg.f32 %f2548, %f2538; - add.f32 %f2549, %f2547, %f2547; - fma.rn.f32 %f2550, %f2548, %f476, %f2549; - mul.rn.f32 %f2551, %f2522, %f2550; - add.f32 %f2552, %f2546, %f2538; - sub.f32 %f2553, %f2538, %f2552; - add.f32 %f2554, %f2546, %f2553; - add.f32 %f2555, %f2551, %f2554; - add.f32 %f2556, %f2552, %f2555; - sub.f32 %f2557, %f2552, %f2556; - add.f32 %f2558, %f2555, %f2557; - add.f32 %f2559, %f479, %f2556; - sub.f32 %f2560, %f479, %f2559; - add.f32 %f2561, %f2556, %f2560; - add.f32 %f2562, %f2558, %f2561; - add.f32 %f2563, %f480, %f2562; - add.f32 %f2564, %f2559, %f2563; - sub.f32 %f2565, %f2559, %f2564; - add.f32 %f2566, %f2563, %f2565; - mul.rn.f32 %f2568, %f1807, %f2564; - neg.f32 %f2569, %f2568; - fma.rn.f32 %f2570, %f1807, %f2564, %f2569; - fma.rn.f32 %f2571, %f1807, %f2566, %f2570; - fma.rn.f32 %f2573, %f3152, %f2564, %f2571; - add.rn.f32 %f2574, %f2568, %f2573; - neg.f32 %f2575, %f2574; - add.rn.f32 %f2576, %f2568, %f2575; - add.rn.f32 %f2577, %f2576, %f2573; - mov.b32 %r267, %f2574; - setp.eq.s32 %p301, %r267, 1118925336; - add.s32 %r268, %r267, -1; - mov.b32 %f2578, %r268; - add.f32 %f2579, %f2577, 0f37000000; - selp.f32 %f2580, %f2578, %f2574, %p301; - selp.f32 %f523, %f2579, %f2577, %p301; - mul.f32 %f2581, %f2580, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2582, %f2581; - fma.rn.f32 %f2583, %f2582, %f3147, %f2580; - fma.rn.f32 %f2584, %f2582, %f3148, %f2583; - mul.f32 %f2585, %f2584, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2586, %f2585; - add.f32 %f2587, %f2582, 0f00000000; - ex2.approx.f32 %f2588, %f2587; - mul.f32 %f2589, %f2586, %f2588; - setp.lt.f32 %p302, %f2580, 0fC2D20000; - selp.f32 %f2590, 0f00000000, %f2589, %p302; - setp.gt.f32 %p303, %f2580, 0f42D20000; - selp.f32 %f3389, 0f7F800000, %f2590, %p303; - setp.eq.f32 %p304, %f3389, 0f7F800000; - @%p304 bra BB7_187; - - fma.rn.f32 %f3389, %f3389, %f523, %f3389; - -BB7_187: - setp.eq.f32 %p372, %f474, 0f00000000; - mov.b32 %r269, %f3389; - xor.b32 %r270, %r269, -2147483648; - mov.b32 %f2591, %r270; - selp.f32 %f3391, %f2591, %f3389, %p12; - @%p372 bra BB7_190; - bra.uni BB7_188; - -BB7_190: - add.f32 %f2594, %f474, %f474; - selp.f32 %f3391, %f2594, 0f00000000, %p188; - bra.uni BB7_191; - -BB7_188: - setp.geu.f32 %p306, %f474, 0f00000000; - @%p306 bra BB7_191; - - cvt.rzi.f32.f32 %f2593, %f1807; - setp.neu.f32 %p307, %f2593, 0f40000000; - selp.f32 %f3391, 0f7FFFFFFF, %f3391, %p307; - -BB7_191: - abs.f32 %f3219, %f474; - add.f32 %f3218, %f3219, 0f40000000; - mov.b32 %r313, %f3218; - setp.lt.s32 %p373, %r313, 2139095040; - @%p373 bra BB7_196; - - abs.f32 %f3212, %f474; - setp.gtu.f32 %p310, %f3212, 0f7F800000; - @%p310 bra BB7_195; - bra.uni BB7_193; - -BB7_195: - add.f32 %f3391, %f474, 0f40000000; - bra.uni BB7_196; - -BB7_193: - abs.f32 %f3213, %f474; - setp.neu.f32 %p311, %f3213, 0f7F800000; - @%p311 bra BB7_196; - - selp.f32 %f3391, 0fFF800000, 0f7F800000, %p12; - -BB7_196: - setp.eq.f32 %p374, %f474, 0f3F800000; - cvt.rn.f32.s32 %f3157, %r331; - sub.f32 %f3156, %f3157, %f3342; - add.f32 %f3155, %f3156, 0f3F800000; - mov.f32 %f3154, 0fB5BFBE8E; - mov.f32 %f3153, 0fBF317200; - mul.f32 %f2595, %f3391, 0fBF000000; - selp.f32 %f2596, 0fBF000000, %f2595, %p374; - mul.f32 %f2597, %f2596, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f2598, %f2597; - fma.rn.f32 %f2600, %f2598, %f3153, %f2596; - fma.rn.f32 %f2602, %f2598, %f3154, %f2600; - mul.f32 %f2603, %f2602, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f2604, %f2603; - add.f32 %f2605, %f2598, 0f00000000; - ex2.approx.f32 %f2606, %f2605; - mul.f32 %f2607, %f2604, %f2606; - setp.lt.f32 %p313, %f2596, 0fC2D20000; - selp.f32 %f2608, 0f00000000, %f2607, %p313; - setp.gt.f32 %p314, %f2596, 0f42D20000; - selp.f32 %f2609, 0f7F800000, %f2608, %p314; - mul.f32 %f2610, %f3156, %f2609; - mul.f32 %f2611, %f3155, %f522; - sub.f32 %f2612, %f2611, %f2610; - mul.f32 %f2613, %f339, %f2612; - mul.f32 %f2614, %f420, %f2613; - mul.f32 %f2615, %f454, %f454; - div.rn.f32 %f2616, %f2615, %f435; - add.f32 %f3366, %f2616, %f3366; - mul.f32 %f2617, %f492, %f454; - div.rn.f32 %f2618, %f2617, %f435; - add.f32 %f3365, %f2618, %f3365; - mul.f32 %f2619, %f420, %f434; - mul.f32 %f2620, %f2619, %f454; - div.rn.f32 %f2621, %f2620, %f435; - add.f32 %f3364, %f2621, %f3364; - div.rn.f32 %f2622, %f454, %f435; - add.f32 %f3363, %f2622, %f3363; - mul.f32 %f2623, %f510, %f454; - div.rn.f32 %f2624, %f2623, %f435; - add.f32 %f3362, %f2624, %f3362; - mul.f32 %f2625, %f2614, %f454; - div.rn.f32 %f2626, %f2625, %f435; - add.f32 %f3361, %f2626, %f3361; - mul.f32 %f2627, %f492, %f492; - div.rn.f32 %f2628, %f2627, %f435; - add.f32 %f3360, %f2628, %f3360; - mul.f32 %f2629, %f2619, %f492; - div.rn.f32 %f2630, %f2629, %f435; - add.f32 %f3359, %f2630, %f3359; - div.rn.f32 %f2631, %f492, %f435; - add.f32 %f3358, %f2631, %f3358; - mul.f32 %f2632, %f510, %f492; - div.rn.f32 %f2633, %f2632, %f435; - add.f32 %f3357, %f2633, %f3357; - mul.f32 %f2634, %f2614, %f492; - div.rn.f32 %f2635, %f2634, %f435; - add.f32 %f3356, %f2635, %f3356; - mul.f32 %f2636, %f2619, %f2619; - div.rn.f32 %f2637, %f2636, %f435; - add.f32 %f3355, %f2637, %f3355; - div.rn.f32 %f2638, %f2619, %f435; - add.f32 %f3354, %f2638, %f3354; - mul.f32 %f2639, %f510, %f2619; - div.rn.f32 %f2640, %f2639, %f435; - add.f32 %f3353, %f2640, %f3353; - mul.f32 %f2641, %f2614, %f2619; - div.rn.f32 %f2642, %f2641, %f435; - add.f32 %f3352, %f2642, %f3352; - rcp.rn.f32 %f2643, %f435; - add.f32 %f3351, %f2643, %f3351; - div.rn.f32 %f2644, %f510, %f435; - add.f32 %f3350, %f2644, %f3350; - div.rn.f32 %f2645, %f2614, %f435; - add.f32 %f3349, %f2645, %f3349; - mul.f32 %f2646, %f510, %f510; - div.rn.f32 %f2647, %f2646, %f435; - add.f32 %f3348, %f2647, %f3348; - mul.f32 %f2648, %f2614, %f510; - div.rn.f32 %f2649, %f2648, %f435; - add.f32 %f3347, %f2649, %f3347; - mul.f32 %f2650, %f2614, %f2614; - div.rn.f32 %f2651, %f2650, %f435; - add.f32 %f3346, %f2651, %f3346; - add.f32 %f555, %f3337, %f435; - setp.leu.f32 %p315, %f555, 0f00000000; - @%p315 bra BB7_205; - - add.f32 %f556, %f3337, %f436; - setp.gt.f32 %p316, %f556, 0f00000000; - @%p316 bra BB7_199; - bra.uni BB7_198; - -BB7_199: - setp.lt.f32 %p317, %f555, 0f00800000; - mul.f32 %f2653, %f555, 0f4B000000; - selp.f32 %f558, %f2653, %f555, %p317; - selp.f32 %f2654, 0fC1B80000, 0f00000000, %p317; - mov.b32 %r271, %f558; - add.s32 %r272, %r271, -1059760811; - and.b32 %r273, %r272, -8388608; - sub.s32 %r274, %r271, %r273; - mov.b32 %f2655, %r274; - cvt.rn.f32.s32 %f2656, %r273; - mov.f32 %f2657, 0f34000000; - fma.rn.f32 %f2658, %f2656, %f2657, %f2654; - add.f32 %f2659, %f2655, 0fBF800000; - mov.f32 %f2660, 0f3E1039F6; - mov.f32 %f2661, 0fBE055027; - fma.rn.f32 %f2662, %f2661, %f2659, %f2660; - mov.f32 %f2663, 0fBDF8CDCC; - fma.rn.f32 %f2664, %f2662, %f2659, %f2663; - mov.f32 %f2665, 0f3E0F2955; - fma.rn.f32 %f2666, %f2664, %f2659, %f2665; - mov.f32 %f2667, 0fBE2AD8B9; - fma.rn.f32 %f2668, %f2666, %f2659, %f2667; - mov.f32 %f2669, 0f3E4CED0B; - fma.rn.f32 %f2670, %f2668, %f2659, %f2669; - mov.f32 %f2671, 0fBE7FFF22; - fma.rn.f32 %f2672, %f2670, %f2659, %f2671; - mov.f32 %f2673, 0f3EAAAA78; - fma.rn.f32 %f2674, %f2672, %f2659, %f2673; - mov.f32 %f2675, 0fBF000000; - fma.rn.f32 %f2676, %f2674, %f2659, %f2675; - mul.f32 %f2677, %f2659, %f2676; - fma.rn.f32 %f2678, %f2677, %f2659, %f2659; - mov.f32 %f2679, 0f3F317218; - fma.rn.f32 %f3392, %f2658, %f2679, %f2678; - setp.lt.u32 %p318, %r271, 2139095040; - @%p318 bra BB7_201; - - mov.f32 %f2680, 0f7F800000; - fma.rn.f32 %f3392, %f558, %f2680, %f2680; - -BB7_201: - setp.eq.f32 %p319, %f558, 0f00000000; - selp.f32 %f2681, 0fFF800000, %f3392, %p319; - mul.f32 %f2682, %f556, %f2681; - sub.f32 %f562, %f2682, %f435; - mul.f32 %f2683, %f556, 0f4B000000; - setp.lt.f32 %p320, %f556, 0f00800000; - selp.f32 %f563, %f2683, %f556, %p320; - selp.f32 %f2684, 0fC1B80000, 0f00000000, %p320; - mov.b32 %r275, %f563; - add.s32 %r276, %r275, -1059760811; - and.b32 %r277, %r276, -8388608; - sub.s32 %r278, %r275, %r277; - mov.b32 %f2685, %r278; - cvt.rn.f32.s32 %f2686, %r277; - fma.rn.f32 %f2688, %f2686, %f2657, %f2684; - add.f32 %f2689, %f2685, 0fBF800000; - fma.rn.f32 %f2692, %f2661, %f2689, %f2660; - fma.rn.f32 %f2694, %f2692, %f2689, %f2663; - fma.rn.f32 %f2696, %f2694, %f2689, %f2665; - fma.rn.f32 %f2698, %f2696, %f2689, %f2667; - fma.rn.f32 %f2700, %f2698, %f2689, %f2669; - fma.rn.f32 %f2702, %f2700, %f2689, %f2671; - fma.rn.f32 %f2704, %f2702, %f2689, %f2673; - fma.rn.f32 %f2706, %f2704, %f2689, %f2675; - mul.f32 %f2707, %f2689, %f2706; - fma.rn.f32 %f2708, %f2707, %f2689, %f2689; - fma.rn.f32 %f3393, %f2688, %f2679, %f2708; - setp.lt.u32 %p321, %r275, 2139095040; - @%p321 bra BB7_203; - - mov.f32 %f2710, 0f7F800000; - fma.rn.f32 %f3393, %f563, %f2710, %f2710; - -BB7_203: - setp.eq.f32 %p322, %f563, 0f00000000; - selp.f32 %f2711, 0fFF800000, %f3393, %p322; - mul.f32 %f2712, %f556, %f2711; - sub.f32 %f2713, %f562, %f2712; - add.f32 %f3394, %f436, %f2713; - bra.uni BB7_204; - -BB7_198: - neg.f32 %f2652, %f435; - sub.f32 %f3394, %f2652, %f3337; - -BB7_204: - add.f32 %f3367, %f3367, %f3394; - -BB7_205: - add.s32 %r331, %r331, 1; - setp.lt.s32 %p323, %r331, %r71; - @%p323 bra BB7_124; - - st.local.v4.f32 [%rd2], {%f3366, %f3365, %f3364, %f3363}; - st.local.v4.f32 [%rd2+16], {%f3362, %f3361, %f3365, %f3360}; - st.local.v4.f32 [%rd2+32], {%f3359, %f3358, %f3357, %f3356}; - st.local.v4.f32 [%rd2+48], {%f3364, %f3359, %f3355, %f3354}; - st.local.v4.f32 [%rd2+64], {%f3353, %f3352, %f3363, %f3358}; - st.local.v4.f32 [%rd2+96], {%f3362, %f3357, %f3353, %f3350}; - st.local.v4.f32 [%rd2+80], {%f3354, %f3351, %f3350, %f3349}; - st.local.v4.f32 [%rd2+112], {%f3348, %f3347, %f3361, %f3356}; - st.local.v4.f32 [%rd2+128], {%f3352, %f3349, %f3347, %f3346}; - add.s32 %r330, %r330, 1; - setp.lt.s32 %p324, %r330, %r71; - @%p324 bra BB7_123; - -BB7_207: - mov.f32 %f3397, 0f00000000; - ld.local.v4.f32 {%f2715, %f2716, %f2717, %f2718}, [%rd2]; - rcp.rn.f32 %f572, %f2715; - mul.f32 %f573, %f572, %f2716; - st.local.f32 [%rd2+4], %f573; - mul.f32 %f574, %f572, %f2717; - mul.f32 %f575, %f572, %f2718; - st.local.v2.f32 [%rd2+8], {%f574, %f575}; - ld.local.v4.f32 {%f2723, %f2724, %f2725, %f2726}, [%rd2+16]; - mul.f32 %f576, %f572, %f2723; - mul.f32 %f577, %f572, %f2724; - st.local.v2.f32 [%rd2+16], {%f576, %f577}; - ld.local.f32 %f2731, [%rd2+4]; - fma.rn.f32 %f2732, %f2731, %f2725, 0f00000000; - sub.f32 %f2733, %f2726, %f2732; - ld.local.f32 %f578, [%rd2+24]; - st.local.f32 [%rd2+28], %f2733; - fma.rn.f32 %f2734, %f574, %f578, 0f00000000; - rcp.rn.f32 %f579, %f2733; - ld.local.v4.f32 {%f2735, %f2736, %f2737, %f2738}, [%rd2+32]; - sub.f32 %f2743, %f2735, %f2734; - mul.f32 %f580, %f579, %f2743; - fma.rn.f32 %f2744, %f575, %f578, 0f00000000; - sub.f32 %f2745, %f2736, %f2744; - mul.f32 %f581, %f579, %f2745; - fma.rn.f32 %f2746, %f576, %f578, 0f00000000; - sub.f32 %f2747, %f2737, %f2746; - mul.f32 %f582, %f579, %f2747; - fma.rn.f32 %f2748, %f577, %f578, 0f00000000; - sub.f32 %f2749, %f2738, %f2748; - mul.f32 %f583, %f579, %f2749; - st.local.v4.f32 [%rd2+32], {%f580, %f581, %f582, %f583}; - ld.local.v2.f32 {%f2750, %f2751}, [%rd2+48]; - ld.local.f32 %f2754, [%rd2+4]; - fma.rn.f32 %f2755, %f2754, %f2750, 0f00000000; - sub.f32 %f584, %f2751, %f2755; - st.local.f32 [%rd2+52], %f584; - add.s64 %rd137, %rd2, 48; - add.s64 %rd136, %rd2, 8; - mov.u32 %r332, -1; - -BB7_208: - ld.local.f32 %f2756, [%rd137]; - ld.local.f32 %f2757, [%rd136]; - fma.rn.f32 %f3397, %f2757, %f2756, %f3397; - add.s64 %rd137, %rd137, 4; - add.s64 %rd136, %rd136, 24; - add.s32 %r332, %r332, 1; - setp.lt.s32 %p325, %r332, 1; - @%p325 bra BB7_208; - - add.s64 %rd14, %rd2, 4; - ld.local.v4.f32 {%f2759, %f2760, %f2761, %f2762}, [%rd2+48]; - fma.rn.f32 %f2763, %f575, %f2759, 0f00000000; - fma.rn.f32 %f2764, %f581, %f584, %f2763; - sub.f32 %f2766, %f2761, %f3397; - rcp.rn.f32 %f588, %f2766; - sub.f32 %f2768, %f2762, %f2764; - mul.f32 %f589, %f588, %f2768; - ld.local.f32 %f2769, [%rd2+4]; - st.local.v2.f32 [%rd2+56], {%f2766, %f589}; - fma.rn.f32 %f2770, %f576, %f2759, 0f00000000; - fma.rn.f32 %f2771, %f582, %f584, %f2770; - ld.local.v4.f32 {%f2772, %f2773, %f2774, %f2775}, [%rd2+64]; - sub.f32 %f2780, %f2772, %f2771; - mul.f32 %f590, %f588, %f2780; - fma.rn.f32 %f2781, %f577, %f2759, 0f00000000; - fma.rn.f32 %f2782, %f583, %f584, %f2781; - sub.f32 %f2783, %f2773, %f2782; - mul.f32 %f591, %f588, %f2783; - st.local.v2.f32 [%rd2+64], {%f590, %f591}; - fma.rn.f32 %f2784, %f2769, %f2774, 0f00000000; - sub.f32 %f592, %f2775, %f2784; - st.local.f32 [%rd2+76], %f592; - add.s64 %rd139, %rd2, 72; - add.s64 %rd138, %rd2, 8; - mov.f32 %f3398, 0f00000000; - mov.u32 %r333, -1; - -BB7_210: - ld.local.f32 %f2785, [%rd139]; - ld.local.f32 %f2786, [%rd138]; - fma.rn.f32 %f3398, %f2786, %f2785, %f3398; - add.s64 %rd139, %rd139, 4; - add.s64 %rd138, %rd138, 24; - add.s32 %r333, %r333, 1; - setp.lt.s32 %p326, %r333, 1; - @%p326 bra BB7_210; - - ld.local.f32 %f2788, [%rd2+80]; - sub.f32 %f595, %f2788, %f3398; - st.local.f32 [%rd2+80], %f595; - add.s64 %rd141, %rd2, 72; - add.s64 %rd140, %rd2, 12; - mov.f32 %f3399, 0f00000000; - mov.u32 %r334, -1; - -BB7_212: - ld.local.f32 %f2789, [%rd141]; - ld.local.f32 %f2790, [%rd140]; - fma.rn.f32 %f3399, %f2790, %f2789, %f3399; - add.s64 %rd141, %rd141, 4; - add.s64 %rd140, %rd140, 24; - add.s32 %r334, %r334, 1; - setp.lt.s32 %p327, %r334, 2; - @%p327 bra BB7_212; - - ld.local.v2.f32 {%f2792, %f2793}, [%rd14+76]; - sub.f32 %f2795, %f2793, %f3399; - st.local.f32 [%rd14+80], %f2795; - ld.local.f32 %f598, [%rd14+68]; - fma.rn.f32 %f2796, %f576, %f598, 0f00000000; - fma.rn.f32 %f2797, %f582, %f592, %f2796; - fma.rn.f32 %f2798, %f590, %f595, %f2797; - rcp.rn.f32 %f599, %f2795; - ld.local.v2.f32 {%f2799, %f2800}, [%rd14+84]; - sub.f32 %f2803, %f2799, %f2798; - mul.f32 %f600, %f599, %f2803; - fma.rn.f32 %f2804, %f577, %f598, 0f00000000; - fma.rn.f32 %f2805, %f583, %f592, %f2804; - fma.rn.f32 %f2806, %f591, %f595, %f2805; + fma.rn.f32 %f2010, %f2004, %f1705, %f2009; + mov.f32 %f2011, 0f32A57060; + fma.rn.f32 %f2012, %f2004, %f2011, %f2010; + mov.b32 %r739, %f2007; + shl.b32 %r740, %r739, 23; + mov.b32 %f2013, %r740; + ex2.approx.ftz.f32 %f2014, %f2012; + mul.f32 %f2015, %f2014, %f2013; + mov.b32 %r741, %f1999; + shl.b32 %r742, %r741, 23; + mov.b32 %f2016, %r742; + add.f32 %f2017, %f1999, 0fCB40007F; + neg.f32 %f2018, %f2017; + fma.rn.f32 %f2019, %f1991, %f1705, %f2018; + fma.rn.f32 %f2020, %f1991, %f2011, %f2019; + ex2.approx.ftz.f32 %f2021, %f2020; + mul.f32 %f2022, %f2021, %f2016; + sub.f32 %f2023, %f2022, %f2015; + mul.f32 %f2024, %f272, %f2023; + mul.f32 %f386, %f373, %f2024; + add.f32 %f2025, %f362, 0f3F000000; + sub.f32 %f2026, %f2025, %f3068; + div.rn.f32 %f387, %f2026, %f3064; + abs.f32 %f388, %f387; + setp.lt.f32 %p642, %f388, 0f00800000; + mul.f32 %f2027, %f388, 0f4B800000; + selp.f32 %f2028, %f2027, %f388, %p642; + selp.f32 %f2029, 0fC3170000, 0fC2FE0000, %p642; + mov.b32 %r743, %f2028; + and.b32 %r744, %r743, 8388607; + or.b32 %r745, %r744, 1065353216; + mov.b32 %f2030, %r745; + shr.u32 %r746, %r743, 23; + cvt.rn.f32.u32 %f2031, %r746; + add.f32 %f2032, %f2029, %f2031; + setp.gt.f32 %p643, %f2030, 0f3FB504F3; + mul.f32 %f2033, %f2030, 0f3F000000; + add.f32 %f2034, %f2032, 0f3F800000; + selp.f32 %f2035, %f2034, %f2032, %p643; + selp.f32 %f2036, %f2033, %f2030, %p643; + add.f32 %f2037, %f2036, 0fBF800000; + add.f32 %f2038, %f2036, 0f3F800000; + rcp.approx.ftz.f32 %f2039, %f2038; + add.f32 %f2040, %f2037, %f2037; + mul.f32 %f2042, %f2040, %f2039; + mul.f32 %f2043, %f2042, %f2042; + fma.rn.f32 %f2046, %f1662, %f2043, %f1661; + fma.rn.f32 %f2048, %f2046, %f2043, %f1664; + mul.rn.f32 %f2049, %f2048, %f2043; + mul.rn.f32 %f2050, %f2049, %f2042; + sub.f32 %f2051, %f2037, %f2042; + add.f32 %f2052, %f2051, %f2051; + neg.f32 %f2053, %f2042; + fma.rn.f32 %f2054, %f2053, %f2037, %f2052; + mul.rn.f32 %f2055, %f2039, %f2054; + add.f32 %f2056, %f2050, %f2042; + sub.f32 %f2057, %f2042, %f2056; + add.f32 %f2058, %f2050, %f2057; + add.f32 %f2059, %f2055, %f2058; + add.f32 %f2060, %f2056, %f2059; + sub.f32 %f2061, %f2056, %f2060; + add.f32 %f2062, %f2059, %f2061; + mul.rn.f32 %f2064, %f2035, %f1680; + mul.rn.f32 %f2066, %f2035, %f1682; + add.f32 %f2067, %f2064, %f2060; + sub.f32 %f2068, %f2064, %f2067; + add.f32 %f2069, %f2060, %f2068; + add.f32 %f2070, %f2062, %f2069; + add.f32 %f2071, %f2066, %f2070; + add.f32 %f2072, %f2067, %f2071; + sub.f32 %f2073, %f2067, %f2072; + add.f32 %f2074, %f2071, %f2073; + mul.rn.f32 %f2075, %f1601, %f2072; + neg.f32 %f2076, %f2075; + fma.rn.f32 %f2077, %f1601, %f2072, %f2076; + fma.rn.f32 %f2078, %f1601, %f2074, %f2077; + mov.f32 %f2079, 0f00000000; + fma.rn.f32 %f2080, %f2079, %f2072, %f2078; + add.rn.f32 %f2081, %f2075, %f2080; + neg.f32 %f2082, %f2081; + add.rn.f32 %f2083, %f2075, %f2082; + add.rn.f32 %f2084, %f2083, %f2080; + mov.b32 %r747, %f2081; + setp.eq.s32 %p644, %r747, 1118925336; + add.s32 %r748, %r747, -1; + mov.b32 %f2085, %r748; + add.f32 %f2086, %f2084, 0f37000000; + selp.f32 %f389, %f2086, %f2084, %p644; + selp.f32 %f2087, %f2085, %f2081, %p644; + mul.rn.f32 %f2088, %f2087, %f1705; + cvt.rzi.f32.f32 %f2089, %f2088; + abs.f32 %f2090, %f2089; + setp.gt.f32 %p645, %f2090, 0f42FC0000; + mov.b32 %r749, %f2089; + and.b32 %r750, %r749, -2147483648; + or.b32 %r751, %r750, 1123811328; + mov.b32 %f2091, %r751; + selp.f32 %f2092, %f2091, %f2089, %p645; + fma.rn.f32 %f2094, %f2092, %f1711, %f2087; + fma.rn.f32 %f2096, %f2092, %f1713, %f2094; + mul.f32 %f2097, %f2096, 0f3FB8AA3B; + add.f32 %f2098, %f2092, 0f4B40007F; + mov.b32 %r752, %f2098; + shl.b32 %r753, %r752, 23; + mov.b32 %f2099, %r753; + ex2.approx.ftz.f32 %f2100, %f2097; + mul.f32 %f390, %f2100, %f2099; + setp.eq.f32 %p646, %f390, 0f7F800000; + @%p646 bra $L__BB7_396; + + fma.rn.f32 %f3123, %f390, %f389, %f390; + +$L__BB7_396: + setp.lt.f32 %p647, %f387, 0f00000000; + and.pred %p39, %p647, %p606; + setp.eq.f32 %p649, %f387, 0f00000000; + @%p649 bra $L__BB7_400; + bra.uni $L__BB7_397; + +$L__BB7_400: + add.f32 %f2105, %f387, %f387; + selp.f32 %f3125, %f2105, 0f00000000, %p606; + bra.uni $L__BB7_401; + +$L__BB7_397: + mov.b32 %r754, %f3123; + xor.b32 %r755, %r754, -2147483648; + mov.b32 %f2101, %r755; + selp.f32 %f3125, %f2101, %f3123, %p39; + setp.geu.f32 %p650, %f387, 0f00000000; + @%p650 bra $L__BB7_401; + + cvt.rzi.f32.f32 %f2103, %f1601; + setp.eq.f32 %p651, %f2103, 0f40000000; + @%p651 bra $L__BB7_401; + + mov.f32 %f3125, 0f7FFFFFFF; + +$L__BB7_401: + add.f32 %f2106, %f388, 0f40000000; + mov.b32 %r756, %f2106; + setp.lt.s32 %p653, %r756, 2139095040; + @%p653 bra $L__BB7_406; + + setp.gtu.f32 %p654, %f388, 0f7F800000; + @%p654 bra $L__BB7_405; + bra.uni $L__BB7_403; + +$L__BB7_405: + add.f32 %f3125, %f387, 0f40000000; + bra.uni $L__BB7_406; + +$L__BB7_403: + setp.neu.f32 %p655, %f388, 0f7F800000; + @%p655 bra $L__BB7_406; + + selp.f32 %f3125, 0fFF800000, 0f7F800000, %p39; + +$L__BB7_406: + mul.f32 %f2108, %f3125, 0fBF000000; + setp.eq.f32 %p656, %f387, 0f3F800000; + selp.f32 %f2109, 0fBF000000, %f2108, %p656; + fma.rn.f32 %f2112, %f2109, %f1993, %f1592; + cvt.sat.f32.f32 %f2115, %f2112; + fma.rm.f32 %f2117, %f2115, %f1996, %f1998; + add.f32 %f2118, %f2117, 0fCB40007F; + neg.f32 %f2119, %f2118; + fma.rn.f32 %f2120, %f2109, %f1705, %f2119; + fma.rn.f32 %f2122, %f2109, %f2011, %f2120; + mov.b32 %r757, %f2117; + shl.b32 %r758, %r757, 23; + mov.b32 %f2123, %r758; + ex2.approx.ftz.f32 %f2124, %f2122; + mul.f32 %f399, %f2124, %f2123; + div.rn.f32 %f400, %f368, %f3064; + abs.f32 %f401, %f400; + setp.lt.f32 %p657, %f401, 0f00800000; + mul.f32 %f2125, %f401, 0f4B800000; + selp.f32 %f2126, %f2125, %f401, %p657; + selp.f32 %f2127, 0fC3170000, 0fC2FE0000, %p657; + mov.b32 %r759, %f2126; + and.b32 %r760, %r759, 8388607; + or.b32 %r761, %r760, 1065353216; + mov.b32 %f2128, %r761; + shr.u32 %r762, %r759, 23; + cvt.rn.f32.u32 %f2129, %r762; + add.f32 %f2130, %f2127, %f2129; + setp.gt.f32 %p658, %f2128, 0f3FB504F3; + mul.f32 %f2131, %f2128, 0f3F000000; + add.f32 %f2132, %f2130, 0f3F800000; + selp.f32 %f2133, %f2132, %f2130, %p658; + selp.f32 %f2134, %f2131, %f2128, %p658; + add.f32 %f2135, %f2134, 0fBF800000; + add.f32 %f2136, %f2134, 0f3F800000; + rcp.approx.ftz.f32 %f2137, %f2136; + add.f32 %f2138, %f2135, %f2135; + mul.f32 %f2140, %f2138, %f2137; + mul.f32 %f2141, %f2140, %f2140; + fma.rn.f32 %f2144, %f1662, %f2141, %f1661; + fma.rn.f32 %f2146, %f2144, %f2141, %f1664; + mul.rn.f32 %f2147, %f2146, %f2141; + mul.rn.f32 %f2148, %f2147, %f2140; + sub.f32 %f2149, %f2135, %f2140; + add.f32 %f2150, %f2149, %f2149; + neg.f32 %f2151, %f2140; + fma.rn.f32 %f2152, %f2151, %f2135, %f2150; + mul.rn.f32 %f2153, %f2137, %f2152; + add.f32 %f2154, %f2148, %f2140; + sub.f32 %f2155, %f2140, %f2154; + add.f32 %f2156, %f2148, %f2155; + add.f32 %f2157, %f2153, %f2156; + add.f32 %f2158, %f2154, %f2157; + sub.f32 %f2159, %f2154, %f2158; + add.f32 %f2160, %f2157, %f2159; + mul.rn.f32 %f2162, %f2133, %f1680; + mul.rn.f32 %f2164, %f2133, %f1682; + add.f32 %f2165, %f2162, %f2158; + sub.f32 %f2166, %f2162, %f2165; + add.f32 %f2167, %f2158, %f2166; + add.f32 %f2168, %f2160, %f2167; + add.f32 %f2169, %f2164, %f2168; + add.f32 %f2170, %f2165, %f2169; + sub.f32 %f2171, %f2165, %f2170; + add.f32 %f2172, %f2169, %f2171; + mul.rn.f32 %f2173, %f1601, %f2170; + neg.f32 %f2174, %f2173; + fma.rn.f32 %f2175, %f1601, %f2170, %f2174; + fma.rn.f32 %f2176, %f1601, %f2172, %f2175; + fma.rn.f32 %f2178, %f2079, %f2170, %f2176; + add.rn.f32 %f2179, %f2173, %f2178; + neg.f32 %f2180, %f2179; + add.rn.f32 %f2181, %f2173, %f2180; + add.rn.f32 %f2182, %f2181, %f2178; + mov.b32 %r763, %f2179; + setp.eq.s32 %p659, %r763, 1118925336; + add.s32 %r764, %r763, -1; + mov.b32 %f2183, %r764; + add.f32 %f2184, %f2182, 0f37000000; + selp.f32 %f402, %f2184, %f2182, %p659; + selp.f32 %f2185, %f2183, %f2179, %p659; + mul.rn.f32 %f2186, %f2185, %f1705; + cvt.rzi.f32.f32 %f2187, %f2186; + abs.f32 %f2188, %f2187; + setp.gt.f32 %p660, %f2188, 0f42FC0000; + mov.b32 %r765, %f2187; + and.b32 %r766, %r765, -2147483648; + or.b32 %r767, %r766, 1123811328; + mov.b32 %f2189, %r767; + selp.f32 %f2190, %f2189, %f2187, %p660; + fma.rn.f32 %f2192, %f2190, %f1711, %f2185; + fma.rn.f32 %f2194, %f2190, %f1713, %f2192; + mul.f32 %f2195, %f2194, 0f3FB8AA3B; + add.f32 %f2196, %f2190, 0f4B40007F; + mov.b32 %r768, %f2196; + shl.b32 %r769, %r768, 23; + mov.b32 %f2197, %r769; + ex2.approx.ftz.f32 %f2198, %f2195; + mul.f32 %f403, %f2198, %f2197; + setp.eq.f32 %p661, %f403, 0f7F800000; + mov.f32 %f3126, 0f7F800000; + @%p661 bra $L__BB7_408; + + fma.rn.f32 %f3126, %f403, %f402, %f403; + +$L__BB7_408: + setp.lt.f32 %p662, %f400, 0f00000000; + and.pred %p40, %p662, %p606; + setp.eq.f32 %p664, %f400, 0f00000000; + @%p664 bra $L__BB7_412; + bra.uni $L__BB7_409; + +$L__BB7_412: + add.f32 %f2203, %f400, %f400; + selp.f32 %f3128, %f2203, 0f00000000, %p606; + bra.uni $L__BB7_413; + +$L__BB7_409: + mov.b32 %r770, %f3126; + xor.b32 %r771, %r770, -2147483648; + mov.b32 %f2199, %r771; + selp.f32 %f3128, %f2199, %f3126, %p40; + setp.geu.f32 %p665, %f400, 0f00000000; + @%p665 bra $L__BB7_413; + + cvt.rzi.f32.f32 %f2201, %f1601; + setp.eq.f32 %p666, %f2201, 0f40000000; + @%p666 bra $L__BB7_413; + + mov.f32 %f3128, 0f7FFFFFFF; + +$L__BB7_413: + add.f32 %f2204, %f401, 0f40000000; + mov.b32 %r772, %f2204; + setp.lt.s32 %p668, %r772, 2139095040; + @%p668 bra $L__BB7_418; + + setp.gtu.f32 %p669, %f401, 0f7F800000; + @%p669 bra $L__BB7_417; + bra.uni $L__BB7_415; + +$L__BB7_417: + add.f32 %f3128, %f400, 0f40000000; + bra.uni $L__BB7_418; + +$L__BB7_415: + setp.neu.f32 %p670, %f401, 0f7F800000; + @%p670 bra $L__BB7_418; + + selp.f32 %f3128, 0fFF800000, 0f7F800000, %p40; + +$L__BB7_418: + mul.f32 %f2206, %f3128, 0fBF000000; + setp.eq.f32 %p671, %f400, 0f3F800000; + selp.f32 %f2207, 0fBF000000, %f2206, %p671; + fma.rn.f32 %f2210, %f2207, %f1993, %f1592; + cvt.sat.f32.f32 %f2213, %f2210; + fma.rm.f32 %f2215, %f2213, %f1996, %f1998; + add.f32 %f2216, %f2215, 0fCB40007F; + neg.f32 %f2217, %f2216; + fma.rn.f32 %f2218, %f2207, %f1705, %f2217; + fma.rn.f32 %f2220, %f2207, %f2011, %f2218; + mov.b32 %r773, %f2215; + shl.b32 %r774, %r773, 23; + mov.b32 %f2221, %r774; + ex2.approx.ftz.f32 %f2222, %f2220; + mul.f32 %f2223, %f2222, %f2221; + sub.f32 %f412, %f399, %f2223; + setp.eq.f32 %p672, %f323, 0f7F800000; + mov.f32 %f3129, 0f7F800000; + @%p672 bra $L__BB7_420; + + fma.rn.f32 %f3129, %f323, %f322, %f323; + +$L__BB7_420: + mov.b32 %r775, %f3129; + xor.b32 %r776, %r775, -2147483648; + mov.b32 %f2224, %r776; + selp.f32 %f415, %f2224, %f3129, %p33; + setp.eq.f32 %p673, %f320, 0f00000000; + selp.f32 %f3130, %f324, %f415, %p673; + @%p37 bra $L__BB7_423; + + cvt.rzi.f32.f32 %f2226, %f1601; + setp.eq.f32 %p674, %f2226, 0f40000000; + mov.f32 %f3130, %f415; + @%p674 bra $L__BB7_423; + + mov.f32 %f3130, 0f7FFFFFFF; + +$L__BB7_423: + setp.eq.f32 %p675, %f328, 0f7F800000; + mov.f32 %f3131, 0f7F800000; + @%p675 bra $L__BB7_425; + + fma.rn.f32 %f3131, %f328, %f327, %f328; + +$L__BB7_425: + mov.b32 %r777, %f3131; + xor.b32 %r778, %r777, -2147483648; + mov.b32 %f2229, %r778; + selp.f32 %f420, %f2229, %f3131, %p34; + setp.eq.f32 %p676, %f325, 0f00000000; + selp.f32 %f3132, %f331, %f420, %p676; + @%p38 bra $L__BB7_428; + + cvt.rzi.f32.f32 %f2231, %f1601; + setp.eq.f32 %p677, %f2231, 0f40000000; + mov.f32 %f3132, %f420; + @%p677 bra $L__BB7_428; + + mov.f32 %f3132, 0f7FFFFFFF; + +$L__BB7_428: + mul.f32 %f2234, %f273, %f412; + mul.f32 %f423, %f361, %f2234; + setp.gtu.f32 %p678, %f321, 0f7F800000; + mov.f32 %f3133, 0f7F800000; + selp.f32 %f2235, %f329, %f3130, %p678; + setp.neu.f32 %p679, %f321, 0f7F800000; + selp.f32 %f2236, %f2235, %f330, %p679; + setp.gt.s32 %p680, %r99, 2139095039; + selp.f32 %f2237, %f2236, %f3130, %p680; + mul.f32 %f2238, %f2237, 0fBF000000; + setp.eq.f32 %p681, %f320, 0f3F800000; + selp.f32 %f2239, 0fBF000000, %f2238, %p681; + fma.rn.f32 %f2242, %f2239, %f1993, %f1592; + cvt.sat.f32.f32 %f2245, %f2242; + fma.rm.f32 %f2247, %f2245, %f1996, %f1998; + setp.gtu.f32 %p682, %f326, 0f7F800000; + selp.f32 %f2248, %f333, %f3132, %p682; + setp.neu.f32 %p683, %f326, 0f7F800000; + selp.f32 %f2249, %f2248, %f334, %p683; + setp.gt.s32 %p684, %r100, 2139095039; + selp.f32 %f2250, %f2249, %f3132, %p684; + mul.f32 %f2251, %f2250, 0fBF000000; + setp.eq.f32 %p685, %f325, 0f3F800000; + selp.f32 %f2252, 0fBF000000, %f2251, %p685; + fma.rn.f32 %f2253, %f2252, %f1993, %f1592; + cvt.sat.f32.f32 %f2254, %f2253; + fma.rm.f32 %f2255, %f2254, %f1996, %f1998; + add.f32 %f2256, %f2255, 0fCB40007F; + neg.f32 %f2257, %f2256; + fma.rn.f32 %f2258, %f2252, %f1705, %f2257; + fma.rn.f32 %f2260, %f2252, %f2011, %f2258; + mov.b32 %r779, %f2255; + shl.b32 %r780, %r779, 23; + mov.b32 %f2261, %r780; + ex2.approx.ftz.f32 %f2262, %f2260; + mul.f32 %f2263, %f2262, %f2261; + mul.f32 %f2264, %f301, %f2263; + mov.b32 %r781, %f2247; + shl.b32 %r782, %r781, 23; + mov.b32 %f2265, %r782; + add.f32 %f2266, %f2247, 0fCB40007F; + neg.f32 %f2267, %f2266; + fma.rn.f32 %f2268, %f2239, %f1705, %f2267; + fma.rn.f32 %f2269, %f2239, %f2011, %f2268; + ex2.approx.ftz.f32 %f2270, %f2269; + mul.f32 %f2271, %f2270, %f2265; + mul.f32 %f2272, %f332, %f2271; + sub.f32 %f2273, %f2272, %f2264; + mul.f32 %f2274, %f274, %f2273; + mul.f32 %f424, %f373, %f2274; + add.f32 %f2275, %f362, 0f3F800000; + sub.f32 %f2276, %f2275, %f3068; + div.rn.f32 %f425, %f2276, %f3064; + abs.f32 %f426, %f425; + setp.lt.f32 %p686, %f426, 0f00800000; + mul.f32 %f2277, %f426, 0f4B800000; + selp.f32 %f2278, %f2277, %f426, %p686; + selp.f32 %f2279, 0fC3170000, 0fC2FE0000, %p686; + mov.b32 %r783, %f2278; + and.b32 %r784, %r783, 8388607; + or.b32 %r785, %r784, 1065353216; + mov.b32 %f2280, %r785; + shr.u32 %r786, %r783, 23; + cvt.rn.f32.u32 %f2281, %r786; + add.f32 %f2282, %f2279, %f2281; + setp.gt.f32 %p687, %f2280, 0f3FB504F3; + mul.f32 %f2283, %f2280, 0f3F000000; + add.f32 %f2284, %f2282, 0f3F800000; + selp.f32 %f2285, %f2284, %f2282, %p687; + selp.f32 %f2286, %f2283, %f2280, %p687; + add.f32 %f2287, %f2286, 0fBF800000; + add.f32 %f2288, %f2286, 0f3F800000; + rcp.approx.ftz.f32 %f2289, %f2288; + add.f32 %f2290, %f2287, %f2287; + mul.f32 %f2292, %f2290, %f2289; + mul.f32 %f2293, %f2292, %f2292; + fma.rn.f32 %f2296, %f1662, %f2293, %f1661; + fma.rn.f32 %f2298, %f2296, %f2293, %f1664; + mul.rn.f32 %f2299, %f2298, %f2293; + mul.rn.f32 %f2300, %f2299, %f2292; + sub.f32 %f2301, %f2287, %f2292; + add.f32 %f2302, %f2301, %f2301; + neg.f32 %f2303, %f2292; + fma.rn.f32 %f2304, %f2303, %f2287, %f2302; + mul.rn.f32 %f2305, %f2289, %f2304; + add.f32 %f2306, %f2300, %f2292; + sub.f32 %f2307, %f2292, %f2306; + add.f32 %f2308, %f2300, %f2307; + add.f32 %f2309, %f2305, %f2308; + add.f32 %f2310, %f2306, %f2309; + sub.f32 %f2311, %f2306, %f2310; + add.f32 %f2312, %f2309, %f2311; + mul.rn.f32 %f2314, %f2285, %f1680; + mul.rn.f32 %f2316, %f2285, %f1682; + add.f32 %f2317, %f2314, %f2310; + sub.f32 %f2318, %f2314, %f2317; + add.f32 %f2319, %f2310, %f2318; + add.f32 %f2320, %f2312, %f2319; + add.f32 %f2321, %f2316, %f2320; + add.f32 %f2322, %f2317, %f2321; + sub.f32 %f2323, %f2317, %f2322; + add.f32 %f2324, %f2321, %f2323; + mul.rn.f32 %f2325, %f1601, %f2322; + neg.f32 %f2326, %f2325; + fma.rn.f32 %f2327, %f1601, %f2322, %f2326; + fma.rn.f32 %f2328, %f1601, %f2324, %f2327; + fma.rn.f32 %f2330, %f2079, %f2322, %f2328; + add.rn.f32 %f2331, %f2325, %f2330; + neg.f32 %f2332, %f2331; + add.rn.f32 %f2333, %f2325, %f2332; + add.rn.f32 %f2334, %f2333, %f2330; + mov.b32 %r787, %f2331; + setp.eq.s32 %p688, %r787, 1118925336; + add.s32 %r788, %r787, -1; + mov.b32 %f2335, %r788; + add.f32 %f2336, %f2334, 0f37000000; + selp.f32 %f427, %f2336, %f2334, %p688; + selp.f32 %f2337, %f2335, %f2331, %p688; + mul.rn.f32 %f2338, %f2337, %f1705; + cvt.rzi.f32.f32 %f2339, %f2338; + abs.f32 %f2340, %f2339; + setp.gt.f32 %p689, %f2340, 0f42FC0000; + mov.b32 %r789, %f2339; + and.b32 %r790, %r789, -2147483648; + or.b32 %r791, %r790, 1123811328; + mov.b32 %f2341, %r791; + selp.f32 %f2342, %f2341, %f2339, %p689; + fma.rn.f32 %f2344, %f2342, %f1711, %f2337; + fma.rn.f32 %f2346, %f2342, %f1713, %f2344; + mul.f32 %f2347, %f2346, 0f3FB8AA3B; + add.f32 %f2348, %f2342, 0f4B40007F; + mov.b32 %r792, %f2348; + shl.b32 %r793, %r792, 23; + mov.b32 %f2349, %r793; + ex2.approx.ftz.f32 %f2350, %f2347; + mul.f32 %f428, %f2350, %f2349; + setp.eq.f32 %p690, %f428, 0f7F800000; + @%p690 bra $L__BB7_430; + + fma.rn.f32 %f3133, %f428, %f427, %f428; + +$L__BB7_430: + setp.lt.f32 %p691, %f425, 0f00000000; + and.pred %p41, %p691, %p606; + setp.eq.f32 %p693, %f425, 0f00000000; + @%p693 bra $L__BB7_434; + bra.uni $L__BB7_431; + +$L__BB7_434: + add.f32 %f2355, %f425, %f425; + selp.f32 %f3135, %f2355, 0f00000000, %p606; + bra.uni $L__BB7_435; + +$L__BB7_431: + mov.b32 %r794, %f3133; + xor.b32 %r795, %r794, -2147483648; + mov.b32 %f2351, %r795; + selp.f32 %f3135, %f2351, %f3133, %p41; + setp.geu.f32 %p694, %f425, 0f00000000; + @%p694 bra $L__BB7_435; + + cvt.rzi.f32.f32 %f2353, %f1601; + setp.eq.f32 %p695, %f2353, 0f40000000; + @%p695 bra $L__BB7_435; + + mov.f32 %f3135, 0f7FFFFFFF; + +$L__BB7_435: + add.f32 %f2356, %f426, 0f40000000; + mov.b32 %r796, %f2356; + setp.lt.s32 %p697, %r796, 2139095040; + @%p697 bra $L__BB7_440; + + setp.gtu.f32 %p698, %f426, 0f7F800000; + @%p698 bra $L__BB7_439; + bra.uni $L__BB7_437; + +$L__BB7_439: + add.f32 %f3135, %f425, 0f40000000; + bra.uni $L__BB7_440; + +$L__BB7_437: + setp.neu.f32 %p699, %f426, 0f7F800000; + @%p699 bra $L__BB7_440; + + selp.f32 %f3135, 0fFF800000, 0f7F800000, %p41; + +$L__BB7_440: + mul.f32 %f2358, %f3135, 0fBF000000; + setp.eq.f32 %p700, %f425, 0f3F800000; + selp.f32 %f2359, 0fBF000000, %f2358, %p700; + fma.rn.f32 %f2362, %f2359, %f1993, %f1592; + cvt.sat.f32.f32 %f2365, %f2362; + fma.rm.f32 %f2367, %f2365, %f1996, %f1998; + add.f32 %f2368, %f2367, 0fCB40007F; + neg.f32 %f2369, %f2368; + fma.rn.f32 %f2370, %f2359, %f1705, %f2369; + fma.rn.f32 %f2372, %f2359, %f2011, %f2370; + mov.b32 %r797, %f2367; + shl.b32 %r798, %r797, 23; + mov.b32 %f2373, %r798; + ex2.approx.ftz.f32 %f2374, %f2372; + mul.f32 %f437, %f2374, %f2373; + div.rn.f32 %f438, %f363, %f3064; + abs.f32 %f439, %f438; + setp.lt.f32 %p701, %f439, 0f00800000; + mul.f32 %f2375, %f439, 0f4B800000; + selp.f32 %f2376, %f2375, %f439, %p701; + selp.f32 %f2377, 0fC3170000, 0fC2FE0000, %p701; + mov.b32 %r799, %f2376; + and.b32 %r800, %r799, 8388607; + or.b32 %r801, %r800, 1065353216; + mov.b32 %f2378, %r801; + shr.u32 %r802, %r799, 23; + cvt.rn.f32.u32 %f2379, %r802; + add.f32 %f2380, %f2377, %f2379; + setp.gt.f32 %p702, %f2378, 0f3FB504F3; + mul.f32 %f2381, %f2378, 0f3F000000; + add.f32 %f2382, %f2380, 0f3F800000; + selp.f32 %f2383, %f2382, %f2380, %p702; + selp.f32 %f2384, %f2381, %f2378, %p702; + add.f32 %f2385, %f2384, 0fBF800000; + add.f32 %f2386, %f2384, 0f3F800000; + rcp.approx.ftz.f32 %f2387, %f2386; + add.f32 %f2388, %f2385, %f2385; + mul.f32 %f2390, %f2388, %f2387; + mul.f32 %f2391, %f2390, %f2390; + fma.rn.f32 %f2394, %f1662, %f2391, %f1661; + fma.rn.f32 %f2396, %f2394, %f2391, %f1664; + mul.rn.f32 %f2397, %f2396, %f2391; + mul.rn.f32 %f2398, %f2397, %f2390; + sub.f32 %f2399, %f2385, %f2390; + add.f32 %f2400, %f2399, %f2399; + neg.f32 %f2401, %f2390; + fma.rn.f32 %f2402, %f2401, %f2385, %f2400; + mul.rn.f32 %f2403, %f2387, %f2402; + add.f32 %f2404, %f2398, %f2390; + sub.f32 %f2405, %f2390, %f2404; + add.f32 %f2406, %f2398, %f2405; + add.f32 %f2407, %f2403, %f2406; + add.f32 %f2408, %f2404, %f2407; + sub.f32 %f2409, %f2404, %f2408; + add.f32 %f2410, %f2407, %f2409; + mul.rn.f32 %f2412, %f2383, %f1680; + mul.rn.f32 %f2414, %f2383, %f1682; + add.f32 %f2415, %f2412, %f2408; + sub.f32 %f2416, %f2412, %f2415; + add.f32 %f2417, %f2408, %f2416; + add.f32 %f2418, %f2410, %f2417; + add.f32 %f2419, %f2414, %f2418; + add.f32 %f2420, %f2415, %f2419; + sub.f32 %f2421, %f2415, %f2420; + add.f32 %f2422, %f2419, %f2421; + mul.rn.f32 %f2423, %f1601, %f2420; + neg.f32 %f2424, %f2423; + fma.rn.f32 %f2425, %f1601, %f2420, %f2424; + fma.rn.f32 %f2426, %f1601, %f2422, %f2425; + fma.rn.f32 %f2428, %f2079, %f2420, %f2426; + add.rn.f32 %f2429, %f2423, %f2428; + neg.f32 %f2430, %f2429; + add.rn.f32 %f2431, %f2423, %f2430; + add.rn.f32 %f2432, %f2431, %f2428; + mov.b32 %r803, %f2429; + setp.eq.s32 %p703, %r803, 1118925336; + add.s32 %r804, %r803, -1; + mov.b32 %f2433, %r804; + add.f32 %f2434, %f2432, 0f37000000; + selp.f32 %f440, %f2434, %f2432, %p703; + selp.f32 %f2435, %f2433, %f2429, %p703; + mul.rn.f32 %f2436, %f2435, %f1705; + cvt.rzi.f32.f32 %f2437, %f2436; + abs.f32 %f2438, %f2437; + setp.gt.f32 %p704, %f2438, 0f42FC0000; + mov.b32 %r805, %f2437; + and.b32 %r806, %r805, -2147483648; + or.b32 %r807, %r806, 1123811328; + mov.b32 %f2439, %r807; + selp.f32 %f2440, %f2439, %f2437, %p704; + fma.rn.f32 %f2442, %f2440, %f1711, %f2435; + fma.rn.f32 %f2444, %f2440, %f1713, %f2442; + mul.f32 %f2445, %f2444, 0f3FB8AA3B; + add.f32 %f2446, %f2440, 0f4B40007F; + mov.b32 %r808, %f2446; + shl.b32 %r809, %r808, 23; + mov.b32 %f2447, %r809; + ex2.approx.ftz.f32 %f2448, %f2445; + mul.f32 %f441, %f2448, %f2447; + setp.eq.f32 %p705, %f441, 0f7F800000; + mov.f32 %f3136, 0f7F800000; + @%p705 bra $L__BB7_442; + + fma.rn.f32 %f3136, %f441, %f440, %f441; + +$L__BB7_442: + setp.lt.f32 %p706, %f438, 0f00000000; + and.pred %p42, %p706, %p606; + setp.eq.f32 %p708, %f438, 0f00000000; + @%p708 bra $L__BB7_446; + bra.uni $L__BB7_443; + +$L__BB7_446: + add.f32 %f2453, %f438, %f438; + selp.f32 %f3138, %f2453, 0f00000000, %p606; + bra.uni $L__BB7_447; + +$L__BB7_443: + mov.b32 %r810, %f3136; + xor.b32 %r811, %r810, -2147483648; + mov.b32 %f2449, %r811; + selp.f32 %f3138, %f2449, %f3136, %p42; + setp.geu.f32 %p709, %f438, 0f00000000; + @%p709 bra $L__BB7_447; + + cvt.rzi.f32.f32 %f2451, %f1601; + setp.eq.f32 %p710, %f2451, 0f40000000; + @%p710 bra $L__BB7_447; + + mov.f32 %f3138, 0f7FFFFFFF; + +$L__BB7_447: + add.f32 %f2454, %f439, 0f40000000; + mov.b32 %r812, %f2454; + setp.lt.s32 %p712, %r812, 2139095040; + @%p712 bra $L__BB7_452; + + setp.gtu.f32 %p713, %f439, 0f7F800000; + @%p713 bra $L__BB7_451; + bra.uni $L__BB7_449; + +$L__BB7_451: + add.f32 %f3138, %f438, 0f40000000; + bra.uni $L__BB7_452; + +$L__BB7_449: + setp.neu.f32 %p714, %f439, 0f7F800000; + @%p714 bra $L__BB7_452; + + selp.f32 %f3138, 0fFF800000, 0f7F800000, %p42; + +$L__BB7_452: + mul.f32 %f2455, %f3138, 0fBF000000; + setp.eq.f32 %p715, %f438, 0f3F800000; + selp.f32 %f2456, 0fBF000000, %f2455, %p715; + fma.rn.f32 %f2459, %f2456, %f1993, %f1592; + cvt.sat.f32.f32 %f2462, %f2459; + fma.rm.f32 %f2464, %f2462, %f1996, %f1998; + add.f32 %f2465, %f2464, 0fCB40007F; + neg.f32 %f2466, %f2465; + fma.rn.f32 %f2467, %f2456, %f1705, %f2466; + fma.rn.f32 %f2469, %f2456, %f2011, %f2467; + mov.b32 %r813, %f2464; + shl.b32 %r814, %r813, 23; + mov.b32 %f2470, %r814; + ex2.approx.ftz.f32 %f2471, %f2469; + mul.f32 %f2472, %f2471, %f2470; + add.f32 %f2473, %f363, 0f3F800000; + mul.f32 %f2474, %f2473, %f437; + mul.f32 %f2475, %f363, %f2472; + sub.f32 %f2476, %f2474, %f2475; + mul.f32 %f2477, %f275, %f2476; + mul.f32 %f2478, %f361, %f2477; + mul.f32 %f2479, %f386, %f386; + div.rn.f32 %f2480, %f2479, %f374; + add.f32 %f3110, %f3110, %f2480; + mul.f32 %f2481, %f423, %f386; + div.rn.f32 %f2482, %f2481, %f374; + add.f32 %f3109, %f3109, %f2482; + mul.f32 %f2483, %f361, %f373; + mul.f32 %f2484, %f2483, %f386; + div.rn.f32 %f2485, %f2484, %f374; + add.f32 %f3108, %f3108, %f2485; + div.rn.f32 %f2486, %f386, %f374; + add.f32 %f3107, %f3107, %f2486; + mul.f32 %f2487, %f424, %f386; + div.rn.f32 %f2488, %f2487, %f374; + add.f32 %f3106, %f3106, %f2488; + mul.f32 %f2489, %f2478, %f386; + div.rn.f32 %f2490, %f2489, %f374; + add.f32 %f3105, %f3105, %f2490; + mul.f32 %f2491, %f423, %f423; + div.rn.f32 %f2492, %f2491, %f374; + add.f32 %f3104, %f3104, %f2492; + mul.f32 %f2493, %f2483, %f423; + div.rn.f32 %f2494, %f2493, %f374; + add.f32 %f3103, %f3103, %f2494; + div.rn.f32 %f2495, %f423, %f374; + add.f32 %f3102, %f3102, %f2495; + mul.f32 %f2496, %f424, %f423; + div.rn.f32 %f2497, %f2496, %f374; + add.f32 %f3101, %f3101, %f2497; + mul.f32 %f2498, %f2478, %f423; + div.rn.f32 %f2499, %f2498, %f374; + add.f32 %f3100, %f3100, %f2499; + mul.f32 %f2500, %f2483, %f2483; + div.rn.f32 %f2501, %f2500, %f374; + add.f32 %f3099, %f3099, %f2501; + div.rn.f32 %f2502, %f2483, %f374; + add.f32 %f3098, %f3098, %f2502; + mul.f32 %f2503, %f424, %f2483; + div.rn.f32 %f2504, %f2503, %f374; + add.f32 %f3097, %f3097, %f2504; + mul.f32 %f2505, %f2478, %f2483; + div.rn.f32 %f2506, %f2505, %f374; + add.f32 %f3096, %f3096, %f2506; + rcp.rn.f32 %f2507, %f374; + add.f32 %f3095, %f3095, %f2507; + div.rn.f32 %f2508, %f424, %f374; + add.f32 %f3094, %f3094, %f2508; + div.rn.f32 %f2509, %f2478, %f374; + add.f32 %f3093, %f3093, %f2509; + mul.f32 %f2510, %f424, %f424; + div.rn.f32 %f2511, %f2510, %f374; + add.f32 %f3111, %f3111, %f2511; + mul.f32 %f2512, %f2478, %f424; + div.rn.f32 %f2513, %f2512, %f374; + add.f32 %f3112, %f3112, %f2513; + mul.f32 %f2514, %f2478, %f2478; + div.rn.f32 %f2515, %f2514, %f374; + add.f32 %f3113, %f3113, %f2515; + add.f32 %f471, %f3063, %f374; + setp.leu.f32 %p716, %f471, 0f00000000; + @%p716 bra $L__BB7_460; + + add.f32 %f472, %f3063, %f375; + setp.gt.f32 %p717, %f472, 0f00000000; + @%p717 bra $L__BB7_455; + bra.uni $L__BB7_454; + +$L__BB7_455: + setp.lt.f32 %p718, %f471, 0f00800000; + mul.f32 %f2518, %f471, 0f4B000000; + selp.f32 %f474, %f2518, %f471, %p718; + selp.f32 %f2519, 0fC1B80000, 0f00000000, %p718; + mov.b32 %r815, %f474; + add.s32 %r816, %r815, -1059760811; + and.b32 %r817, %r816, -8388608; + sub.s32 %r818, %r815, %r817; + mov.b32 %f2520, %r818; + cvt.rn.f32.s32 %f2521, %r817; + mov.f32 %f2522, 0f34000000; + fma.rn.f32 %f2523, %f2521, %f2522, %f2519; + add.f32 %f2524, %f2520, 0fBF800000; + mov.f32 %f2525, 0f3E1039F6; + mov.f32 %f2526, 0fBE055027; + fma.rn.f32 %f2527, %f2526, %f2524, %f2525; + mov.f32 %f2528, 0fBDF8CDCC; + fma.rn.f32 %f2529, %f2527, %f2524, %f2528; + mov.f32 %f2530, 0f3E0F2955; + fma.rn.f32 %f2531, %f2529, %f2524, %f2530; + mov.f32 %f2532, 0fBE2AD8B9; + fma.rn.f32 %f2533, %f2531, %f2524, %f2532; + mov.f32 %f2534, 0f3E4CED0B; + fma.rn.f32 %f2535, %f2533, %f2524, %f2534; + mov.f32 %f2536, 0fBE7FFF22; + fma.rn.f32 %f2537, %f2535, %f2524, %f2536; + mov.f32 %f2538, 0f3EAAAA78; + fma.rn.f32 %f2539, %f2537, %f2524, %f2538; + mov.f32 %f2540, 0fBF000000; + fma.rn.f32 %f2541, %f2539, %f2524, %f2540; + mul.f32 %f2542, %f2524, %f2541; + fma.rn.f32 %f2543, %f2542, %f2524, %f2524; + mov.f32 %f2544, 0f3F317218; + fma.rn.f32 %f3139, %f2523, %f2544, %f2543; + setp.lt.u32 %p719, %r815, 2139095040; + @%p719 bra $L__BB7_457; + + mov.f32 %f2545, 0f7F800000; + fma.rn.f32 %f3139, %f474, %f2545, %f2545; + +$L__BB7_457: + setp.eq.f32 %p720, %f474, 0f00000000; + selp.f32 %f2546, 0fFF800000, %f3139, %p720; + mul.f32 %f2547, %f472, %f2546; + sub.f32 %f478, %f2547, %f374; + mul.f32 %f2548, %f472, 0f4B000000; + setp.lt.f32 %p721, %f472, 0f00800000; + selp.f32 %f479, %f2548, %f472, %p721; + selp.f32 %f2549, 0fC1B80000, 0f00000000, %p721; + mov.b32 %r819, %f479; + add.s32 %r820, %r819, -1059760811; + and.b32 %r821, %r820, -8388608; + sub.s32 %r822, %r819, %r821; + mov.b32 %f2550, %r822; + cvt.rn.f32.s32 %f2551, %r821; + fma.rn.f32 %f2553, %f2551, %f2522, %f2549; + add.f32 %f2554, %f2550, 0fBF800000; + fma.rn.f32 %f2557, %f2526, %f2554, %f2525; + fma.rn.f32 %f2559, %f2557, %f2554, %f2528; + fma.rn.f32 %f2561, %f2559, %f2554, %f2530; + fma.rn.f32 %f2563, %f2561, %f2554, %f2532; + fma.rn.f32 %f2565, %f2563, %f2554, %f2534; + fma.rn.f32 %f2567, %f2565, %f2554, %f2536; + fma.rn.f32 %f2569, %f2567, %f2554, %f2538; + fma.rn.f32 %f2571, %f2569, %f2554, %f2540; + mul.f32 %f2572, %f2554, %f2571; + fma.rn.f32 %f2573, %f2572, %f2554, %f2554; + fma.rn.f32 %f3140, %f2553, %f2544, %f2573; + setp.lt.u32 %p722, %r819, 2139095040; + @%p722 bra $L__BB7_459; + + mov.f32 %f2575, 0f7F800000; + fma.rn.f32 %f3140, %f479, %f2575, %f2575; + +$L__BB7_459: + setp.eq.f32 %p723, %f479, 0f00000000; + selp.f32 %f2576, 0fFF800000, %f3140, %p723; + mul.f32 %f2577, %f472, %f2576; + sub.f32 %f2578, %f478, %f2577; + add.f32 %f2579, %f375, %f2578; + add.f32 %f3141, %f3141, %f2579; + bra.uni $L__BB7_460; + +$L__BB7_454: + neg.f32 %f2516, %f374; + sub.f32 %f2517, %f2516, %f3063; + add.f32 %f3141, %f3141, %f2517; + +$L__BB7_460: + add.s32 %r849, %r849, 1; + setp.lt.s32 %p724, %r849, %r104; + @%p724 bra $L__BB7_376; + + add.s32 %r848, %r848, 1; + setp.lt.s32 %p725, %r848, %r104; + @%p725 bra $L__BB7_375; + +$L__BB7_462: + ld.param.u64 %rd56, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_9]; + ld.param.u64 %rd55, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_8]; + ld.param.u32 %r833, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_10]; + ld.param.u64 %rd54, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_7]; + rcp.rn.f32 %f2580, %f3110; + mov.f32 %f2581, 0f3F800000; + mul.f32 %f2582, %f2580, %f3109; + mul.f32 %f2583, %f2580, %f3108; + mul.f32 %f2584, %f2580, %f3107; + mul.f32 %f2585, %f2580, %f3106; + mul.f32 %f2586, %f2580, %f3105; + fma.rn.f32 %f2587, %f2582, %f3109, 0f00000000; + sub.f32 %f2589, %f3104, %f2587; + fma.rn.f32 %f2590, %f2583, %f3109, 0f00000000; + rcp.rn.f32 %f2591, %f2589; + sub.f32 %f2592, %f3103, %f2590; + mul.f32 %f2593, %f2591, %f2592; + fma.rn.f32 %f2594, %f2584, %f3109, 0f00000000; + sub.f32 %f2595, %f3102, %f2594; + mul.f32 %f2596, %f2591, %f2595; + fma.rn.f32 %f2597, %f2585, %f3109, 0f00000000; + sub.f32 %f2598, %f3101, %f2597; + mul.f32 %f2599, %f2591, %f2598; + fma.rn.f32 %f2600, %f2586, %f3109, 0f00000000; + sub.f32 %f2601, %f3100, %f2600; + mul.f32 %f2602, %f2591, %f2601; + fma.rn.f32 %f2603, %f2582, %f3108, 0f00000000; + sub.f32 %f2604, %f3103, %f2603; + fma.rn.f32 %f2605, %f2583, %f3108, 0f00000000; + fma.rn.f32 %f2606, %f2593, %f2604, %f2605; + sub.f32 %f2607, %f3099, %f2606; + fma.rn.f32 %f2608, %f2584, %f3108, 0f00000000; + fma.rn.f32 %f2609, %f2596, %f2604, %f2608; + rcp.rn.f32 %f2610, %f2607; + sub.f32 %f2611, %f3098, %f2609; + mul.f32 %f2612, %f2610, %f2611; + fma.rn.f32 %f2613, %f2585, %f3108, 0f00000000; + fma.rn.f32 %f2614, %f2599, %f2604, %f2613; + sub.f32 %f2615, %f3097, %f2614; + mul.f32 %f2616, %f2610, %f2615; + fma.rn.f32 %f2617, %f2586, %f3108, 0f00000000; + fma.rn.f32 %f2618, %f2602, %f2604, %f2617; + sub.f32 %f2619, %f3096, %f2618; + mul.f32 %f2620, %f2610, %f2619; + fma.rn.f32 %f2621, %f2582, %f3107, 0f00000000; + sub.f32 %f2622, %f3102, %f2621; + fma.rn.f32 %f2623, %f2583, %f3107, 0f00000000; + fma.rn.f32 %f2624, %f2593, %f2622, %f2623; + sub.f32 %f2625, %f3098, %f2624; + fma.rn.f32 %f2626, %f2584, %f3107, 0f00000000; + fma.rn.f32 %f2627, %f2596, %f2622, %f2626; + fma.rn.f32 %f2628, %f2612, %f2625, %f2627; + sub.f32 %f2629, %f3095, %f2628; + fma.rn.f32 %f2630, %f2585, %f3107, 0f00000000; + fma.rn.f32 %f2631, %f2599, %f2622, %f2630; + fma.rn.f32 %f2632, %f2616, %f2625, %f2631; + rcp.rn.f32 %f2633, %f2629; + sub.f32 %f2634, %f3094, %f2632; + mul.f32 %f2635, %f2633, %f2634; + fma.rn.f32 %f2636, %f2586, %f3107, 0f00000000; + fma.rn.f32 %f2637, %f2602, %f2622, %f2636; + fma.rn.f32 %f2638, %f2620, %f2625, %f2637; + sub.f32 %f2639, %f3093, %f2638; + mul.f32 %f2640, %f2633, %f2639; + fma.rn.f32 %f2641, %f2582, %f3106, 0f00000000; + sub.f32 %f2642, %f3101, %f2641; + fma.rn.f32 %f2643, %f2583, %f3106, 0f00000000; + fma.rn.f32 %f2644, %f2593, %f2642, %f2643; + sub.f32 %f2645, %f3097, %f2644; + fma.rn.f32 %f2646, %f2584, %f3106, 0f00000000; + fma.rn.f32 %f2647, %f2596, %f2642, %f2646; + fma.rn.f32 %f2648, %f2612, %f2645, %f2647; + sub.f32 %f2649, %f3094, %f2648; + fma.rn.f32 %f2650, %f2585, %f3106, 0f00000000; + fma.rn.f32 %f2651, %f2599, %f2642, %f2650; + fma.rn.f32 %f2652, %f2616, %f2645, %f2651; + fma.rn.f32 %f2653, %f2635, %f2649, %f2652; + sub.f32 %f2654, %f3111, %f2653; + fma.rn.f32 %f2655, %f2586, %f3106, 0f00000000; + fma.rn.f32 %f2656, %f2602, %f2642, %f2655; + fma.rn.f32 %f2657, %f2620, %f2645, %f2656; + fma.rn.f32 %f2658, %f2640, %f2649, %f2657; + rcp.rn.f32 %f2659, %f2654; + sub.f32 %f2660, %f3112, %f2658; + mul.f32 %f2661, %f2659, %f2660; + fma.rn.f32 %f2662, %f2582, %f3105, 0f00000000; + sub.f32 %f2663, %f3100, %f2662; + fma.rn.f32 %f2664, %f2583, %f3105, 0f00000000; + fma.rn.f32 %f2665, %f2593, %f2663, %f2664; + sub.f32 %f2666, %f3096, %f2665; + fma.rn.f32 %f2667, %f2584, %f3105, 0f00000000; + fma.rn.f32 %f2668, %f2596, %f2663, %f2667; + fma.rn.f32 %f2669, %f2612, %f2666, %f2668; + sub.f32 %f2670, %f3093, %f2669; + fma.rn.f32 %f2671, %f2585, %f3105, 0f00000000; + fma.rn.f32 %f2672, %f2599, %f2663, %f2671; + fma.rn.f32 %f2673, %f2616, %f2666, %f2672; + fma.rn.f32 %f2674, %f2635, %f2670, %f2673; + sub.f32 %f2675, %f3112, %f2674; + fma.rn.f32 %f2676, %f2586, %f3105, 0f00000000; + fma.rn.f32 %f2677, %f2602, %f2663, %f2676; + fma.rn.f32 %f2678, %f2620, %f2666, %f2677; + fma.rn.f32 %f2679, %f2640, %f2670, %f2678; + fma.rn.f32 %f2680, %f2661, %f2675, %f2679; + sub.f32 %f2681, %f3113, %f2680; + add.f32 %f2682, %f2582, 0f00000000; + sub.f32 %f2683, %f1569, %f2682; + add.f32 %f2684, %f2583, 0f00000000; + fma.rn.f32 %f2685, %f2593, %f2683, %f2684; + sub.f32 %f2686, %f1569, %f2685; + add.f32 %f2687, %f2584, 0f00000000; + fma.rn.f32 %f2688, %f2596, %f2683, %f2687; + fma.rn.f32 %f2689, %f2612, %f2686, %f2688; + sub.f32 %f2690, %f1569, %f2689; + add.f32 %f2691, %f2585, 0f00000000; + fma.rn.f32 %f2692, %f2599, %f2683, %f2691; + fma.rn.f32 %f2693, %f2616, %f2686, %f2692; + fma.rn.f32 %f2694, %f2635, %f2690, %f2693; + sub.f32 %f2695, %f1569, %f2694; + add.f32 %f2696, %f2586, 0f00000000; + fma.rn.f32 %f2697, %f2602, %f2683, %f2696; + fma.rn.f32 %f2698, %f2620, %f2686, %f2697; + fma.rn.f32 %f2699, %f2640, %f2690, %f2698; + fma.rn.f32 %f2700, %f2661, %f2695, %f2699; + sub.f32 %f2701, %f1569, %f2700; + div.rn.f32 %f2702, %f2701, %f2681; + fma.rn.f32 %f2703, %f2675, %f2702, 0f00000000; + sub.f32 %f2704, %f2695, %f2703; + mul.f32 %f2705, %f2659, %f2704; + fma.rn.f32 %f2706, %f2649, %f2705, 0f00000000; + fma.rn.f32 %f2707, %f2670, %f2702, %f2706; + sub.f32 %f2708, %f2690, %f2707; + mul.f32 %f2709, %f2633, %f2708; + fma.rn.f32 %f2710, %f2625, %f2709, 0f00000000; + fma.rn.f32 %f2711, %f2645, %f2705, %f2710; + fma.rn.f32 %f2712, %f2666, %f2702, %f2711; + sub.f32 %f2713, %f2686, %f2712; + mul.f32 %f2714, %f2610, %f2713; + fma.rn.f32 %f2715, %f2604, %f2714, 0f00000000; + fma.rn.f32 %f2716, %f2622, %f2709, %f2715; + fma.rn.f32 %f2717, %f2642, %f2705, %f2716; + fma.rn.f32 %f2718, %f2663, %f2702, %f2717; + sub.f32 %f2719, %f2683, %f2718; + mul.f32 %f2720, %f2591, %f2719; + fma.rn.f32 %f2721, %f3109, %f2720, 0f00000000; + fma.rn.f32 %f2722, %f3108, %f2714, %f2721; + fma.rn.f32 %f2723, %f3107, %f2709, %f2722; + fma.rn.f32 %f2724, %f3106, %f2705, %f2723; + fma.rn.f32 %f2725, %f3105, %f2702, %f2724; + sub.f32 %f2726, %f2581, %f2725; + mul.f32 %f2727, %f2580, %f2726; + fma.rn.f32 %f2728, %f2582, 0f00000000, 0f00000000; + sub.f32 %f2729, %f2581, %f2728; + fma.rn.f32 %f2730, %f2583, 0f00000000, 0f00000000; + fma.rn.f32 %f2731, %f2593, %f2729, %f2730; + sub.f32 %f2732, %f1569, %f2731; + fma.rn.f32 %f2733, %f2584, 0f00000000, 0f00000000; + fma.rn.f32 %f2734, %f2596, %f2729, %f2733; + fma.rn.f32 %f2735, %f2612, %f2732, %f2734; + sub.f32 %f2736, %f1569, %f2735; + fma.rn.f32 %f2737, %f2585, 0f00000000, 0f00000000; + fma.rn.f32 %f2738, %f2599, %f2729, %f2737; + fma.rn.f32 %f2739, %f2616, %f2732, %f2738; + fma.rn.f32 %f2740, %f2635, %f2736, %f2739; + sub.f32 %f2741, %f1569, %f2740; + fma.rn.f32 %f2742, %f2586, 0f00000000, 0f00000000; + fma.rn.f32 %f2743, %f2602, %f2729, %f2742; + fma.rn.f32 %f2744, %f2620, %f2732, %f2743; + fma.rn.f32 %f2745, %f2640, %f2736, %f2744; + fma.rn.f32 %f2746, %f2661, %f2741, %f2745; + sub.f32 %f2747, %f1569, %f2746; + div.rn.f32 %f2748, %f2747, %f2681; + fma.rn.f32 %f2749, %f2675, %f2748, 0f00000000; + sub.f32 %f2750, %f2741, %f2749; + mul.f32 %f2751, %f2659, %f2750; + fma.rn.f32 %f2752, %f2649, %f2751, 0f00000000; + fma.rn.f32 %f2753, %f2670, %f2748, %f2752; + sub.f32 %f2754, %f2736, %f2753; + mul.f32 %f2755, %f2633, %f2754; + fma.rn.f32 %f2756, %f2625, %f2755, 0f00000000; + fma.rn.f32 %f2757, %f2645, %f2751, %f2756; + fma.rn.f32 %f2758, %f2666, %f2748, %f2757; + sub.f32 %f2759, %f2732, %f2758; + mul.f32 %f2760, %f2610, %f2759; + fma.rn.f32 %f2761, %f2604, %f2760, 0f00000000; + fma.rn.f32 %f2762, %f2622, %f2755, %f2761; + fma.rn.f32 %f2763, %f2642, %f2751, %f2762; + fma.rn.f32 %f2764, %f2663, %f2748, %f2763; + sub.f32 %f2765, %f2729, %f2764; + mul.f32 %f2766, %f2591, %f2765; + sub.f32 %f2767, %f1569, %f2728; + fma.rn.f32 %f2768, %f2593, %f2767, %f2730; + sub.f32 %f2769, %f2581, %f2768; + fma.rn.f32 %f2770, %f2596, %f2767, %f2733; + fma.rn.f32 %f2771, %f2612, %f2769, %f2770; + sub.f32 %f2772, %f1569, %f2771; + fma.rn.f32 %f2773, %f2599, %f2767, %f2737; + fma.rn.f32 %f2774, %f2616, %f2769, %f2773; + fma.rn.f32 %f2775, %f2635, %f2772, %f2774; + sub.f32 %f2776, %f1569, %f2775; + fma.rn.f32 %f2777, %f2602, %f2767, %f2742; + fma.rn.f32 %f2778, %f2620, %f2769, %f2777; + fma.rn.f32 %f2779, %f2640, %f2772, %f2778; + fma.rn.f32 %f2780, %f2661, %f2776, %f2779; + sub.f32 %f2781, %f1569, %f2780; + div.rn.f32 %f2782, %f2781, %f2681; + fma.rn.f32 %f2783, %f2675, %f2782, 0f00000000; + sub.f32 %f2784, %f2776, %f2783; + mul.f32 %f2785, %f2659, %f2784; + fma.rn.f32 %f2786, %f2649, %f2785, 0f00000000; + fma.rn.f32 %f2787, %f2670, %f2782, %f2786; + sub.f32 %f2788, %f2772, %f2787; + mul.f32 %f2789, %f2633, %f2788; + fma.rn.f32 %f2790, %f2625, %f2789, 0f00000000; + fma.rn.f32 %f2791, %f2645, %f2785, %f2790; + fma.rn.f32 %f2792, %f2666, %f2782, %f2791; + sub.f32 %f2793, %f2769, %f2792; + mul.f32 %f2794, %f2610, %f2793; + sub.f32 %f2795, %f1569, %f2768; + fma.rn.f32 %f2796, %f2612, %f2795, %f2770; + sub.f32 %f2797, %f2581, %f2796; + fma.rn.f32 %f2798, %f2616, %f2795, %f2773; + fma.rn.f32 %f2799, %f2635, %f2797, %f2798; + sub.f32 %f2800, %f1569, %f2799; + fma.rn.f32 %f2801, %f2620, %f2795, %f2777; + fma.rn.f32 %f2802, %f2640, %f2797, %f2801; + fma.rn.f32 %f2803, %f2661, %f2800, %f2802; + sub.f32 %f2804, %f1569, %f2803; + div.rn.f32 %f2805, %f2804, %f2681; + fma.rn.f32 %f2806, %f2675, %f2805, 0f00000000; sub.f32 %f2807, %f2800, %f2806; - mul.f32 %f601, %f599, %f2807; - ld.local.f32 %f2808, [%rd14]; - st.local.v2.f32 [%rd14+84], {%f600, %f601}; - ld.local.v2.f32 {%f2809, %f2810}, [%rd14+92]; - fma.rn.f32 %f2813, %f2808, %f2809, 0f00000000; - sub.f32 %f602, %f2810, %f2813; - st.local.f32 [%rd14+96], %f602; - add.s64 %rd143, %rd2, 96; - add.s64 %rd142, %rd2, 8; - mov.f32 %f3400, 0f00000000; - mov.u32 %r335, -1; - -BB7_214: - ld.local.f32 %f2814, [%rd143]; - ld.local.f32 %f2815, [%rd142]; - fma.rn.f32 %f3400, %f2815, %f2814, %f3400; - add.s64 %rd143, %rd143, 4; - add.s64 %rd142, %rd142, 24; - add.s32 %r335, %r335, 1; - setp.lt.s32 %p328, %r335, 1; - @%p328 bra BB7_214; - - ld.local.f32 %f2817, [%rd2+104]; - sub.f32 %f605, %f2817, %f3400; - st.local.f32 [%rd2+104], %f605; - add.s64 %rd145, %rd2, 96; - add.s64 %rd144, %rd2, 12; - mov.f32 %f3401, 0f00000000; - mov.u32 %r336, -1; - -BB7_216: - ld.local.f32 %f2818, [%rd145]; - ld.local.f32 %f2819, [%rd144]; - fma.rn.f32 %f3401, %f2819, %f2818, %f3401; - add.s64 %rd145, %rd145, 4; - add.s64 %rd144, %rd144, 24; - add.s32 %r336, %r336, 1; - setp.lt.s32 %p329, %r336, 2; - @%p329 bra BB7_216; - - ld.local.f32 %f2821, [%rd2+108]; - sub.f32 %f608, %f2821, %f3401; - st.local.f32 [%rd2+108], %f608; - add.s64 %rd147, %rd2, 96; - add.s64 %rd146, %rd2, 16; - mov.f32 %f3402, 0f00000000; - mov.u32 %r337, -1; - -BB7_218: - ld.local.f32 %f2822, [%rd147]; - ld.local.f32 %f2823, [%rd146]; - fma.rn.f32 %f3402, %f2823, %f2822, %f3402; - add.s64 %rd147, %rd147, 4; - add.s64 %rd146, %rd146, 24; - add.s32 %r337, %r337, 1; - setp.lt.s32 %p330, %r337, 3; - @%p330 bra BB7_218; - - ld.local.v4.f32 {%f2825, %f2826, %f2827, %f2828}, [%rd14+108]; - ld.local.f32 %f611, [%rd14+92]; - fma.rn.f32 %f2833, %f577, %f611, 0f00000000; - fma.rn.f32 %f2834, %f583, %f602, %f2833; - fma.rn.f32 %f2835, %f591, %f605, %f2834; - fma.rn.f32 %f2836, %f601, %f608, %f2835; - sub.f32 %f2837, %f2825, %f3402; - rcp.rn.f32 %f612, %f2837; - sub.f32 %f2838, %f2826, %f2836; - mul.f32 %f613, %f612, %f2838; - ld.local.f32 %f2839, [%rd14]; - st.local.v2.f32 [%rd14+108], {%f2837, %f613}; - fma.rn.f32 %f2840, %f2839, %f2827, 0f00000000; - sub.f32 %f614, %f2828, %f2840; - st.local.f32 [%rd14+120], %f614; - add.s64 %rd149, %rd2, 120; - add.s64 %rd148, %rd2, 8; - mov.f32 %f3403, 0f00000000; - mov.u32 %r338, -1; - -BB7_220: - ld.local.f32 %f2841, [%rd149]; - ld.local.f32 %f2842, [%rd148]; - fma.rn.f32 %f3403, %f2842, %f2841, %f3403; - add.s64 %rd149, %rd149, 4; - add.s64 %rd148, %rd148, 24; - add.s32 %r338, %r338, 1; - setp.lt.s32 %p331, %r338, 1; - @%p331 bra BB7_220; - - ld.local.f32 %f2844, [%rd2+128]; - sub.f32 %f617, %f2844, %f3403; - st.local.f32 [%rd2+128], %f617; - add.s64 %rd151, %rd2, 120; - add.s64 %rd150, %rd2, 12; - mov.f32 %f3404, 0f00000000; - mov.u32 %r339, -1; - -BB7_222: - ld.local.f32 %f2845, [%rd151]; - ld.local.f32 %f2846, [%rd150]; - fma.rn.f32 %f3404, %f2846, %f2845, %f3404; - add.s64 %rd151, %rd151, 4; - add.s64 %rd150, %rd150, 24; - add.s32 %r339, %r339, 1; - setp.lt.s32 %p332, %r339, 2; - @%p332 bra BB7_222; - - ld.local.f32 %f2848, [%rd2+132]; - sub.f32 %f620, %f2848, %f3404; - st.local.f32 [%rd2+132], %f620; - add.s64 %rd153, %rd2, 120; - add.s64 %rd152, %rd2, 16; - mov.f32 %f3405, 0f00000000; - mov.u32 %r340, -1; - -BB7_224: - ld.local.f32 %f2849, [%rd153]; - ld.local.f32 %f2850, [%rd152]; - fma.rn.f32 %f3405, %f2850, %f2849, %f3405; - add.s64 %rd153, %rd153, 4; - add.s64 %rd152, %rd152, 24; - add.s32 %r340, %r340, 1; - setp.lt.s32 %p333, %r340, 3; - @%p333 bra BB7_224; - - ld.local.f32 %f2852, [%rd2+136]; - sub.f32 %f623, %f2852, %f3405; - st.local.f32 [%rd2+136], %f623; - add.s64 %rd155, %rd2, 120; - add.s64 %rd154, %rd2, 20; - mov.f32 %f3406, 0f00000000; - mov.u32 %r341, -1; - -BB7_226: - ld.local.f32 %f2853, [%rd155]; - ld.local.f32 %f2854, [%rd154]; - fma.rn.f32 %f3406, %f2854, %f2853, %f3406; - add.s64 %rd155, %rd155, 4; - add.s64 %rd154, %rd154, 24; - add.s32 %r341, %r341, 1; - setp.lt.s32 %p334, %r341, 4; - @%p334 bra BB7_226; - - mov.u32 %r308, %tid.x; - mov.u32 %r307, %ctaid.x; - mov.u32 %r306, %ntid.x; - mad.lo.s32 %r305, %r306, %r307, %r308; - ld.param.u64 %rd134, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_9]; - ld.param.u64 %rd133, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_8]; - ld.param.u32 %r297, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_10]; - ld.param.u64 %rd132, [_Z28kernel_MLEFit_SCMOSXYNBSXSY_PKfS0_S0_fiiiPfS1_S1_i_param_7]; - ld.local.f32 %f2855, [%rd2+140]; - sub.f32 %f2856, %f2855, %f3406; - ld.local.f32 %f2857, [%rd2+120]; - st.local.f32 [%rd2+140], %f2856; - add.f32 %f2858, %f573, 0f00000000; - mov.f32 %f2859, 0f00000000; - sub.f32 %f2860, %f2859, %f2858; - add.f32 %f2861, %f574, 0f00000000; - fma.rn.f32 %f2862, %f580, %f2860, %f2861; - sub.f32 %f2863, %f2859, %f2862; - add.f32 %f2864, %f575, 0f00000000; - fma.rn.f32 %f2865, %f581, %f2860, %f2864; - fma.rn.f32 %f2866, %f589, %f2863, %f2865; - sub.f32 %f2867, %f2859, %f2866; - add.f32 %f2868, %f576, 0f00000000; - fma.rn.f32 %f2869, %f582, %f2860, %f2868; - fma.rn.f32 %f2870, %f590, %f2863, %f2869; - fma.rn.f32 %f2871, %f600, %f2867, %f2870; - sub.f32 %f2872, %f2859, %f2871; - add.f32 %f2873, %f577, 0f00000000; - fma.rn.f32 %f2874, %f583, %f2860, %f2873; - fma.rn.f32 %f2875, %f591, %f2863, %f2874; - fma.rn.f32 %f2876, %f601, %f2867, %f2875; - fma.rn.f32 %f2877, %f613, %f2872, %f2876; - sub.f32 %f2878, %f2859, %f2877; - div.rn.f32 %f2879, %f2878, %f2856; - fma.rn.f32 %f2880, %f623, %f2879, 0f00000000; - sub.f32 %f2881, %f2872, %f2880; - mul.f32 %f2882, %f612, %f2881; - fma.rn.f32 %f2883, %f608, %f2882, 0f00000000; - fma.rn.f32 %f2884, %f620, %f2879, %f2883; - sub.f32 %f2885, %f2867, %f2884; - mul.f32 %f2886, %f599, %f2885; - fma.rn.f32 %f2887, %f595, %f2886, 0f00000000; - fma.rn.f32 %f2888, %f605, %f2882, %f2887; - fma.rn.f32 %f2889, %f617, %f2879, %f2888; - sub.f32 %f2890, %f2863, %f2889; - mul.f32 %f2891, %f588, %f2890; - fma.rn.f32 %f2892, %f584, %f2891, 0f00000000; - fma.rn.f32 %f2893, %f592, %f2886, %f2892; - fma.rn.f32 %f2894, %f602, %f2882, %f2893; - fma.rn.f32 %f2895, %f614, %f2879, %f2894; - sub.f32 %f2896, %f2860, %f2895; - mul.f32 %f2897, %f579, %f2896; - fma.rn.f32 %f2898, %f578, %f2897, 0f00000000; - fma.rn.f32 %f2899, %f2759, %f2891, %f2898; - fma.rn.f32 %f2900, %f598, %f2886, %f2899; - fma.rn.f32 %f2901, %f611, %f2882, %f2900; - fma.rn.f32 %f2902, %f2857, %f2879, %f2901; - mov.f32 %f2903, 0f3F800000; - sub.f32 %f2904, %f2903, %f2902; - mul.f32 %f2905, %f572, %f2904; - fma.rn.f32 %f2906, %f573, 0f00000000, 0f00000000; - sub.f32 %f2907, %f2903, %f2906; - fma.rn.f32 %f2908, %f574, 0f00000000, 0f00000000; - fma.rn.f32 %f2909, %f580, %f2907, %f2908; - sub.f32 %f2910, %f2859, %f2909; - fma.rn.f32 %f2911, %f575, 0f00000000, 0f00000000; - fma.rn.f32 %f2912, %f581, %f2907, %f2911; - fma.rn.f32 %f2913, %f589, %f2910, %f2912; - sub.f32 %f2914, %f2859, %f2913; - fma.rn.f32 %f2915, %f576, 0f00000000, 0f00000000; - fma.rn.f32 %f2916, %f582, %f2907, %f2915; - fma.rn.f32 %f2917, %f590, %f2910, %f2916; - fma.rn.f32 %f2918, %f600, %f2914, %f2917; - sub.f32 %f2919, %f2859, %f2918; - fma.rn.f32 %f2920, %f577, 0f00000000, 0f00000000; - fma.rn.f32 %f2921, %f583, %f2907, %f2920; - fma.rn.f32 %f2922, %f591, %f2910, %f2921; - fma.rn.f32 %f2923, %f601, %f2914, %f2922; - fma.rn.f32 %f2924, %f613, %f2919, %f2923; - sub.f32 %f2925, %f2859, %f2924; - div.rn.f32 %f2926, %f2925, %f2856; - fma.rn.f32 %f2927, %f623, %f2926, 0f00000000; - sub.f32 %f2928, %f2919, %f2927; - mul.f32 %f2929, %f612, %f2928; - fma.rn.f32 %f2930, %f608, %f2929, 0f00000000; - fma.rn.f32 %f2931, %f620, %f2926, %f2930; - sub.f32 %f2932, %f2914, %f2931; - mul.f32 %f2933, %f599, %f2932; - fma.rn.f32 %f2934, %f595, %f2933, 0f00000000; - fma.rn.f32 %f2935, %f605, %f2929, %f2934; - fma.rn.f32 %f2936, %f617, %f2926, %f2935; - sub.f32 %f2937, %f2910, %f2936; - mul.f32 %f2938, %f588, %f2937; - fma.rn.f32 %f2939, %f584, %f2938, 0f00000000; - fma.rn.f32 %f2940, %f592, %f2933, %f2939; - fma.rn.f32 %f2941, %f602, %f2929, %f2940; - fma.rn.f32 %f2942, %f614, %f2926, %f2941; - sub.f32 %f2943, %f2907, %f2942; - mul.f32 %f2944, %f579, %f2943; - sub.f32 %f2945, %f2859, %f2906; - fma.rn.f32 %f2946, %f580, %f2945, %f2908; - sub.f32 %f2947, %f2903, %f2946; - fma.rn.f32 %f2948, %f581, %f2945, %f2911; - fma.rn.f32 %f2949, %f589, %f2947, %f2948; - sub.f32 %f2950, %f2859, %f2949; - fma.rn.f32 %f2951, %f582, %f2945, %f2915; - fma.rn.f32 %f2952, %f590, %f2947, %f2951; - fma.rn.f32 %f2953, %f600, %f2950, %f2952; - sub.f32 %f2954, %f2859, %f2953; - fma.rn.f32 %f2955, %f583, %f2945, %f2920; - fma.rn.f32 %f2956, %f591, %f2947, %f2955; - fma.rn.f32 %f2957, %f601, %f2950, %f2956; - fma.rn.f32 %f2958, %f613, %f2954, %f2957; - sub.f32 %f2959, %f2859, %f2958; - div.rn.f32 %f2960, %f2959, %f2856; - fma.rn.f32 %f2961, %f623, %f2960, 0f00000000; - sub.f32 %f2962, %f2954, %f2961; - mul.f32 %f2963, %f612, %f2962; - fma.rn.f32 %f2964, %f608, %f2963, 0f00000000; - fma.rn.f32 %f2965, %f620, %f2960, %f2964; - sub.f32 %f2966, %f2950, %f2965; - mul.f32 %f2967, %f599, %f2966; - fma.rn.f32 %f2968, %f595, %f2967, 0f00000000; - fma.rn.f32 %f2969, %f605, %f2963, %f2968; - fma.rn.f32 %f2970, %f617, %f2960, %f2969; - sub.f32 %f2971, %f2947, %f2970; - mul.f32 %f2972, %f588, %f2971; - sub.f32 %f2973, %f2859, %f2946; - fma.rn.f32 %f2974, %f589, %f2973, %f2948; - sub.f32 %f2975, %f2903, %f2974; - fma.rn.f32 %f2976, %f590, %f2973, %f2951; - fma.rn.f32 %f2977, %f600, %f2975, %f2976; - sub.f32 %f2978, %f2859, %f2977; - fma.rn.f32 %f2979, %f591, %f2973, %f2955; - fma.rn.f32 %f2980, %f601, %f2975, %f2979; - fma.rn.f32 %f2981, %f613, %f2978, %f2980; - sub.f32 %f2982, %f2859, %f2981; - div.rn.f32 %f2983, %f2982, %f2856; - fma.rn.f32 %f2984, %f623, %f2983, 0f00000000; - sub.f32 %f2985, %f2978, %f2984; - mul.f32 %f2986, %f612, %f2985; - fma.rn.f32 %f2987, %f608, %f2986, 0f00000000; - fma.rn.f32 %f2988, %f620, %f2983, %f2987; - sub.f32 %f2989, %f2975, %f2988; - mul.f32 %f2990, %f599, %f2989; - sub.f32 %f2991, %f2859, %f2974; - fma.rn.f32 %f2992, %f600, %f2991, %f2976; - sub.f32 %f2993, %f2903, %f2992; - fma.rn.f32 %f2994, %f601, %f2991, %f2979; - fma.rn.f32 %f2995, %f613, %f2993, %f2994; - sub.f32 %f2996, %f2859, %f2995; - div.rn.f32 %f2997, %f2996, %f2856; - fma.rn.f32 %f2998, %f623, %f2997, 0f00000000; - sub.f32 %f2999, %f2993, %f2998; - mul.f32 %f3000, %f612, %f2999; - sub.f32 %f3001, %f2859, %f2992; - fma.rn.f32 %f3002, %f613, %f3001, %f2994; - sub.f32 %f3003, %f2903, %f3002; - div.rn.f32 %f3004, %f3003, %f2856; - cvta.to.global.u64 %rd114, %rd132; - mul.wide.s32 %rd115, %r305, 4; - add.s64 %rd116, %rd114, %rd115; - st.global.f32 [%rd116], %f3343; - shl.b32 %r293, %r297, 2; - cvt.s64.s32 %rd117, %r293; - add.s64 %rd118, %rd116, %rd117; - st.global.f32 [%rd118], %f3342; - add.s64 %rd119, %rd118, %rd117; - st.global.f32 [%rd119], %f3341; - add.s64 %rd120, %rd119, %rd117; - st.global.f32 [%rd120], %f3246; - add.s64 %rd121, %rd120, %rd117; - st.global.f32 [%rd121], %f3339; - add.s64 %rd122, %rd121, %rd117; - st.global.f32 [%rd122], %f3338; - cvta.to.global.u64 %rd123, %rd133; - add.s64 %rd124, %rd123, %rd115; - st.global.f32 [%rd124], %f2905; - add.s64 %rd125, %rd124, %rd117; - st.global.f32 [%rd125], %f2944; - add.s64 %rd126, %rd125, %rd117; - st.global.f32 [%rd126], %f2972; - add.s64 %rd127, %rd126, %rd117; - st.global.f32 [%rd127], %f2990; - add.s64 %rd128, %rd127, %rd117; - st.global.f32 [%rd128], %f3000; - add.s64 %rd129, %rd128, %rd117; - st.global.f32 [%rd129], %f3004; - cvta.to.global.u64 %rd130, %rd134; - add.s64 %rd131, %rd130, %rd115; - st.global.f32 [%rd131], %f3367; - -BB7_228: + mul.f32 %f2808, %f2659, %f2807; + fma.rn.f32 %f2809, %f2649, %f2808, 0f00000000; + fma.rn.f32 %f2810, %f2670, %f2805, %f2809; + sub.f32 %f2811, %f2797, %f2810; + mul.f32 %f2812, %f2633, %f2811; + sub.f32 %f2813, %f1569, %f2796; + fma.rn.f32 %f2814, %f2635, %f2813, %f2798; + sub.f32 %f2815, %f2581, %f2814; + fma.rn.f32 %f2816, %f2640, %f2813, %f2801; + fma.rn.f32 %f2817, %f2661, %f2815, %f2816; + sub.f32 %f2818, %f1569, %f2817; + div.rn.f32 %f2819, %f2818, %f2681; + fma.rn.f32 %f2820, %f2675, %f2819, 0f00000000; + sub.f32 %f2821, %f2815, %f2820; + mul.f32 %f2822, %f2659, %f2821; + sub.f32 %f2823, %f1569, %f2814; + fma.rn.f32 %f2824, %f2661, %f2823, %f2816; + sub.f32 %f2825, %f2581, %f2824; + div.rn.f32 %f2826, %f2825, %f2681; + cvta.to.global.u64 %rd34, %rd54; + mul.wide.s32 %rd35, %r1, 4; + add.s64 %rd36, %rd34, %rd35; + st.global.f32 [%rd36], %f3069; + add.s32 %r827, %r1, %r833; + mul.wide.s32 %rd37, %r833, 4; + add.s64 %rd38, %rd36, %rd37; + st.global.f32 [%rd38], %f3068; + add.s32 %r828, %r827, %r833; + mul.wide.s32 %rd39, %r828, 4; + add.s64 %rd40, %rd34, %rd39; + st.global.f32 [%rd40], %f3067; + shl.b32 %r829, %r833, 2; + cvt.s64.s32 %rd41, %r829; + add.s64 %rd42, %rd40, %rd41; + st.global.f32 [%rd42], %f3066; + add.s64 %rd43, %rd42, %rd41; + st.global.f32 [%rd43], %f3065; + add.s64 %rd44, %rd43, %rd41; + st.global.f32 [%rd44], %f3064; + cvta.to.global.u64 %rd45, %rd55; + add.s64 %rd46, %rd45, %rd35; + st.global.f32 [%rd46], %f2727; + add.s64 %rd47, %rd46, %rd37; + st.global.f32 [%rd47], %f2766; + add.s64 %rd48, %rd45, %rd39; + st.global.f32 [%rd48], %f2794; + add.s64 %rd49, %rd48, %rd41; + st.global.f32 [%rd49], %f2812; + add.s64 %rd50, %rd49, %rd41; + st.global.f32 [%rd50], %f2822; + add.s64 %rd51, %rd50, %rd41; + st.global.f32 [%rd51], %f2826; + cvta.to.global.u64 %rd52, %rd56; + add.s64 %rd53, %rd52, %rd35; + st.global.f32 [%rd53], %f3141; + +$L__BB7_463: ret; -} +} // .globl _Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i .visible .entry _Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i( .param .u64 _Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_0, @@ -30041,3233 +50992,3879 @@ BB7_228: .local .align 16 .b8 __local_depot8[192]; .reg .b64 %SP; .reg .b64 %SPL; - .reg .pred %p<258>; - .reg .f32 %f<2298>; - .reg .b32 %r<435>; - .reg .f64 %fd<4>; - .reg .b64 %rd<167>; + .reg .pred %p<408>; + .reg .f32 %f<1819>; + .reg .b32 %r<662>; + .reg .f64 %fd<399>; + .reg .b64 %rd<144>; mov.u64 %SPL, __local_depot8; - ld.param.u64 %rd31, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_0]; - ld.param.f32 %f472, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_1]; - ld.param.u32 %r102, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_2]; - ld.param.u32 %r103, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_3]; - ld.param.u32 %r104, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_4]; - ld.param.f32 %f473, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_5]; - ld.param.u32 %r105, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_11]; - cvta.to.global.u64 %rd1, %rd31; + ld.param.u64 %rd51, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_0]; + ld.param.f32 %f378, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_1]; + ld.param.u32 %r151, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_2]; + ld.param.u32 %r153, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_4]; + ld.param.f32 %f379, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_5]; + ld.param.u32 %r154, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_11]; + cvta.to.global.u64 %rd1, %rd51; add.u64 %rd2, %SPL, 0; add.u64 %rd3, %SPL, 32; add.u64 %rd4, %SPL, 64; add.u64 %rd5, %SPL, 96; add.u64 %rd6, %SPL, 128; add.u64 %rd7, %SPL, 160; - mov.f32 %f1, 0f00000000; - st.local.v4.f32 [%rd2], {%f1, %f1, %f1, %f1}; - st.local.v4.f32 [%rd2+16], {%f1, %f1, %f1, %f1}; - st.local.v4.f32 [%rd3], {%f1, %f1, %f1, %f1}; - st.local.v4.f32 [%rd3+16], {%f1, %f1, %f1, %f1}; - st.local.v4.f32 [%rd4], {%f1, %f1, %f1, %f1}; - st.local.v4.f32 [%rd4+16], {%f1, %f1, %f1, %f1}; - st.local.v4.f32 [%rd5], {%f1, %f1, %f1, %f1}; - st.local.v4.f32 [%rd5+16], {%f1, %f1, %f1, %f1}; - st.local.v4.f32 [%rd6], {%f1, %f1, %f1, %f1}; - st.local.v4.f32 [%rd6+16], {%f1, %f1, %f1, %f1}; - st.local.v4.f32 [%rd7], {%f1, %f1, %f1, %f1}; - st.local.v4.f32 [%rd7+16], {%f1, %f1, %f1, %f1}; - mov.u32 %r106, %ntid.x; - mov.u32 %r107, %ctaid.x; - mov.u32 %r108, %tid.x; - mad.lo.s32 %r109, %r106, %r107, %r108; - mul.lo.s32 %r110, %r102, %r102; - mul.lo.s32 %r1, %r109, %r110; - setp.ge.s32 %p7, %r109, %r105; - @%p7 bra BB8_238; - - setp.lt.s32 %p8, %r102, 1; - mov.f32 %f2, %f1; - mov.f32 %f3, %f1; - @%p8 bra BB8_15; - - and.b32 %r2, %r102, 3; - shl.b32 %r3, %r102, 2; - mov.f32 %f481, 0f00000000; - mov.u32 %r111, 0; - mov.u32 %r387, %r111; - mov.f32 %f1, %f481; - mov.f32 %f2, %f481; - mov.f32 %f3, %f481; - -BB8_3: - cvt.rn.f32.s32 %f4, %r387; - setp.eq.s32 %p9, %r2, 0; - @%p9 bra BB8_4; - - setp.eq.s32 %p10, %r2, 1; - @%p10 bra BB8_6; - bra.uni BB8_7; - -BB8_6: - mov.u32 %r389, %r111; - bra.uni BB8_11; - -BB8_4: - mov.u32 %r391, %r111; - mov.f32 %f2128, %f1; - mov.f32 %f2129, %f2; - mov.f32 %f2130, %f3; - mov.f32 %f1, %f481; - mov.f32 %f2, %f481; - mov.f32 %f3, %f481; - bra.uni BB8_12; - -BB8_7: - setp.eq.s32 %p11, %r2, 2; - @%p11 bra BB8_8; - bra.uni BB8_9; - -BB8_8: - mov.u32 %r388, %r111; - bra.uni BB8_10; - -BB8_9: - add.s32 %r116, %r387, %r1; - mul.wide.s32 %rd38, %r116, 4; - add.s64 %rd39, %rd1, %rd38; - ld.global.f32 %f485, [%rd39]; - fma.rn.f32 %f3, %f4, %f485, %f3; - fma.rn.f32 %f2, %f485, 0f00000000, %f2; - add.f32 %f1, %f1, %f485; - mov.u32 %r388, 1; - -BB8_10: - neg.s32 %r117, %r388; - and.b32 %r118, %r117, %r102; - add.s32 %r119, %r118, %r387; - add.s32 %r120, %r119, %r1; - mul.wide.s32 %rd40, %r120, 4; - add.s64 %rd41, %rd1, %rd40; - ld.global.f32 %f486, [%rd41]; - fma.rn.f32 %f3, %f4, %f486, %f3; - cvt.rn.f32.s32 %f487, %r388; - fma.rn.f32 %f2, %f487, %f486, %f2; - add.f32 %f1, %f1, %f486; - add.s32 %r389, %r388, 1; - -BB8_11: - mad.lo.s32 %r121, %r389, %r102, %r387; - add.s32 %r122, %r121, %r1; - mul.wide.s32 %rd42, %r122, 4; - add.s64 %rd43, %rd1, %rd42; - ld.global.f32 %f488, [%rd43]; - fma.rn.f32 %f2130, %f4, %f488, %f3; - cvt.rn.f32.s32 %f489, %r389; - fma.rn.f32 %f2129, %f489, %f488, %f2; - add.f32 %f2128, %f1, %f488; - add.s32 %r391, %r389, 1; - mov.f32 %f1, %f2128; - mov.f32 %f2, %f2129; - mov.f32 %f3, %f2130; - -BB8_12: - setp.lt.u32 %p12, %r102, 4; - @%p12 bra BB8_14; - -BB8_13: - mad.lo.s32 %r123, %r391, %r102, %r387; - add.s32 %r124, %r123, %r1; - mul.wide.s32 %rd44, %r124, 4; - add.s64 %rd45, %rd1, %rd44; - ld.global.f32 %f490, [%rd45]; - fma.rn.f32 %f491, %f4, %f490, %f2130; - cvt.rn.f32.s32 %f492, %r391; - fma.rn.f32 %f493, %f492, %f490, %f2129; - add.f32 %f494, %f2128, %f490; - cvt.s64.s32 %rd46, %r3; - add.s64 %rd47, %rd45, %rd46; - ld.global.f32 %f495, [%rd47]; - fma.rn.f32 %f496, %f4, %f495, %f491; - add.s32 %r125, %r391, 1; - cvt.rn.f32.s32 %f497, %r125; - fma.rn.f32 %f498, %f497, %f495, %f493; - add.f32 %f499, %f494, %f495; - add.s64 %rd48, %rd47, %rd46; - ld.global.f32 %f500, [%rd48]; - fma.rn.f32 %f501, %f4, %f500, %f496; - add.s32 %r126, %r391, 2; - cvt.rn.f32.s32 %f502, %r126; - fma.rn.f32 %f503, %f502, %f500, %f498; - add.f32 %f504, %f499, %f500; - add.s64 %rd49, %rd48, %rd46; - ld.global.f32 %f505, [%rd49]; - fma.rn.f32 %f2130, %f4, %f505, %f501; - add.s32 %r127, %r391, 3; - cvt.rn.f32.s32 %f506, %r127; - fma.rn.f32 %f2129, %f506, %f505, %f503; - add.f32 %f2128, %f504, %f505; - add.s32 %r391, %r391, 4; - setp.lt.s32 %p13, %r391, %r102; - mov.f32 %f1, %f2128; - mov.f32 %f2, %f2129; - mov.f32 %f3, %f2130; - @%p13 bra BB8_13; - -BB8_14: - add.s32 %r387, %r387, 1; - setp.lt.s32 %p14, %r387, %r102; - @%p14 bra BB8_3; - -BB8_15: - div.rn.f32 %f2178, %f3, %f1; - div.rn.f32 %f2179, %f2, %f1; - mov.f32 %f508, 0f3F000000; - div.rn.f32 %f509, %f508, %f472; - div.rn.f32 %f40, %f509, %f472; - mov.f32 %f2138, 0f51BA43B7; - @%p8 bra BB8_34; - - and.b32 %r13, %r102, 3; - mov.f32 %f2138, 0f51BA43B7; - mov.u32 %r128, 0; - mov.u32 %r392, %r128; - -BB8_17: - mov.u32 %r393, %r128; - -BB8_18: - cvt.rn.f32.s32 %f513, %r393; - mul.f32 %f514, %f513, %f513; - mul.f32 %f43, %f40, %f514; - neg.f32 %f44, %f43; - mul.f32 %f45, %f43, 0fBFB8AA3B; - mov.f32 %f512, 0f00000000; - mov.f32 %f2155, %f512; - mov.f32 %f2156, %f512; - mov.u32 %r394, %r128; - -BB8_19: - sub.s32 %r132, %r394, %r392; - cvt.rn.f32.s32 %f48, %r132; - mul.lo.s32 %r17, %r394, %r102; - setp.eq.s32 %p16, %r13, 0; - @%p16 bra BB8_20; - - setp.eq.s32 %p17, %r13, 1; - @%p17 bra BB8_24; - bra.uni BB8_22; - -BB8_24: - mul.f32 %f529, %f48, %f48; - mul.f32 %f2146, %f40, %f529; - neg.f32 %f530, %f2146; - mul.f32 %f531, %f2146, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f532, %f531; - mov.f32 %f533, 0fBF317200; - fma.rn.f32 %f534, %f532, %f533, %f530; - mov.f32 %f535, 0fB5BFBE8E; - fma.rn.f32 %f536, %f532, %f535, %f534; - mul.f32 %f537, %f536, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f538, %f537; - add.f32 %f539, %f532, 0f00000000; - ex2.approx.f32 %f540, %f539; - mul.f32 %f2145, %f538, %f540; - mov.u32 %r396, 0; - bra.uni BB8_27; - -BB8_20: - mov.f32 %f2149, %f2155; - mov.f32 %f2150, %f2156; - mov.u32 %r398, %r128; - mov.f32 %f2155, %f512; - mov.f32 %f2156, %f512; - bra.uni BB8_28; - -BB8_22: - setp.ne.s32 %p18, %r13, 2; - @%p18 bra BB8_25; - - mul.f32 %f517, %f48, %f48; - mul.f32 %f2146, %f40, %f517; - neg.f32 %f518, %f2146; - mul.f32 %f519, %f2146, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f520, %f519; - mov.f32 %f521, 0fBF317200; - fma.rn.f32 %f522, %f520, %f521, %f518; - mov.f32 %f523, 0fB5BFBE8E; - fma.rn.f32 %f524, %f520, %f523, %f522; - mul.f32 %f525, %f524, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f526, %f525; - add.f32 %f527, %f520, 0f00000000; - ex2.approx.f32 %f528, %f527; - mul.f32 %f2145, %f526, %f528; - mov.u32 %r395, 0; - bra.uni BB8_26; - -BB8_25: - setp.lt.f32 %p19, %f43, 0fC2D20000; - mul.f32 %f541, %f48, %f48; - mul.f32 %f2146, %f40, %f541; - neg.f32 %f542, %f2146; - mul.f32 %f543, %f2146, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f544, %f543; - mov.f32 %f545, 0fBF317200; - fma.rn.f32 %f546, %f544, %f545, %f542; - mov.f32 %f547, 0fB5BFBE8E; - fma.rn.f32 %f548, %f544, %f547, %f546; - mul.f32 %f549, %f548, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f550, %f549; - add.f32 %f551, %f544, 0f00000000; - ex2.approx.f32 %f552, %f551; - mul.f32 %f2145, %f550, %f552; - setp.gt.f32 %p20, %f2146, 0f42D20000; - selp.f32 %f553, 0f00000000, %f2145, %p20; - setp.lt.f32 %p21, %f2146, 0fC2D20000; - selp.f32 %f554, 0f7F800000, %f553, %p21; - cvt.rzi.f32.f32 %f555, %f45; - fma.rn.f32 %f556, %f555, %f545, %f44; - fma.rn.f32 %f557, %f555, %f547, %f556; - mul.f32 %f558, %f557, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f559, %f558; - add.f32 %f560, %f555, 0f00000000; - ex2.approx.f32 %f561, %f560; - mul.f32 %f562, %f559, %f561; - setp.gt.f32 %p22, %f43, 0f42D20000; - selp.f32 %f563, 0f00000000, %f562, %p22; - selp.f32 %f564, 0f7F800000, %f563, %p19; - mul.f32 %f565, %f554, %f564; - add.s32 %r136, %r17, %r1; - mul.wide.s32 %rd50, %r136, 4; - add.s64 %rd51, %rd1, %rd50; - ld.global.f32 %f566, [%rd51]; - fma.rn.f32 %f2156, %f566, %f565, %f2156; - add.f32 %f2155, %f2155, %f565; - mov.u32 %r395, 1; - -BB8_26: - sub.s32 %r137, %r393, %r395; - cvt.rn.f32.s32 %f567, %r137; - mul.f32 %f568, %f567, %f567; - setp.gt.f32 %p23, %f2146, 0f42D20000; - selp.f32 %f569, 0f00000000, %f2145, %p23; - setp.lt.f32 %p24, %f2146, 0fC2D20000; - selp.f32 %f570, 0f7F800000, %f569, %p24; - mul.f32 %f571, %f40, %f568; - neg.f32 %f572, %f571; - mul.f32 %f573, %f571, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f574, %f573; - mov.f32 %f575, 0fBF317200; - fma.rn.f32 %f576, %f574, %f575, %f572; - mov.f32 %f577, 0fB5BFBE8E; - fma.rn.f32 %f578, %f574, %f577, %f576; - mul.f32 %f579, %f578, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f580, %f579; - add.f32 %f581, %f574, 0f00000000; - ex2.approx.f32 %f582, %f581; - mul.f32 %f583, %f580, %f582; - setp.gt.f32 %p25, %f571, 0f42D20000; - selp.f32 %f584, 0f00000000, %f583, %p25; - setp.lt.f32 %p26, %f571, 0fC2D20000; - selp.f32 %f585, 0f7F800000, %f584, %p26; - mul.f32 %f586, %f570, %f585; - add.s32 %r138, %r395, %r17; - add.s32 %r139, %r138, %r1; - mul.wide.s32 %rd52, %r139, 4; - add.s64 %rd53, %rd1, %rd52; - ld.global.f32 %f587, [%rd53]; - fma.rn.f32 %f2156, %f587, %f586, %f2156; - add.f32 %f2155, %f2155, %f586; - add.s32 %r396, %r395, 1; - -BB8_27: - sub.s32 %r140, %r393, %r396; - cvt.rn.f32.s32 %f588, %r140; - mul.f32 %f589, %f588, %f588; - setp.gt.f32 %p27, %f2146, 0f42D20000; - selp.f32 %f590, 0f00000000, %f2145, %p27; - setp.lt.f32 %p28, %f2146, 0fC2D20000; - selp.f32 %f591, 0f7F800000, %f590, %p28; - mul.f32 %f592, %f40, %f589; - neg.f32 %f593, %f592; - mul.f32 %f594, %f592, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f595, %f594; - mov.f32 %f596, 0fBF317200; - fma.rn.f32 %f597, %f595, %f596, %f593; - mov.f32 %f598, 0fB5BFBE8E; - fma.rn.f32 %f599, %f595, %f598, %f597; - mul.f32 %f600, %f599, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f601, %f600; - add.f32 %f602, %f595, 0f00000000; - ex2.approx.f32 %f603, %f602; - mul.f32 %f604, %f601, %f603; - setp.gt.f32 %p29, %f592, 0f42D20000; - selp.f32 %f605, 0f00000000, %f604, %p29; - setp.lt.f32 %p30, %f592, 0fC2D20000; - selp.f32 %f606, 0f7F800000, %f605, %p30; - mul.f32 %f607, %f591, %f606; - add.s32 %r141, %r396, %r17; - add.s32 %r142, %r141, %r1; - mul.wide.s32 %rd54, %r142, 4; - add.s64 %rd55, %rd1, %rd54; - ld.global.f32 %f608, [%rd55]; - fma.rn.f32 %f2150, %f608, %f607, %f2156; - add.f32 %f2149, %f2155, %f607; - add.s32 %r398, %r396, 1; - mov.f32 %f2155, %f2149; - mov.f32 %f2156, %f2150; - -BB8_28: - setp.lt.u32 %p31, %r102, 4; - @%p31 bra BB8_31; - - mul.f32 %f609, %f48, %f48; - mul.f32 %f610, %f40, %f609; - neg.f32 %f611, %f610; - mul.f32 %f612, %f610, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f613, %f612; - mov.f32 %f614, 0fBF317200; - fma.rn.f32 %f615, %f613, %f614, %f611; - mov.f32 %f616, 0fB5BFBE8E; - fma.rn.f32 %f617, %f613, %f616, %f615; - mul.f32 %f618, %f617, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f619, %f618; - add.f32 %f620, %f613, 0f00000000; - ex2.approx.f32 %f621, %f620; - mul.f32 %f622, %f619, %f621; - setp.gt.f32 %p32, %f610, 0f42D20000; - selp.f32 %f623, 0f00000000, %f622, %p32; - setp.lt.f32 %p33, %f610, 0fC2D20000; - selp.f32 %f73, 0f7F800000, %f623, %p33; - mov.f32 %f2155, %f2149; - mov.f32 %f2156, %f2150; - -BB8_30: - sub.s32 %r143, %r393, %r398; - cvt.rn.f32.s32 %f624, %r143; - mul.f32 %f625, %f624, %f624; - mul.f32 %f626, %f40, %f625; - neg.f32 %f627, %f626; - mul.f32 %f628, %f626, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f629, %f628; - fma.rn.f32 %f631, %f629, %f614, %f627; - fma.rn.f32 %f633, %f629, %f616, %f631; - mul.f32 %f634, %f633, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f635, %f634; - add.f32 %f636, %f629, 0f00000000; - ex2.approx.f32 %f637, %f636; - mul.f32 %f638, %f635, %f637; - setp.gt.f32 %p34, %f626, 0f42D20000; - selp.f32 %f639, 0f00000000, %f638, %p34; - setp.lt.f32 %p35, %f626, 0fC2D20000; - selp.f32 %f640, 0f7F800000, %f639, %p35; - mul.f32 %f641, %f73, %f640; - add.s32 %r144, %r398, %r17; - add.s32 %r145, %r144, %r1; - mul.wide.s32 %rd56, %r145, 4; - add.s64 %rd57, %rd1, %rd56; - ld.global.f32 %f642, [%rd57]; - fma.rn.f32 %f643, %f642, %f641, %f2156; - add.f32 %f644, %f2155, %f641; - add.s32 %r146, %r398, 1; - sub.s32 %r147, %r393, %r146; - cvt.rn.f32.s32 %f645, %r147; - mul.f32 %f646, %f645, %f645; - mul.f32 %f647, %f40, %f646; - neg.f32 %f648, %f647; - mul.f32 %f649, %f647, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f650, %f649; - fma.rn.f32 %f651, %f650, %f614, %f648; - fma.rn.f32 %f652, %f650, %f616, %f651; - mul.f32 %f653, %f652, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f654, %f653; - add.f32 %f655, %f650, 0f00000000; - ex2.approx.f32 %f656, %f655; - mul.f32 %f657, %f654, %f656; - setp.gt.f32 %p36, %f647, 0f42D20000; - selp.f32 %f658, 0f00000000, %f657, %p36; - setp.lt.f32 %p37, %f647, 0fC2D20000; - selp.f32 %f659, 0f7F800000, %f658, %p37; - mul.f32 %f660, %f73, %f659; - ld.global.f32 %f661, [%rd57+4]; - fma.rn.f32 %f662, %f661, %f660, %f643; - add.f32 %f663, %f644, %f660; - add.s32 %r148, %r398, 2; - sub.s32 %r149, %r393, %r148; - cvt.rn.f32.s32 %f664, %r149; + mov.f32 %f1683, 0f00000000; + st.local.v4.f32 [%rd2], {%f1683, %f1683, %f1683, %f1683}; + st.local.v4.f32 [%rd2+16], {%f1683, %f1683, %f1683, %f1683}; + st.local.v4.f32 [%rd3], {%f1683, %f1683, %f1683, %f1683}; + st.local.v4.f32 [%rd3+16], {%f1683, %f1683, %f1683, %f1683}; + st.local.v4.f32 [%rd4], {%f1683, %f1683, %f1683, %f1683}; + st.local.v4.f32 [%rd4+16], {%f1683, %f1683, %f1683, %f1683}; + st.local.v4.f32 [%rd5], {%f1683, %f1683, %f1683, %f1683}; + st.local.v4.f32 [%rd5+16], {%f1683, %f1683, %f1683, %f1683}; + st.local.v4.f32 [%rd6], {%f1683, %f1683, %f1683, %f1683}; + st.local.v4.f32 [%rd6+16], {%f1683, %f1683, %f1683, %f1683}; + st.local.v4.f32 [%rd7], {%f1683, %f1683, %f1683, %f1683}; + st.local.v4.f32 [%rd7+16], {%f1683, %f1683, %f1683, %f1683}; + mov.u32 %r155, %ntid.x; + mov.u32 %r156, %ctaid.x; + mov.u32 %r157, %tid.x; + mad.lo.s32 %r158, %r155, %r156, %r157; + mul.lo.s32 %r159, %r151, %r151; + mul.lo.s32 %r1, %r158, %r159; + setp.ge.s32 %p17, %r158, %r154; + @%p17 bra $L__BB8_303; + + setp.lt.s32 %p18, %r151, 1; + mov.f32 %f1684, %f1683; + mov.f32 %f1685, %f1683; + @%p18 bra $L__BB8_11; + + add.s32 %r2, %r151, -1; + and.b32 %r3, %r151, 3; + sub.s32 %r4, %r151, %r3; + shl.b32 %r5, %r151, 2; + mov.u32 %r160, 0; + setp.lt.u32 %p19, %r2, 3; + setp.eq.s32 %p21, %r3, 0; + setp.eq.s32 %p22, %r3, 1; + setp.eq.s32 %p23, %r3, 2; + cvt.s64.s32 %rd60, %r5; + mov.u32 %r618, %r160; + mov.f32 %f1685, %f1683; + mov.f32 %f1684, %f1683; + +$L__BB8_3: + cvt.rn.f32.s32 %f4, %r618; + mov.u32 %r621, %r160; + @%p19 bra $L__BB8_6; + + mov.u32 %r621, %r160; + mov.u32 %r620, %r4; + +$L__BB8_5: + mad.lo.s32 %r163, %r621, %r151, %r618; + add.s32 %r164, %r163, %r1; + mul.wide.s32 %rd58, %r164, 4; + add.s64 %rd59, %rd1, %rd58; + ld.global.f32 %f389, [%rd59]; + fma.rn.f32 %f390, %f389, %f4, %f1683; + cvt.rn.f32.s32 %f391, %r621; + fma.rn.f32 %f392, %f389, %f391, %f1684; + add.f32 %f393, %f1685, %f389; + add.s64 %rd61, %rd59, %rd60; + ld.global.f32 %f394, [%rd61]; + fma.rn.f32 %f395, %f394, %f4, %f390; + add.s32 %r165, %r621, 1; + cvt.rn.f32.s32 %f396, %r165; + fma.rn.f32 %f397, %f394, %f396, %f392; + add.f32 %f398, %f393, %f394; + add.s64 %rd62, %rd61, %rd60; + ld.global.f32 %f399, [%rd62]; + fma.rn.f32 %f400, %f399, %f4, %f395; + add.s32 %r166, %r621, 2; + cvt.rn.f32.s32 %f401, %r166; + fma.rn.f32 %f402, %f399, %f401, %f397; + add.f32 %f403, %f398, %f399; + add.s64 %rd63, %rd62, %rd60; + ld.global.f32 %f404, [%rd63]; + fma.rn.f32 %f1683, %f404, %f4, %f400; + add.s32 %r167, %r621, 3; + cvt.rn.f32.s32 %f405, %r167; + fma.rn.f32 %f1684, %f404, %f405, %f402; + add.f32 %f1685, %f403, %f404; + add.s32 %r621, %r621, 4; + add.s32 %r620, %r620, -4; + setp.ne.s32 %p20, %r620, 0; + @%p20 bra $L__BB8_5; + +$L__BB8_6: + @%p21 bra $L__BB8_10; + + mad.lo.s32 %r12, %r621, %r151, %r618; + add.s32 %r168, %r12, %r1; + mul.wide.s32 %rd64, %r168, 4; + add.s64 %rd65, %rd1, %rd64; + ld.global.f32 %f406, [%rd65]; + fma.rn.f32 %f1683, %f406, %f4, %f1683; + cvt.rn.f32.s32 %f407, %r621; + fma.rn.f32 %f1684, %f406, %f407, %f1684; + add.f32 %f1685, %f1685, %f406; + @%p22 bra $L__BB8_10; + + add.s32 %r13, %r12, %r151; + add.s32 %r169, %r13, %r1; + mul.wide.s32 %rd66, %r169, 4; + add.s64 %rd67, %rd1, %rd66; + ld.global.f32 %f408, [%rd67]; + fma.rn.f32 %f1683, %f408, %f4, %f1683; + add.s32 %r170, %r621, 1; + cvt.rn.f32.s32 %f409, %r170; + fma.rn.f32 %f1684, %f408, %f409, %f1684; + add.f32 %f1685, %f1685, %f408; + @%p23 bra $L__BB8_10; + + add.s32 %r171, %r621, 2; + add.s32 %r172, %r13, %r151; + add.s32 %r173, %r172, %r1; + mul.wide.s32 %rd68, %r173, 4; + add.s64 %rd69, %rd1, %rd68; + ld.global.f32 %f410, [%rd69]; + fma.rn.f32 %f1683, %f410, %f4, %f1683; + cvt.rn.f32.s32 %f411, %r171; + fma.rn.f32 %f1684, %f410, %f411, %f1684; + add.f32 %f1685, %f1685, %f410; + +$L__BB8_10: + add.s32 %r618, %r618, 1; + setp.lt.s32 %p24, %r618, %r151; + @%p24 bra $L__BB8_3; + +$L__BB8_11: + div.rn.f32 %f1774, %f1683, %f1685; + div.rn.f32 %f1773, %f1684, %f1685; + mov.f32 %f413, 0f3F000000; + div.rn.f32 %f414, %f413, %f378; + div.rn.f32 %f34, %f414, %f378; + mov.f32 %f1690, 0f51BA43B7; + @%p18 bra $L__BB8_51; + + cvt.f64.f32 %fd1, %f34; + mov.f64 %fd115, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r15}, %fd115; + } + and.b32 %r16, %r15, 2146435072; + and.b32 %r17, %r15, 2147483647; + setp.gt.s32 %p26, %r15, -1; + selp.b32 %r18, 2146435072, 0, %p26; + mov.u32 %r174, 0; + or.b32 %r19, %r18, -2147483648; + setp.eq.s32 %p28, %r16, 1062207488; + setp.lt.s32 %p29, %r15, 0; + setp.ne.s32 %p34, %r17, 1071644672; + setp.eq.s32 %p61, %r17, 2146435072; + mov.u32 %r622, %r174; + +$L__BB8_13: + mov.u32 %r623, %r174; + +$L__BB8_14: + mov.f32 %f1693, 0f00000000; + mov.f32 %f1694, %f1693; + mov.u32 %r624, %r174; + +$L__BB8_15: + sub.s32 %r23, %r624, %r622; + cvt.rn.f32.s32 %f418, %r23; + cvt.f64.f32 %fd2, %f418; + { + .reg .b32 %temp; + mov.b64 {%temp, %r24}, %fd2; + } + abs.f64 %fd116, %fd2; + { // callseq 174, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd116; + .param .b64 param1; + st.param.f64 [param1+0], %fd115; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd3, [retval0+0]; + } // callseq 174 + setp.lt.s32 %p27, %r24, 0; + and.pred %p1, %p27, %p28; + selp.b32 %r178, %r24, 0, %p28; + or.b32 %r179, %r178, 2146435072; + selp.b32 %r25, %r179, %r178, %p29; + add.f64 %fd4, %fd2, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r180}, %fd4; + } + and.b32 %r26, %r180, 2146435072; + setp.ne.s32 %p30, %r26, 2146435072; + setp.gtu.f64 %p31, %fd116, 0d7FF0000000000000; + setp.gt.f64 %p32, %fd116, 0d3FF0000000000000; + selp.b32 %r181, 2146435072, 0, %p32; + xor.b32 %r182, %r181, 2146435072; + selp.b32 %r183, %r182, %r181, %p29; + setp.eq.s32 %p33, %r23, -1; + selp.b32 %r27, 1072693248, %r183, %p33; + and.b32 %r28, %r24, 2147483647; + and.pred %p35, %p34, %p1; + selp.b32 %r29, %r19, %r18, %p35; + mul.lo.s32 %r30, %r624, %r151; + or.pred %p2, %p30, %p31; + mov.u32 %r625, %r174; + +$L__BB8_16: + not.pred %p36, %p1; + mov.f64 %fd372, %fd3; + @%p36 bra $L__BB8_18; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r184}, %fd3; + } + xor.b32 %r185, %r184, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r186, %temp}, %fd3; + } + mov.b64 %fd372, {%r186, %r185}; + +$L__BB8_18: + setp.eq.s32 %p37, %r23, 0; + @%p37 bra $L__BB8_22; + + setp.gt.s32 %p38, %r24, -1; + @%p38 bra $L__BB8_23; + + cvt.rzi.f64.f64 %fd119, %fd115; + setp.eq.f64 %p39, %fd119, 0d4000000000000000; + @%p39 bra $L__BB8_23; + + mov.f64 %fd372, 0dFFF8000000000000; + bra.uni $L__BB8_23; + +$L__BB8_22: + mov.u32 %r187, 0; + mov.b64 %fd372, {%r187, %r25}; + +$L__BB8_23: + selp.f64 %fd373, %fd372, %fd4, %p30; + @%p2 bra $L__BB8_28; + + { + .reg .b32 %temp; + mov.b64 {%r188, %temp}, %fd115; + } + setp.eq.s32 %p42, %r188, 0; + and.pred %p43, %p61, %p42; + @%p43 bra $L__BB8_27; + bra.uni $L__BB8_25; + +$L__BB8_27: + mov.u32 %r191, 0; + mov.b64 %fd373, {%r191, %r27}; + bra.uni $L__BB8_28; + +$L__BB8_25: + setp.ne.s32 %p44, %r28, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r189, %temp}, %fd2; + } + setp.ne.s32 %p45, %r189, 0; + or.pred %p46, %p44, %p45; + mov.f64 %fd373, %fd372; + @%p46 bra $L__BB8_28; + + mov.u32 %r190, 0; + mov.b64 %fd373, {%r190, %r29}; + +$L__BB8_28: + setp.eq.s32 %p47, %r23, 1; + selp.f64 %fd122, 0d3FF0000000000000, %fd373, %p47; + mov.f64 %fd123, 0d3FF0000000000000; + mul.f64 %fd13, %fd122, %fd1; + neg.f64 %fd124, %fd13; + mov.f64 %fd125, 0d4338000000000000; + mov.f64 %fd126, 0d3FF71547652B82FE; + fma.rn.f64 %fd127, %fd124, %fd126, %fd125; + { + .reg .b32 %temp; + mov.b64 {%r32, %temp}, %fd127; + } + mov.f64 %fd128, 0dC338000000000000; + add.rn.f64 %fd129, %fd127, %fd128; + mov.f64 %fd130, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd131, %fd129, %fd130, %fd124; + mov.f64 %fd132, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd133, %fd129, %fd132, %fd131; + mov.f64 %fd134, 0d3E928AF3FCA213EA; + mov.f64 %fd135, 0d3E5ADE1569CE2BDF; + fma.rn.f64 %fd136, %fd135, %fd133, %fd134; + mov.f64 %fd137, 0d3EC71DEE62401315; + fma.rn.f64 %fd138, %fd136, %fd133, %fd137; + mov.f64 %fd139, 0d3EFA01997C89EB71; + fma.rn.f64 %fd140, %fd138, %fd133, %fd139; + mov.f64 %fd141, 0d3F2A01A014761F65; + fma.rn.f64 %fd142, %fd140, %fd133, %fd141; + mov.f64 %fd143, 0d3F56C16C1852B7AF; + fma.rn.f64 %fd144, %fd142, %fd133, %fd143; + mov.f64 %fd145, 0d3F81111111122322; + fma.rn.f64 %fd146, %fd144, %fd133, %fd145; + mov.f64 %fd147, 0d3FA55555555502A1; + fma.rn.f64 %fd148, %fd146, %fd133, %fd147; + mov.f64 %fd149, 0d3FC5555555555511; + fma.rn.f64 %fd150, %fd148, %fd133, %fd149; + mov.f64 %fd151, 0d3FE000000000000B; + fma.rn.f64 %fd152, %fd150, %fd133, %fd151; + fma.rn.f64 %fd153, %fd152, %fd133, %fd123; + fma.rn.f64 %fd154, %fd153, %fd133, %fd123; + { + .reg .b32 %temp; + mov.b64 {%r33, %temp}, %fd154; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r34}, %fd154; + } + shl.b32 %r192, %r32, 20; + add.s32 %r193, %r34, %r192; + mov.b64 %fd374, {%r33, %r193}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r194}, %fd124; + } + mov.b32 %f419, %r194; + abs.f32 %f41, %f419; + setp.lt.f32 %p48, %f41, 0f4086232B; + @%p48 bra $L__BB8_31; + + setp.gt.f64 %p49, %fd13, 0d8000000000000000; + mov.f64 %fd155, 0d7FF0000000000000; + sub.f64 %fd156, %fd155, %fd13; + selp.f64 %fd374, 0d0000000000000000, %fd156, %p49; + setp.geu.f32 %p50, %f41, 0f40874800; + @%p50 bra $L__BB8_31; + + shr.u32 %r195, %r32, 31; + add.s32 %r196, %r32, %r195; + shr.s32 %r197, %r196, 1; + shl.b32 %r198, %r197, 20; + add.s32 %r199, %r34, %r198; + mov.b64 %fd157, {%r33, %r199}; + sub.s32 %r200, %r32, %r197; + shl.b32 %r201, %r200, 20; + add.s32 %r202, %r201, 1072693248; + mov.u32 %r203, 0; + mov.b64 %fd158, {%r203, %r202}; + mul.f64 %fd374, %fd157, %fd158; + +$L__BB8_31: + sub.s32 %r35, %r623, %r625; + cvt.rn.f32.s32 %f420, %r35; + cvt.f64.f32 %fd18, %f420; + { + .reg .b32 %temp; + mov.b64 {%temp, %r36}, %fd18; + } + abs.f64 %fd19, %fd18; + { // callseq 175, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd19; + .param .b64 param1; + st.param.f64 [param1+0], %fd115; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd376, [retval0+0]; + } // callseq 175 + setp.lt.s32 %p51, %r36, 0; + and.pred %p3, %p51, %p28; + not.pred %p53, %p3; + @%p53 bra $L__BB8_33; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r204}, %fd376; + } + xor.b32 %r205, %r204, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r206, %temp}, %fd376; + } + mov.b64 %fd376, {%r206, %r205}; + +$L__BB8_33: + setp.eq.s32 %p54, %r35, 0; + @%p54 bra $L__BB8_37; + + setp.gt.s32 %p55, %r36, -1; + @%p55 bra $L__BB8_38; + + cvt.rzi.f64.f64 %fd161, %fd115; + setp.eq.f64 %p56, %fd161, 0d4000000000000000; + @%p56 bra $L__BB8_38; + + mov.f64 %fd376, 0dFFF8000000000000; + bra.uni $L__BB8_38; + +$L__BB8_37: + mov.u32 %r207, 0; + selp.b32 %r208, %r36, 0, %p28; + or.b32 %r209, %r208, 2146435072; + selp.b32 %r210, %r209, %r208, %p29; + mov.b64 %fd376, {%r207, %r210}; + +$L__BB8_38: + add.f64 %fd25, %fd18, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r211}, %fd25; + } + and.b32 %r212, %r211, 2146435072; + setp.ne.s32 %p59, %r212, 2146435072; + mov.f64 %fd377, %fd376; + @%p59 bra $L__BB8_44; + + setp.gtu.f64 %p60, %fd19, 0d7FF0000000000000; + mov.f64 %fd377, %fd25; + @%p60 bra $L__BB8_44; + + { + .reg .b32 %temp; + mov.b64 {%r213, %temp}, %fd115; + } + setp.eq.s32 %p62, %r213, 0; + and.pred %p63, %p61, %p62; + @%p63 bra $L__BB8_43; + bra.uni $L__BB8_41; + +$L__BB8_43: + mov.u32 %r218, 0; + setp.gt.f64 %p70, %fd19, 0d3FF0000000000000; + selp.b32 %r219, 2146435072, 0, %p70; + xor.b32 %r220, %r219, 2146435072; + selp.b32 %r221, %r220, %r219, %p29; + setp.eq.s32 %p71, %r35, -1; + selp.b32 %r222, 1072693248, %r221, %p71; + mov.b64 %fd377, {%r218, %r222}; + bra.uni $L__BB8_44; + +$L__BB8_41: + { + .reg .b32 %temp; + mov.b64 {%r214, %temp}, %fd18; + } + and.b32 %r215, %r36, 2147483647; + setp.ne.s32 %p64, %r215, 2146435072; + setp.ne.s32 %p65, %r214, 0; + or.pred %p66, %p64, %p65; + mov.f64 %fd377, %fd376; + @%p66 bra $L__BB8_44; + + and.pred %p68, %p34, %p3; + selp.b32 %r216, %r19, %r18, %p68; + mov.u32 %r217, 0; + mov.b64 %fd377, {%r217, %r216}; + +$L__BB8_44: + mov.f64 %fd363, 0d3FF0000000000000; + mov.f64 %fd362, 0d3FE000000000000B; + mov.f64 %fd361, 0d3FC5555555555511; + mov.f64 %fd360, 0d3FA55555555502A1; + mov.f64 %fd359, 0d3F81111111122322; + mov.f64 %fd358, 0d3F56C16C1852B7AF; + mov.f64 %fd357, 0d3F2A01A014761F65; + mov.f64 %fd356, 0d3EFA01997C89EB71; + mov.f64 %fd355, 0d3EC71DEE62401315; + mov.f64 %fd354, 0d3E928AF3FCA213EA; + mov.f64 %fd353, 0d3E5ADE1569CE2BDF; + mov.f64 %fd352, 0dBC7ABC9E3B39803F; + mov.f64 %fd351, 0dBFE62E42FEFA39EF; + mov.f64 %fd350, 0dC338000000000000; + mov.f64 %fd349, 0d4338000000000000; + mov.f64 %fd348, 0d3FF71547652B82FE; + setp.eq.s32 %p72, %r35, 1; + selp.f64 %fd164, 0d3FF0000000000000, %fd377, %p72; + mul.f64 %fd29, %fd164, %fd1; + neg.f64 %fd166, %fd29; + fma.rn.f64 %fd169, %fd166, %fd348, %fd349; + { + .reg .b32 %temp; + mov.b64 {%r37, %temp}, %fd169; + } + add.rn.f64 %fd171, %fd169, %fd350; + fma.rn.f64 %fd173, %fd171, %fd351, %fd166; + fma.rn.f64 %fd175, %fd171, %fd352, %fd173; + fma.rn.f64 %fd178, %fd353, %fd175, %fd354; + fma.rn.f64 %fd180, %fd178, %fd175, %fd355; + fma.rn.f64 %fd182, %fd180, %fd175, %fd356; + fma.rn.f64 %fd184, %fd182, %fd175, %fd357; + fma.rn.f64 %fd186, %fd184, %fd175, %fd358; + fma.rn.f64 %fd188, %fd186, %fd175, %fd359; + fma.rn.f64 %fd190, %fd188, %fd175, %fd360; + fma.rn.f64 %fd192, %fd190, %fd175, %fd361; + fma.rn.f64 %fd194, %fd192, %fd175, %fd362; + fma.rn.f64 %fd195, %fd194, %fd175, %fd363; + fma.rn.f64 %fd196, %fd195, %fd175, %fd363; + { + .reg .b32 %temp; + mov.b64 {%r38, %temp}, %fd196; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r39}, %fd196; + } + shl.b32 %r223, %r37, 20; + add.s32 %r224, %r39, %r223; + mov.b64 %fd378, {%r38, %r224}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r225}, %fd166; + } + mov.b32 %f421, %r225; + abs.f32 %f42, %f421; + setp.lt.f32 %p73, %f42, 0f4086232B; + @%p73 bra $L__BB8_47; + + setp.gt.f64 %p74, %fd29, 0d8000000000000000; + mov.f64 %fd197, 0d7FF0000000000000; + sub.f64 %fd198, %fd197, %fd29; + selp.f64 %fd378, 0d0000000000000000, %fd198, %p74; + setp.geu.f32 %p75, %f42, 0f40874800; + @%p75 bra $L__BB8_47; + + shr.u32 %r226, %r37, 31; + add.s32 %r227, %r37, %r226; + shr.s32 %r228, %r227, 1; + shl.b32 %r229, %r228, 20; + add.s32 %r230, %r39, %r229; + mov.b64 %fd199, {%r38, %r230}; + sub.s32 %r231, %r37, %r228; + shl.b32 %r232, %r231, 20; + add.s32 %r233, %r232, 1072693248; + mov.u32 %r234, 0; + mov.b64 %fd200, {%r234, %r233}; + mul.f64 %fd378, %fd199, %fd200; + +$L__BB8_47: + add.s32 %r235, %r625, %r30; + add.s32 %r236, %r235, %r1; + mul.wide.s32 %rd70, %r236, 4; + add.s64 %rd71, %rd1, %rd70; + ld.global.f32 %f422, [%rd71]; + cvt.f64.f32 %fd201, %f422; + mul.f64 %fd202, %fd374, %fd378; + cvt.f64.f32 %fd203, %f1694; + fma.rn.f64 %fd204, %fd202, %fd201, %fd203; + cvt.rn.f32.f64 %f1694, %fd204; + cvt.f64.f32 %fd205, %f1693; + add.f64 %fd206, %fd202, %fd205; + cvt.rn.f32.f64 %f1693, %fd206; + add.s32 %r625, %r625, 1; + setp.lt.s32 %p76, %r625, %r151; + @%p76 bra $L__BB8_16; + + add.s32 %r624, %r624, 1; + setp.lt.s32 %p77, %r624, %r151; + @%p77 bra $L__BB8_15; + + div.rn.f32 %f423, %f1694, %f1693; + min.f32 %f1690, %f1690, %f423; + add.s32 %r623, %r623, 1; + setp.lt.s32 %p78, %r623, %r151; + @%p78 bra $L__BB8_14; + + add.s32 %r622, %r622, 1; + setp.lt.s32 %p79, %r622, %r151; + @%p79 bra $L__BB8_13; + +$L__BB8_51: + mov.f32 %f426, 0f38D1B717; + max.f32 %f47, %f1690, %f426; + setp.lt.s32 %p80, %r153, 1; + mov.f32 %f1814, 0fBF800000; + mov.f32 %f1813, 0f00000000; + mov.u32 %r648, 0; + @%p80 bra $L__BB8_280; + + ld.param.u32 %r616, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_3]; + mul.f32 %f429, %f379, 0f3F000000; + div.rn.f32 %f430, %f429, 0f40490FD8; + div.rn.f32 %f431, %f430, %f378; + div.rn.f32 %f432, %f431, %f378; + div.rn.f32 %f48, %f432, 0f41200000; + div.rn.f32 %f433, %f379, 0fC0206C98; + div.rn.f32 %f49, %f433, %f378; + cvt.f64.f32 %fd34, %f433; + cvt.f64.f32 %fd35, %f378; + add.f64 %fd36, %fd35, 0d4008000000000000; + mul.f32 %f50, %f378, 0f3FC00000; + add.s32 %r241, %r151, -1; + mov.u32 %r626, 1; + setp.lt.s32 %p106, %r616, 1; + mov.f64 %fd207, 0d4000000000000000; + mov.u32 %r628, %r648; + +$L__BB8_53: + mov.u32 %r44, %r628; + mov.u32 %r628, %r626; + setp.eq.s32 %p81, %r628, 1; + @%p81 bra $L__BB8_73; + bra.uni $L__BB8_54; + +$L__BB8_73: + st.local.f32 [%rd2], %f1774; + st.local.f32 [%rd3], %f1773; + mov.u32 %r633, 0; + bra.uni $L__BB8_74; + +$L__BB8_54: + mov.f32 %f1718, %f48; + @%p18 bra $L__BB8_72; + + add.s32 %r47, %r628, -1; + mul.wide.s32 %rd72, %r47, 4; + add.s64 %rd8, %rd2, %rd72; + add.s64 %rd9, %rd3, %rd72; + mov.u32 %r242, 0; + mov.u32 %r629, %r242; + mov.f32 %f1718, %f48; + +$L__BB8_56: + cvt.rn.f32.s32 %f59, %r629; + setp.eq.s32 %p83, %r629, %r241; + setp.eq.s32 %p84, %r629, 0; + or.pred %p85, %p83, %p84; + selp.f32 %f60, 0fBF800000, 0f3F800000, %p85; + mov.u32 %r630, %r242; + +$L__BB8_57: + setp.lt.u32 %p86, %r628, 2; + mov.f32 %f1715, 0f00000000; + mov.f32 %f1716, %f1715; + mov.u32 %r632, %r242; + mov.f32 %f1717, %f47; + @%p86 bra $L__BB8_68; + + sqrt.rn.f32 %f64, %f34; + cvt.rn.f32.s32 %f65, %r630; + mov.u64 %rd134, %rd2; + mov.u64 %rd135, %rd3; + mov.f32 %f1717, %f47; + mov.u32 %r632, %r242; + +$L__BB8_59: + ld.local.f32 %f1774, [%rd134]; + add.f32 %f1716, %f1716, %f1774; + ld.local.f32 %f1773, [%rd135]; + add.f32 %f1715, %f1715, %f1773; + sub.f32 %f73, %f59, %f1774; + add.f32 %f438, %f73, 0f3F000000; + mul.f32 %f74, %f438, %f64; + abs.f32 %f439, %f74; + setp.ltu.f32 %p87, %f439, 0f3F8060FE; + setp.ge.f32 %p88, %f439, 0f3F8060FE; + mul.f32 %f440, %f74, %f74; + selp.f32 %f441, %f439, %f440, %p88; + selp.f32 %f442, 0f3789CA3C, 0f38B1E96A, %p88; + selp.f32 %f443, 0fB9F560B9, 0fBA574D20, %p88; + fma.rn.f32 %f444, %f442, %f441, %f443; + selp.f32 %f445, 0f3BAC840B, 0f3BAAD5EA, %p88; + fma.rn.f32 %f446, %f444, %f441, %f445; + selp.f32 %f447, 0fBD0C8162, 0fBCDC1BE7, %p88; + fma.rn.f32 %f448, %f446, %f441, %f447; + selp.f32 %f449, 0f3E1CF906, 0f3DE718AF, %p88; + fma.rn.f32 %f450, %f448, %f441, %f449; + selp.f32 %f451, 0f3F6A937E, 0fBEC093AC, %p88; + fma.rn.f32 %f452, %f450, %f441, %f451; + selp.f32 %f453, 0f3F20D842, 0f3E0375D3, %p88; + fma.rn.f32 %f454, %f452, %f441, %f453; + neg.f32 %f455, %f439; + selp.f32 %f456, %f455, %f74, %p88; + fma.rn.f32 %f1709, %f454, %f456, %f456; + @%p87 bra $L__BB8_61; + + ex2.approx.ftz.f32 %f457, %f1709; + mov.f32 %f458, 0f3F800000; + sub.f32 %f459, %f458, %f457; + mov.b32 %r246, %f459; + mov.b32 %r247, %f74; + and.b32 %r248, %r247, -2147483648; + or.b32 %r249, %r248, %r246; + mov.b32 %f1709, %r249; + +$L__BB8_61: + add.f32 %f460, %f73, 0fBF000000; + mul.f32 %f78, %f460, %f64; + abs.f32 %f461, %f78; + setp.ltu.f32 %p89, %f461, 0f3F8060FE; + setp.ge.f32 %p90, %f461, 0f3F8060FE; + mul.f32 %f462, %f78, %f78; + selp.f32 %f463, %f461, %f462, %p90; + selp.f32 %f464, 0f3789CA3C, 0f38B1E96A, %p90; + selp.f32 %f465, 0fB9F560B9, 0fBA574D20, %p90; + fma.rn.f32 %f466, %f464, %f463, %f465; + selp.f32 %f467, 0f3BAC840B, 0f3BAAD5EA, %p90; + fma.rn.f32 %f468, %f466, %f463, %f467; + selp.f32 %f469, 0fBD0C8162, 0fBCDC1BE7, %p90; + fma.rn.f32 %f470, %f468, %f463, %f469; + selp.f32 %f471, 0f3E1CF906, 0f3DE718AF, %p90; + fma.rn.f32 %f472, %f470, %f463, %f471; + selp.f32 %f473, 0f3F6A937E, 0fBEC093AC, %p90; + fma.rn.f32 %f474, %f472, %f463, %f473; + selp.f32 %f475, 0f3F20D842, 0f3E0375D3, %p90; + fma.rn.f32 %f476, %f474, %f463, %f475; + neg.f32 %f477, %f461; + selp.f32 %f478, %f477, %f78, %p90; + fma.rn.f32 %f1710, %f476, %f478, %f478; + @%p89 bra $L__BB8_63; + + ex2.approx.ftz.f32 %f479, %f1710; + mov.f32 %f480, 0f3F800000; + sub.f32 %f481, %f480, %f479; + mov.b32 %r250, %f481; + mov.b32 %r251, %f78; + and.b32 %r252, %r251, -2147483648; + or.b32 %r253, %r252, %r250; + mov.b32 %f1710, %r253; + +$L__BB8_63: + sub.f32 %f82, %f1709, %f1710; + sub.f32 %f83, %f65, %f1773; + add.f32 %f482, %f83, 0f3F000000; + mul.f32 %f84, %f482, %f64; + abs.f32 %f483, %f84; + setp.ltu.f32 %p91, %f483, 0f3F8060FE; + setp.ge.f32 %p92, %f483, 0f3F8060FE; + mul.f32 %f484, %f84, %f84; + selp.f32 %f485, %f483, %f484, %p92; + selp.f32 %f486, 0f3789CA3C, 0f38B1E96A, %p92; + selp.f32 %f487, 0fB9F560B9, 0fBA574D20, %p92; + fma.rn.f32 %f488, %f486, %f485, %f487; + selp.f32 %f489, 0f3BAC840B, 0f3BAAD5EA, %p92; + fma.rn.f32 %f490, %f488, %f485, %f489; + selp.f32 %f491, 0fBD0C8162, 0fBCDC1BE7, %p92; + fma.rn.f32 %f492, %f490, %f485, %f491; + selp.f32 %f493, 0f3E1CF906, 0f3DE718AF, %p92; + fma.rn.f32 %f494, %f492, %f485, %f493; + selp.f32 %f495, 0f3F6A937E, 0fBEC093AC, %p92; + fma.rn.f32 %f496, %f494, %f485, %f495; + selp.f32 %f497, 0f3F20D842, 0f3E0375D3, %p92; + fma.rn.f32 %f498, %f496, %f485, %f497; + neg.f32 %f499, %f483; + selp.f32 %f500, %f499, %f84, %p92; + fma.rn.f32 %f1711, %f498, %f500, %f500; + @%p91 bra $L__BB8_65; + + ex2.approx.ftz.f32 %f501, %f1711; + mov.f32 %f502, 0f3F800000; + sub.f32 %f503, %f502, %f501; + mov.b32 %r254, %f503; + mov.b32 %r255, %f84; + and.b32 %r256, %r255, -2147483648; + or.b32 %r257, %r256, %r254; + mov.b32 %f1711, %r257; + +$L__BB8_65: + add.f32 %f504, %f83, 0fBF000000; + mul.f32 %f88, %f504, %f64; + abs.f32 %f505, %f88; + setp.ltu.f32 %p93, %f505, 0f3F8060FE; + setp.ge.f32 %p94, %f505, 0f3F8060FE; + mul.f32 %f506, %f88, %f88; + selp.f32 %f507, %f505, %f506, %p94; + selp.f32 %f508, 0f3789CA3C, 0f38B1E96A, %p94; + selp.f32 %f509, 0fB9F560B9, 0fBA574D20, %p94; + fma.rn.f32 %f510, %f508, %f507, %f509; + selp.f32 %f511, 0f3BAC840B, 0f3BAAD5EA, %p94; + fma.rn.f32 %f512, %f510, %f507, %f511; + selp.f32 %f513, 0fBD0C8162, 0fBCDC1BE7, %p94; + fma.rn.f32 %f514, %f512, %f507, %f513; + selp.f32 %f515, 0f3E1CF906, 0f3DE718AF, %p94; + fma.rn.f32 %f516, %f514, %f507, %f515; + selp.f32 %f517, 0f3F6A937E, 0fBEC093AC, %p94; + fma.rn.f32 %f518, %f516, %f507, %f517; + selp.f32 %f519, 0f3F20D842, 0f3E0375D3, %p94; + fma.rn.f32 %f520, %f518, %f507, %f519; + neg.f32 %f521, %f505; + selp.f32 %f522, %f521, %f88, %p94; + fma.rn.f32 %f1712, %f520, %f522, %f522; + @%p93 bra $L__BB8_67; + + ex2.approx.ftz.f32 %f523, %f1712; + mov.f32 %f524, 0f3F800000; + sub.f32 %f525, %f524, %f523; + mov.b32 %r258, %f525; + mov.b32 %r259, %f88; + and.b32 %r260, %r259, -2147483648; + or.b32 %r261, %r260, %r258; + mov.b32 %f1712, %r261; + +$L__BB8_67: + sub.f32 %f526, %f1711, %f1712; + mul.f32 %f527, %f526, 0f3F000000; + mul.f32 %f528, %f82, 0f3F000000; + mul.f32 %f529, %f528, %f379; + fma.rn.f32 %f1717, %f529, %f527, %f1717; + add.s64 %rd135, %rd135, 4; + add.s64 %rd134, %rd134, 4; + add.s32 %r632, %r632, 1; + setp.lt.s32 %p95, %r632, %r47; + @%p95 bra $L__BB8_59; + +$L__BB8_68: + mad.lo.s32 %r262, %r630, %r151, %r629; + add.s32 %r263, %r262, %r1; + mul.wide.s32 %rd73, %r263, 4; + add.s64 %rd74, %rd1, %rd73; + ld.global.f32 %f530, [%rd74]; + sub.f32 %f98, %f530, %f1717; + setp.leu.f32 %p96, %f98, %f1718; + @%p96 bra $L__BB8_70; + + setp.eq.s32 %p97, %r630, 0; + setp.eq.s32 %p98, %r630, %r241; + or.pred %p99, %p98, %p97; + cvt.rn.f32.s32 %f531, %r632; + div.rn.f32 %f532, %f1716, %f531; + sub.f32 %f533, %f59, %f532; + setp.gt.f32 %p100, %f533, 0f00000000; + div.rn.f32 %f534, %f1715, %f531; + cvt.rn.f32.s32 %f535, %r630; + sub.f32 %f536, %f535, %f534; + setp.gt.f32 %p101, %f536, 0f00000000; + selp.f32 %f537, 0f3F000000, 0fBF000000, %p100; + mul.f32 %f538, %f60, %f537; + sub.f32 %f539, %f59, %f538; + st.local.f32 [%rd8], %f539; + selp.f32 %f540, 0f3F000000, 0fBF000000, %p101; + selp.f32 %f541, 0fBF800000, 0f3F800000, %p99; + mul.f32 %f542, %f541, %f540; + sub.f32 %f543, %f535, %f542; + st.local.f32 [%rd9], %f543; + mov.f32 %f1718, %f98; + +$L__BB8_70: + add.s32 %r630, %r630, 1; + setp.lt.s32 %p102, %r630, %r151; + @%p102 bra $L__BB8_57; + + add.s32 %r629, %r629, 1; + setp.lt.s32 %p103, %r629, %r151; + @%p103 bra $L__BB8_56; + +$L__BB8_72: + setp.eq.f32 %p104, %f1718, %f48; + selp.u32 %r633, 1, 0, %p104; + +$L__BB8_74: + setp.ne.s32 %p105, %r633, 0; + @%p105 bra $L__BB8_280; + + mov.f32 %f1781, %f47; + @%p106 bra $L__BB8_224; + + mov.u32 %r634, 0; + mov.f32 %f1781, %f47; + +$L__BB8_77: + mov.f32 %f1730, 0f00000000; + mov.f32 %f1729, %f1730; + @%p18 bra $L__BB8_109; + + mov.u32 %r267, 0; + mov.u32 %r635, %r267; + +$L__BB8_79: + cvt.rn.f32.s32 %f111, %r635; + sqrt.rn.f32 %f112, %f34; + mov.u32 %r636, %r267; + +$L__BB8_80: + cvt.rn.f32.s32 %f115, %r636; + mov.f32 %f1731, %f1781; + mov.u32 %r637, %r267; + +$L__BB8_81: + mul.wide.s32 %rd75, %r637, 4; + add.s64 %rd76, %rd2, %rd75; + ld.local.f32 %f1774, [%rd76]; + add.s64 %rd77, %rd6, %rd75; + st.local.f32 [%rd77], %f1774; + add.s64 %rd78, %rd3, %rd75; + ld.local.f32 %f1773, [%rd78]; + add.s64 %rd79, %rd7, %rd75; + st.local.f32 [%rd79], %f1773; + sub.f32 %f119, %f111, %f1774; + add.f32 %f548, %f119, 0f3F000000; + mul.f32 %f120, %f112, %f548; + abs.f32 %f549, %f120; + setp.ltu.f32 %p108, %f549, 0f3F8060FE; + setp.ge.f32 %p109, %f549, 0f3F8060FE; + mul.f32 %f550, %f120, %f120; + selp.f32 %f551, %f549, %f550, %p109; + selp.f32 %f552, 0f3789CA3C, 0f38B1E96A, %p109; + selp.f32 %f553, 0fB9F560B9, 0fBA574D20, %p109; + fma.rn.f32 %f554, %f552, %f551, %f553; + selp.f32 %f555, 0f3BAC840B, 0f3BAAD5EA, %p109; + fma.rn.f32 %f556, %f554, %f551, %f555; + selp.f32 %f557, 0fBD0C8162, 0fBCDC1BE7, %p109; + fma.rn.f32 %f558, %f556, %f551, %f557; + selp.f32 %f559, 0f3E1CF906, 0f3DE718AF, %p109; + fma.rn.f32 %f560, %f558, %f551, %f559; + selp.f32 %f561, 0f3F6A937E, 0fBEC093AC, %p109; + fma.rn.f32 %f562, %f560, %f551, %f561; + selp.f32 %f563, 0f3F20D842, 0f3E0375D3, %p109; + fma.rn.f32 %f564, %f562, %f551, %f563; + neg.f32 %f565, %f549; + selp.f32 %f566, %f565, %f120, %p109; + fma.rn.f32 %f1732, %f564, %f566, %f566; + @%p108 bra $L__BB8_83; + + ex2.approx.ftz.f32 %f567, %f1732; + mov.f32 %f568, 0f3F800000; + sub.f32 %f569, %f568, %f567; + mov.b32 %r270, %f569; + mov.b32 %r271, %f120; + and.b32 %r272, %r271, -2147483648; + or.b32 %r273, %r272, %r270; + mov.b32 %f1732, %r273; + +$L__BB8_83: + add.f32 %f570, %f119, 0fBF000000; + mul.f32 %f124, %f112, %f570; + abs.f32 %f571, %f124; + setp.ltu.f32 %p110, %f571, 0f3F8060FE; + setp.ge.f32 %p111, %f571, 0f3F8060FE; + mul.f32 %f572, %f124, %f124; + selp.f32 %f573, %f571, %f572, %p111; + selp.f32 %f574, 0f3789CA3C, 0f38B1E96A, %p111; + selp.f32 %f575, 0fB9F560B9, 0fBA574D20, %p111; + fma.rn.f32 %f576, %f574, %f573, %f575; + selp.f32 %f577, 0f3BAC840B, 0f3BAAD5EA, %p111; + fma.rn.f32 %f578, %f576, %f573, %f577; + selp.f32 %f579, 0fBD0C8162, 0fBCDC1BE7, %p111; + fma.rn.f32 %f580, %f578, %f573, %f579; + selp.f32 %f581, 0f3E1CF906, 0f3DE718AF, %p111; + fma.rn.f32 %f582, %f580, %f573, %f581; + selp.f32 %f583, 0f3F6A937E, 0fBEC093AC, %p111; + fma.rn.f32 %f584, %f582, %f573, %f583; + selp.f32 %f585, 0f3F20D842, 0f3E0375D3, %p111; + fma.rn.f32 %f586, %f584, %f573, %f585; + neg.f32 %f587, %f571; + selp.f32 %f588, %f587, %f124, %p111; + fma.rn.f32 %f1733, %f586, %f588, %f588; + @%p110 bra $L__BB8_85; + + ex2.approx.ftz.f32 %f589, %f1733; + mov.f32 %f590, 0f3F800000; + sub.f32 %f591, %f590, %f589; + mov.b32 %r274, %f591; + mov.b32 %r275, %f124; + and.b32 %r276, %r275, -2147483648; + or.b32 %r277, %r276, %r274; + mov.b32 %f1733, %r277; + +$L__BB8_85: + sub.f32 %f128, %f1732, %f1733; + sub.f32 %f129, %f115, %f1773; + add.f32 %f592, %f129, 0f3F000000; + mul.f32 %f130, %f592, %f112; + abs.f32 %f593, %f130; + setp.ltu.f32 %p112, %f593, 0f3F8060FE; + setp.ge.f32 %p113, %f593, 0f3F8060FE; + mul.f32 %f594, %f130, %f130; + selp.f32 %f595, %f593, %f594, %p113; + selp.f32 %f596, 0f3789CA3C, 0f38B1E96A, %p113; + selp.f32 %f597, 0fB9F560B9, 0fBA574D20, %p113; + fma.rn.f32 %f598, %f596, %f595, %f597; + selp.f32 %f599, 0f3BAC840B, 0f3BAAD5EA, %p113; + fma.rn.f32 %f600, %f598, %f595, %f599; + selp.f32 %f601, 0fBD0C8162, 0fBCDC1BE7, %p113; + fma.rn.f32 %f602, %f600, %f595, %f601; + selp.f32 %f603, 0f3E1CF906, 0f3DE718AF, %p113; + fma.rn.f32 %f604, %f602, %f595, %f603; + selp.f32 %f605, 0f3F6A937E, 0fBEC093AC, %p113; + fma.rn.f32 %f606, %f604, %f595, %f605; + selp.f32 %f607, 0f3F20D842, 0f3E0375D3, %p113; + fma.rn.f32 %f608, %f606, %f595, %f607; + neg.f32 %f609, %f593; + selp.f32 %f610, %f609, %f130, %p113; + fma.rn.f32 %f1734, %f608, %f610, %f610; + @%p112 bra $L__BB8_87; + + ex2.approx.ftz.f32 %f611, %f1734; + mov.f32 %f612, 0f3F800000; + sub.f32 %f613, %f612, %f611; + mov.b32 %r278, %f613; + mov.b32 %r279, %f130; + and.b32 %r280, %r279, -2147483648; + or.b32 %r281, %r280, %r278; + mov.b32 %f1734, %r281; + +$L__BB8_87: + add.f32 %f614, %f129, 0fBF000000; + mul.f32 %f134, %f614, %f112; + abs.f32 %f615, %f134; + setp.ltu.f32 %p114, %f615, 0f3F8060FE; + setp.ge.f32 %p115, %f615, 0f3F8060FE; + mul.f32 %f616, %f134, %f134; + selp.f32 %f617, %f615, %f616, %p115; + selp.f32 %f618, 0f3789CA3C, 0f38B1E96A, %p115; + selp.f32 %f619, 0fB9F560B9, 0fBA574D20, %p115; + fma.rn.f32 %f620, %f618, %f617, %f619; + selp.f32 %f621, 0f3BAC840B, 0f3BAAD5EA, %p115; + fma.rn.f32 %f622, %f620, %f617, %f621; + selp.f32 %f623, 0fBD0C8162, 0fBCDC1BE7, %p115; + fma.rn.f32 %f624, %f622, %f617, %f623; + selp.f32 %f625, 0f3E1CF906, 0f3DE718AF, %p115; + fma.rn.f32 %f626, %f624, %f617, %f625; + selp.f32 %f627, 0f3F6A937E, 0fBEC093AC, %p115; + fma.rn.f32 %f628, %f626, %f617, %f627; + selp.f32 %f629, 0f3F20D842, 0f3E0375D3, %p115; + fma.rn.f32 %f630, %f628, %f617, %f629; + neg.f32 %f631, %f615; + selp.f32 %f632, %f631, %f134, %p115; + fma.rn.f32 %f1735, %f630, %f632, %f632; + @%p114 bra $L__BB8_89; + + ex2.approx.ftz.f32 %f633, %f1735; + mov.f32 %f634, 0f3F800000; + sub.f32 %f635, %f634, %f633; + mov.b32 %r282, %f635; + mov.b32 %r283, %f134; + and.b32 %r284, %r283, -2147483648; + or.b32 %r285, %r284, %r282; + mov.b32 %f1735, %r285; + +$L__BB8_89: + sub.f32 %f636, %f1734, %f1735; + mul.f32 %f637, %f636, 0f3F000000; + mul.f32 %f638, %f128, 0f3F000000; + mul.f32 %f639, %f638, %f379; + fma.rn.f32 %f1731, %f639, %f637, %f1731; + add.s32 %r637, %r637, 1; + setp.lt.u32 %p116, %r637, %r628; + @%p116 bra $L__BB8_81; + + mad.lo.s32 %r286, %r636, %r151, %r635; + add.s32 %r287, %r286, %r1; + mul.wide.s32 %rd80, %r287, 4; + add.s64 %rd81, %rd1, %rd80; + ld.global.f32 %f139, [%rd81]; + setp.leu.f32 %p117, %f1731, 0f3C23D70A; + mov.f32 %f1737, 0f00000000; + mov.f32 %f1736, %f1737; + @%p117 bra $L__BB8_92; + + div.rn.f32 %f641, %f139, %f1731; + add.f32 %f1736, %f641, 0fBF800000; + +$L__BB8_92: + @%p117 bra $L__BB8_107; + + cvt.f64.f32 %fd37, %f1731; + { + .reg .b32 %temp; + mov.b64 {%temp, %r63}, %fd37; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r64}, %fd207; + } + and.b32 %r65, %r64, 2146435072; + setp.eq.s32 %p119, %r65, 1062207488; + abs.f64 %fd38, %fd37; + { // callseq 176, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd38; + .param .b64 param1; + st.param.f64 [param1+0], %fd207; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd380, [retval0+0]; + } // callseq 176 + setp.lt.s32 %p120, %r63, 0; + and.pred %p4, %p120, %p119; + not.pred %p121, %p4; + @%p121 bra $L__BB8_95; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r288}, %fd380; + } + xor.b32 %r289, %r288, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r290, %temp}, %fd380; + } + mov.b64 %fd380, {%r290, %r289}; + +$L__BB8_95: + setp.eq.f32 %p122, %f1731, 0f00000000; + @%p122 bra $L__BB8_99; + bra.uni $L__BB8_96; + +$L__BB8_99: + selp.b32 %r291, %r63, 0, %p119; + mov.u32 %r292, 0; + or.b32 %r293, %r291, 2146435072; + setp.lt.s32 %p126, %r64, 0; + selp.b32 %r294, %r293, %r291, %p126; + mov.b64 %fd380, {%r292, %r294}; + bra.uni $L__BB8_100; + +$L__BB8_96: + setp.gt.s32 %p123, %r63, -1; + @%p123 bra $L__BB8_100; + + cvt.rzi.f64.f64 %fd209, %fd207; + setp.eq.f64 %p124, %fd209, 0d4000000000000000; + @%p124 bra $L__BB8_100; + + mov.f64 %fd380, 0dFFF8000000000000; + +$L__BB8_100: + add.f64 %fd44, %fd37, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r295}, %fd44; + } + and.b32 %r296, %r295, 2146435072; + setp.ne.s32 %p127, %r296, 2146435072; + mov.f64 %fd381, %fd380; + @%p127 bra $L__BB8_106; + + setp.gtu.f64 %p128, %fd38, 0d7FF0000000000000; + mov.f64 %fd381, %fd44; + @%p128 bra $L__BB8_106; + + { + .reg .b32 %temp; + mov.b64 {%r297, %temp}, %fd207; + } + and.b32 %r66, %r64, 2147483647; + setp.eq.s32 %p129, %r66, 2146435072; + setp.eq.s32 %p130, %r297, 0; + and.pred %p131, %p129, %p130; + @%p131 bra $L__BB8_105; + bra.uni $L__BB8_103; + +$L__BB8_105: + setp.gt.f64 %p138, %fd38, 0d3FF0000000000000; + selp.b32 %r304, 2146435072, 0, %p138; + mov.u32 %r305, 0; + xor.b32 %r306, %r304, 2146435072; + setp.lt.s32 %p139, %r64, 0; + selp.b32 %r307, %r306, %r304, %p139; + setp.eq.f32 %p140, %f1731, 0fBF800000; + selp.b32 %r308, 1072693248, %r307, %p140; + mov.b64 %fd381, {%r305, %r308}; + bra.uni $L__BB8_106; + +$L__BB8_103: + { + .reg .b32 %temp; + mov.b64 {%r298, %temp}, %fd37; + } + and.b32 %r299, %r63, 2147483647; + setp.ne.s32 %p132, %r299, 2146435072; + setp.ne.s32 %p133, %r298, 0; + or.pred %p134, %p132, %p133; + mov.f64 %fd381, %fd380; + @%p134 bra $L__BB8_106; + + setp.gt.s32 %p135, %r64, -1; + selp.b32 %r300, 2146435072, 0, %p135; + mov.u32 %r301, 0; + setp.ne.s32 %p136, %r66, 1071644672; + and.pred %p137, %p136, %p4; + or.b32 %r302, %r300, -2147483648; + selp.b32 %r303, %r302, %r300, %p137; + mov.b64 %fd381, {%r301, %r303}; + +$L__BB8_106: + setp.eq.f32 %p141, %f1731, 0f3F800000; + selp.f64 %fd212, 0d3FF0000000000000, %fd381, %p141; + cvt.f64.f32 %fd213, %f139; + div.rn.f64 %fd214, %fd213, %fd212; + cvt.rn.f32.f64 %f1737, %fd214; + +$L__BB8_107: + mov.f32 %f643, 0f47C35000; + min.f32 %f644, %f1737, %f643; + sub.f32 %f1729, %f1729, %f644; + min.f32 %f645, %f1736, %f643; + add.f32 %f1730, %f1730, %f645; + add.s32 %r636, %r636, 1; + setp.lt.s32 %p142, %r636, %r151; + @%p142 bra $L__BB8_80; + + add.s32 %r635, %r635, 1; + setp.lt.s32 %p143, %r635, %r151; + @%p143 bra $L__BB8_79; + +$L__BB8_109: + cvt.rn.f32.s32 %f1608, %r628; + div.rn.f32 %f646, %f1730, %f1729; + mov.f32 %f647, 0fBF800000; + max.f32 %f648, %f646, %f647; + mov.f32 %f649, 0f3F800000; + min.f32 %f650, %f648, %f649; + div.rn.f32 %f651, %f650, %f1608; + fma.rn.f32 %f652, %f651, 0fBF000000, %f1781; + mov.f32 %f653, 0f3A83126F; + max.f32 %f1781, %f652, %f653; + mov.u32 %r638, 0; + +$L__BB8_110: + cvt.s64.s32 %rd14, %r638; + mov.f32 %f1751, 0f00000000; + mov.f32 %f1750, %f1751; + mov.f32 %f1749, %f1751; + mov.f32 %f1748, %f1751; + @%p18 bra $L__BB8_222; + + mov.f32 %f1748, 0f00000000; + shl.b64 %rd82, %rd14, 2; + add.s64 %rd83, %rd6, %rd82; + ld.local.f32 %f1774, [%rd83]; + add.s64 %rd84, %rd7, %rd82; + ld.local.f32 %f1773, [%rd84]; + mov.u32 %r639, 0; + mov.f32 %f1749, %f1748; + mov.f32 %f1750, %f1748; + mov.f32 %f1751, %f1748; + +$L__BB8_112: + mov.u32 %r640, 0; + mov.f32 %f1611, 0f3F800000; + mov.f32 %f1610, 0f00000000; + cvt.rn.f32.s32 %f159, %r639; + sub.f32 %f662, %f159, %f1774; + add.f32 %f663, %f662, 0f3F000000; + sqrt.rn.f32 %f160, %f34; + mul.f32 %f664, %f663, %f160; + abs.f32 %f161, %f664; + setp.ge.f32 %p145, %f161, 0f3F8060FE; mul.f32 %f665, %f664, %f664; - mul.f32 %f666, %f40, %f665; - neg.f32 %f667, %f666; - mul.f32 %f668, %f666, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f669, %f668; - fma.rn.f32 %f670, %f669, %f614, %f667; - fma.rn.f32 %f671, %f669, %f616, %f670; - mul.f32 %f672, %f671, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f673, %f672; - add.f32 %f674, %f669, 0f00000000; - ex2.approx.f32 %f675, %f674; - mul.f32 %f676, %f673, %f675; - setp.gt.f32 %p38, %f666, 0f42D20000; - selp.f32 %f677, 0f00000000, %f676, %p38; - setp.lt.f32 %p39, %f666, 0fC2D20000; - selp.f32 %f678, 0f7F800000, %f677, %p39; - mul.f32 %f679, %f73, %f678; - ld.global.f32 %f680, [%rd57+8]; - fma.rn.f32 %f681, %f680, %f679, %f662; - add.f32 %f682, %f663, %f679; - add.s32 %r150, %r398, 3; - sub.s32 %r151, %r393, %r150; - cvt.rn.f32.s32 %f683, %r151; - mul.f32 %f684, %f683, %f683; - mul.f32 %f685, %f40, %f684; - neg.f32 %f686, %f685; - mul.f32 %f687, %f685, 0fBFB8AA3B; - cvt.rzi.f32.f32 %f688, %f687; - fma.rn.f32 %f689, %f688, %f614, %f686; - fma.rn.f32 %f690, %f688, %f616, %f689; - mul.f32 %f691, %f690, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f692, %f691; - add.f32 %f693, %f688, 0f00000000; - ex2.approx.f32 %f694, %f693; - mul.f32 %f695, %f692, %f694; - setp.gt.f32 %p40, %f685, 0f42D20000; - selp.f32 %f696, 0f00000000, %f695, %p40; - setp.lt.f32 %p41, %f685, 0fC2D20000; - selp.f32 %f697, 0f7F800000, %f696, %p41; - mul.f32 %f698, %f73, %f697; - ld.global.f32 %f699, [%rd57+12]; - fma.rn.f32 %f2156, %f699, %f698, %f681; - add.f32 %f2155, %f682, %f698; - add.s32 %r398, %r398, 4; - setp.lt.s32 %p42, %r398, %r102; - @%p42 bra BB8_30; - -BB8_31: - add.s32 %r394, %r394, 1; - setp.lt.s32 %p43, %r394, %r102; - @%p43 bra BB8_19; - - div.rn.f32 %f700, %f2156, %f2155; - min.f32 %f2138, %f2138, %f700; - add.s32 %r393, %r393, 1; - setp.lt.s32 %p44, %r393, %r102; - @%p44 bra BB8_18; - - add.s32 %r392, %r392, 1; - setp.lt.s32 %p45, %r392, %r102; - @%p45 bra BB8_17; - -BB8_34: - mov.f32 %f703, 0f38D1B717; - max.f32 %f82, %f2138, %f703; - setp.lt.s32 %p46, %r104, 1; - mov.u32 %r421, 0; - mov.f32 %f2290, 0f00000000; - mov.f32 %f2291, 0fBF800000; - @%p46 bra BB8_206; - - mul.f32 %f83, %f472, 0f3FC00000; - add.s32 %r155, %r102, -1; - div.rn.f32 %f706, %f473, 0fC0206C98; - div.rn.f32 %f85, %f706, %f472; - mul.f32 %f707, %f473, 0f3F000000; - div.rn.f32 %f708, %f707, 0f40490FD8; - div.rn.f32 %f709, %f708, %f472; - div.rn.f32 %f710, %f709, %f472; - div.rn.f32 %f86, %f710, 0f41200000; - mul.f32 %f711, %f472, %f472; - mul.f32 %f712, %f711, %f472; - div.rn.f32 %f87, %f706, %f712; - mov.u32 %r400, 1; - mov.u32 %r421, 0; - mov.f32 %f2290, 0f00000000; - mov.f32 %f2291, 0fBF800000; - -BB8_36: - mov.u32 %r29, %r400; - setp.eq.s32 %p47, %r29, 1; - @%p47 bra BB8_61; - bra.uni BB8_37; - -BB8_61: - st.local.f32 [%rd2], %f2178; - st.local.f32 [%rd3], %f2179; - mov.u32 %r405, 0; - bra.uni BB8_62; - -BB8_37: - @%p8 bra BB8_38; - - add.s32 %r30, %r29, -1; - mul.wide.s32 %rd58, %r30, 4; - add.s64 %rd8, %rd2, %rd58; - add.s64 %rd9, %rd3, %rd58; - mov.u32 %r156, 0; - mov.f32 %f2180, %f86; - mov.u32 %r401, %r156; - -BB8_40: - setp.ne.s32 %p49, %r401, 0; - setp.ne.s32 %p50, %r401, %r155; - and.pred %p51, %p50, %p49; - cvt.rn.f32.s32 %f95, %r401; - selp.f32 %f96, 0f3F800000, 0fBF800000, %p51; - mov.u32 %r402, %r156; - -BB8_41: - mov.f32 %f2176, 0f00000000; - setp.lt.s32 %p52, %r30, 1; - mov.f32 %f2175, %f82; - mov.f32 %f2177, %f2176; - mov.u32 %r404, %r156; - @%p52 bra BB8_56; - - sqrt.rn.f32 %f100, %f40; - cvt.rn.f32.s32 %f101, %r402; - mov.u32 %r404, 0; - mov.f32 %f2176, 0f00000000; - mov.f32 %f2175, %f82; - mov.f32 %f2177, %f2176; - -BB8_43: - mul.wide.s32 %rd59, %r404, 4; - add.s64 %rd60, %rd2, %rd59; - add.s64 %rd61, %rd3, %rd59; - ld.local.f32 %f2178, [%rd60]; - add.f32 %f2177, %f2177, %f2178; - ld.local.f32 %f2179, [%rd61]; - add.f32 %f2176, %f2176, %f2179; - sub.f32 %f109, %f95, %f2178; - add.f32 %f717, %f109, 0f3F800000; - mul.f32 %f110, %f717, %f100; - abs.f32 %f111, %f110; - setp.ltu.f32 %p53, %f111, 0f3F800000; - @%p53 bra BB8_45; - bra.uni BB8_44; - -BB8_45: - mul.f32 %f736, %f110, %f110; - mov.f32 %f737, 0f3BA0C9F8; - mov.f32 %f738, 0fBA1268FB; - fma.rn.f32 %f739, %f738, %f736, %f737; - mov.f32 %f740, 0fBCDABFD4; - fma.rn.f32 %f741, %f739, %f736, %f740; - mov.f32 %f742, 0f3DE70331; - fma.rn.f32 %f743, %f741, %f736, %f742; - mov.f32 %f744, 0fBEC09330; - fma.rn.f32 %f745, %f743, %f736, %f744; - mov.f32 %f746, 0f3F906EBA; - fma.rn.f32 %f747, %f745, %f736, %f746; - mul.f32 %f2171, %f110, %f747; - bra.uni BB8_46; - -BB8_44: - mov.f32 %f718, 0f3A03BB71; - mov.f32 %f719, 0fB7B730FB; - fma.rn.f32 %f720, %f719, %f111, %f718; - mov.f32 %f721, 0fBBACA3B3; - fma.rn.f32 %f722, %f720, %f111, %f721; - mov.f32 %f723, 0f3D0A7445; - fma.rn.f32 %f724, %f722, %f111, %f723; - mov.f32 %f725, 0fBE1B3B75; - fma.rn.f32 %f726, %f724, %f111, %f725; - mov.f32 %f727, 0fBF6B385A; - fma.rn.f32 %f728, %f726, %f111, %f727; - mov.f32 %f729, 0fBFD0316E; - fma.rn.f32 %f730, %f728, %f111, %f729; - mov.f32 %f731, 0fBA031CCE; - fma.rn.f32 %f732, %f730, %f111, %f731; - ex2.approx.ftz.f32 %f733, %f732; - mov.f32 %f734, 0f3F800000; - sub.f32 %f735, %f734, %f733; - mov.b32 %r160, %f735; - setp.ltu.f32 %p54, %f111, 0f407AD445; - selp.b32 %r161, %r160, 1065353216, %p54; - mov.b32 %r162, %f110; - and.b32 %r163, %r162, -2147483648; - or.b32 %r164, %r161, %r163; - mov.b32 %f2171, %r164; - -BB8_46: - mul.f32 %f115, %f109, %f100; - abs.f32 %f116, %f115; - setp.ltu.f32 %p55, %f116, 0f3F800000; - @%p55 bra BB8_48; - bra.uni BB8_47; - -BB8_48: - mul.f32 %f766, %f115, %f115; - mov.f32 %f767, 0f3BA0C9F8; - mov.f32 %f768, 0fBA1268FB; - fma.rn.f32 %f769, %f768, %f766, %f767; - mov.f32 %f770, 0fBCDABFD4; - fma.rn.f32 %f771, %f769, %f766, %f770; - mov.f32 %f772, 0f3DE70331; - fma.rn.f32 %f773, %f771, %f766, %f772; - mov.f32 %f774, 0fBEC09330; - fma.rn.f32 %f775, %f773, %f766, %f774; - mov.f32 %f776, 0f3F906EBA; - fma.rn.f32 %f777, %f775, %f766, %f776; - mul.f32 %f2172, %f115, %f777; - bra.uni BB8_49; - -BB8_47: - mov.f32 %f748, 0f3A03BB71; - mov.f32 %f749, 0fB7B730FB; - fma.rn.f32 %f750, %f749, %f116, %f748; - mov.f32 %f751, 0fBBACA3B3; - fma.rn.f32 %f752, %f750, %f116, %f751; - mov.f32 %f753, 0f3D0A7445; - fma.rn.f32 %f754, %f752, %f116, %f753; - mov.f32 %f755, 0fBE1B3B75; - fma.rn.f32 %f756, %f754, %f116, %f755; - mov.f32 %f757, 0fBF6B385A; - fma.rn.f32 %f758, %f756, %f116, %f757; - mov.f32 %f759, 0fBFD0316E; - fma.rn.f32 %f760, %f758, %f116, %f759; - mov.f32 %f761, 0fBA031CCE; - fma.rn.f32 %f762, %f760, %f116, %f761; - ex2.approx.ftz.f32 %f763, %f762; - mov.f32 %f764, 0f3F800000; - sub.f32 %f765, %f764, %f763; - mov.b32 %r165, %f765; - setp.ltu.f32 %p56, %f116, 0f407AD445; - selp.b32 %r166, %r165, 1065353216, %p56; - mov.b32 %r167, %f115; - and.b32 %r168, %r167, -2147483648; - or.b32 %r169, %r166, %r168; - mov.b32 %f2172, %r169; - -BB8_49: - sub.f32 %f120, %f2171, %f2172; - sub.f32 %f121, %f101, %f2179; - add.f32 %f778, %f121, 0f3F800000; - mul.f32 %f122, %f778, %f100; - abs.f32 %f123, %f122; - setp.ltu.f32 %p57, %f123, 0f3F800000; - @%p57 bra BB8_51; - bra.uni BB8_50; - -BB8_51: - mul.f32 %f797, %f122, %f122; - mov.f32 %f798, 0f3BA0C9F8; - mov.f32 %f799, 0fBA1268FB; - fma.rn.f32 %f800, %f799, %f797, %f798; - mov.f32 %f801, 0fBCDABFD4; - fma.rn.f32 %f802, %f800, %f797, %f801; - mov.f32 %f803, 0f3DE70331; - fma.rn.f32 %f804, %f802, %f797, %f803; - mov.f32 %f805, 0fBEC09330; - fma.rn.f32 %f806, %f804, %f797, %f805; - mov.f32 %f807, 0f3F906EBA; - fma.rn.f32 %f808, %f806, %f797, %f807; - mul.f32 %f2173, %f122, %f808; - bra.uni BB8_52; - -BB8_50: - mov.f32 %f779, 0f3A03BB71; - mov.f32 %f780, 0fB7B730FB; - fma.rn.f32 %f781, %f780, %f123, %f779; - mov.f32 %f782, 0fBBACA3B3; - fma.rn.f32 %f783, %f781, %f123, %f782; - mov.f32 %f784, 0f3D0A7445; - fma.rn.f32 %f785, %f783, %f123, %f784; - mov.f32 %f786, 0fBE1B3B75; - fma.rn.f32 %f787, %f785, %f123, %f786; - mov.f32 %f788, 0fBF6B385A; - fma.rn.f32 %f789, %f787, %f123, %f788; - mov.f32 %f790, 0fBFD0316E; - fma.rn.f32 %f791, %f789, %f123, %f790; - mov.f32 %f792, 0fBA031CCE; - fma.rn.f32 %f793, %f791, %f123, %f792; - ex2.approx.ftz.f32 %f794, %f793; - mov.f32 %f795, 0f3F800000; - sub.f32 %f796, %f795, %f794; - mov.b32 %r170, %f796; - setp.ltu.f32 %p58, %f123, 0f407AD445; - selp.b32 %r171, %r170, 1065353216, %p58; - mov.b32 %r172, %f122; - and.b32 %r173, %r172, -2147483648; - or.b32 %r174, %r171, %r173; - mov.b32 %f2173, %r174; - -BB8_52: - mul.f32 %f127, %f121, %f100; - abs.f32 %f128, %f127; - setp.ltu.f32 %p59, %f128, 0f3F800000; - @%p59 bra BB8_54; - bra.uni BB8_53; - -BB8_54: - mul.f32 %f827, %f127, %f127; - mov.f32 %f828, 0f3BA0C9F8; - mov.f32 %f829, 0fBA1268FB; - fma.rn.f32 %f830, %f829, %f827, %f828; - mov.f32 %f831, 0fBCDABFD4; - fma.rn.f32 %f832, %f830, %f827, %f831; - mov.f32 %f833, 0f3DE70331; - fma.rn.f32 %f834, %f832, %f827, %f833; - mov.f32 %f835, 0fBEC09330; - fma.rn.f32 %f836, %f834, %f827, %f835; - mov.f32 %f837, 0f3F906EBA; - fma.rn.f32 %f838, %f836, %f827, %f837; - mul.f32 %f2174, %f127, %f838; - bra.uni BB8_55; - -BB8_53: - mov.f32 %f809, 0f3A03BB71; - mov.f32 %f810, 0fB7B730FB; - fma.rn.f32 %f811, %f810, %f128, %f809; - mov.f32 %f812, 0fBBACA3B3; - fma.rn.f32 %f813, %f811, %f128, %f812; - mov.f32 %f814, 0f3D0A7445; - fma.rn.f32 %f815, %f813, %f128, %f814; - mov.f32 %f816, 0fBE1B3B75; - fma.rn.f32 %f817, %f815, %f128, %f816; - mov.f32 %f818, 0fBF6B385A; - fma.rn.f32 %f819, %f817, %f128, %f818; - mov.f32 %f820, 0fBFD0316E; - fma.rn.f32 %f821, %f819, %f128, %f820; - mov.f32 %f822, 0fBA031CCE; - fma.rn.f32 %f823, %f821, %f128, %f822; - ex2.approx.ftz.f32 %f824, %f823; - mov.f32 %f825, 0f3F800000; - sub.f32 %f826, %f825, %f824; - mov.b32 %r175, %f826; - setp.ltu.f32 %p60, %f128, 0f407AD445; - selp.b32 %r176, %r175, 1065353216, %p60; - mov.b32 %r177, %f127; - and.b32 %r178, %r177, -2147483648; - or.b32 %r179, %r176, %r178; - mov.b32 %f2174, %r179; - -BB8_55: - sub.f32 %f839, %f2173, %f2174; - mul.f32 %f840, %f839, 0f3F000000; - mul.f32 %f841, %f120, 0f3F000000; - mul.f32 %f842, %f841, %f473; - fma.rn.f32 %f2175, %f842, %f840, %f2175; - add.s32 %r404, %r404, 1; - setp.lt.s32 %p61, %r404, %r30; - @%p61 bra BB8_43; - -BB8_56: - mad.lo.s32 %r180, %r402, %r102, %r401; - add.s32 %r181, %r180, %r1; - mul.wide.s32 %rd62, %r181, 4; - add.s64 %rd63, %rd1, %rd62; - ld.global.f32 %f843, [%rd63]; - sub.f32 %f138, %f843, %f2175; - setp.leu.f32 %p62, %f138, %f2180; - @%p62 bra BB8_58; - - setp.ne.s32 %p63, %r402, %r155; - setp.ne.s32 %p64, %r402, 0; - and.pred %p65, %p63, %p64; - cvt.rn.f32.s32 %f844, %r404; - div.rn.f32 %f845, %f2177, %f844; - sub.f32 %f846, %f95, %f845; - setp.gt.f32 %p66, %f846, 0f00000000; - selp.f32 %f847, 0f3F000000, 0fBF000000, %p66; - div.rn.f32 %f848, %f2176, %f844; - cvt.rn.f32.s32 %f849, %r402; - sub.f32 %f850, %f849, %f848; - setp.gt.f32 %p67, %f850, 0f00000000; - selp.f32 %f851, 0f3F000000, 0fBF000000, %p67; - mul.f32 %f852, %f96, %f847; - sub.f32 %f853, %f95, %f852; - st.local.f32 [%rd8], %f853; - selp.f32 %f854, 0f3F800000, 0fBF800000, %p65; - mul.f32 %f855, %f854, %f851; - sub.f32 %f856, %f849, %f855; - st.local.f32 [%rd9], %f856; - mov.f32 %f2180, %f138; - -BB8_58: - add.s32 %r402, %r402, 1; - setp.lt.s32 %p68, %r402, %r102; - @%p68 bra BB8_41; - - add.s32 %r401, %r401, 1; - setp.lt.s32 %p69, %r401, %r102; - @%p69 bra BB8_40; - bra.uni BB8_60; - -BB8_38: - mov.f32 %f2180, %f86; - -BB8_60: - setp.eq.f32 %p70, %f2180, %f86; - selp.u32 %r405, 1, 0, %p70; - -BB8_62: - setp.ne.s32 %p71, %r405, 0; - @%p71 bra BB8_206; - - setp.lt.s32 %p72, %r103, 1; - @%p72 bra BB8_64; - - mov.u32 %r406, 0; - mov.f32 %f361, %f82; - -BB8_66: + selp.f32 %f666, %f161, %f665, %p145; + selp.f32 %f667, 0f3789CA3C, 0f38B1E96A, %p145; + selp.f32 %f668, 0fB9F560B9, 0fBA574D20, %p145; + fma.rn.f32 %f669, %f667, %f666, %f668; + selp.f32 %f670, 0f3BAC840B, 0f3BAAD5EA, %p145; + fma.rn.f32 %f671, %f669, %f666, %f670; + selp.f32 %f672, 0fBD0C8162, 0fBCDC1BE7, %p145; + fma.rn.f32 %f673, %f671, %f666, %f672; + selp.f32 %f674, 0f3E1CF906, 0f3DE718AF, %p145; + fma.rn.f32 %f675, %f673, %f666, %f674; + selp.f32 %f676, 0f3F6A937E, 0fBEC093AC, %p145; + fma.rn.f32 %f677, %f675, %f666, %f676; + selp.f32 %f678, 0f3F20D842, 0f3E0375D3, %p145; + fma.rn.f32 %f679, %f677, %f666, %f678; + neg.f32 %f680, %f161; + selp.f32 %f681, %f680, %f664, %p145; + fma.rn.f32 %f162, %f679, %f681, %f681; + mov.b32 %r312, %f664; + and.b32 %r71, %r312, -2147483648; + add.f32 %f163, %f662, 0fBF000000; + mul.f32 %f682, %f163, %f160; + abs.f32 %f164, %f682; + setp.ge.f32 %p146, %f164, 0f3F8060FE; + mul.f32 %f683, %f682, %f682; + selp.f32 %f684, %f164, %f683, %p146; + selp.f32 %f685, 0f3789CA3C, 0f38B1E96A, %p146; + selp.f32 %f686, 0fB9F560B9, 0fBA574D20, %p146; + fma.rn.f32 %f687, %f685, %f684, %f686; + selp.f32 %f688, 0f3BAC840B, 0f3BAAD5EA, %p146; + fma.rn.f32 %f689, %f687, %f684, %f688; + selp.f32 %f690, 0fBD0C8162, 0fBCDC1BE7, %p146; + fma.rn.f32 %f691, %f689, %f684, %f690; + selp.f32 %f692, 0f3E1CF906, 0f3DE718AF, %p146; + fma.rn.f32 %f693, %f691, %f684, %f692; + selp.f32 %f694, 0f3F6A937E, 0fBEC093AC, %p146; + fma.rn.f32 %f695, %f693, %f684, %f694; + selp.f32 %f696, 0f3F20D842, 0f3E0375D3, %p146; + fma.rn.f32 %f697, %f695, %f684, %f696; + neg.f32 %f698, %f164; + selp.f32 %f699, %f698, %f682, %p146; + fma.rn.f32 %f165, %f697, %f699, %f699; + mov.b32 %r313, %f682; + and.b32 %r72, %r313, -2147483648; + add.f32 %f700, %f159, 0f3F000000; + sub.f32 %f166, %f700, %f1774; + div.rn.f32 %f167, %f166, %f378; + cvt.rzi.f32.f32 %f702, %f1611; + add.f32 %f703, %f702, %f702; + mov.f32 %f704, 0f40000000; + sub.f32 %f705, %f704, %f703; + abs.f32 %f168, %f705; + setp.eq.f32 %p147, %f168, 0f3F800000; + abs.f32 %f169, %f167; + setp.lt.f32 %p148, %f169, 0f00800000; + mul.f32 %f706, %f169, 0f4B800000; + selp.f32 %f707, %f706, %f169, %p148; + selp.f32 %f708, 0fC3170000, 0fC2FE0000, %p148; + mov.b32 %r314, %f707; + and.b32 %r315, %r314, 8388607; + or.b32 %r316, %r315, 1065353216; + mov.b32 %f709, %r316; + shr.u32 %r317, %r314, 23; + cvt.rn.f32.u32 %f710, %r317; + add.f32 %f711, %f708, %f710; + setp.gt.f32 %p149, %f709, 0f3FB504F3; + mul.f32 %f712, %f709, 0f3F000000; + add.f32 %f713, %f711, 0f3F800000; + selp.f32 %f714, %f713, %f711, %p149; + selp.f32 %f715, %f712, %f709, %p149; + add.f32 %f716, %f715, 0fBF800000; + add.f32 %f717, %f715, 0f3F800000; + rcp.approx.ftz.f32 %f718, %f717; + add.f32 %f719, %f716, %f716; + mul.f32 %f720, %f719, %f718; + mul.f32 %f721, %f720, %f720; + mov.f32 %f722, 0f3C4CAF63; + mov.f32 %f723, 0f3B18F0FE; + fma.rn.f32 %f724, %f723, %f721, %f722; + mov.f32 %f725, 0f3DAAAABD; + fma.rn.f32 %f726, %f724, %f721, %f725; + mul.rn.f32 %f727, %f726, %f721; + mul.rn.f32 %f728, %f727, %f720; + sub.f32 %f729, %f716, %f720; + add.f32 %f730, %f729, %f729; + neg.f32 %f731, %f720; + fma.rn.f32 %f732, %f731, %f716, %f730; + mul.rn.f32 %f733, %f718, %f732; + add.f32 %f734, %f728, %f720; + sub.f32 %f735, %f720, %f734; + add.f32 %f736, %f728, %f735; + add.f32 %f737, %f733, %f736; + add.f32 %f738, %f734, %f737; + sub.f32 %f739, %f734, %f738; + add.f32 %f740, %f737, %f739; + mov.f32 %f741, 0f3F317200; + mul.rn.f32 %f742, %f714, %f741; + mov.f32 %f743, 0f35BFBE8E; + mul.rn.f32 %f744, %f714, %f743; + add.f32 %f745, %f742, %f738; + sub.f32 %f746, %f742, %f745; + add.f32 %f747, %f738, %f746; + add.f32 %f748, %f740, %f747; + add.f32 %f749, %f744, %f748; + add.f32 %f750, %f745, %f749; + sub.f32 %f751, %f745, %f750; + add.f32 %f752, %f749, %f751; + mul.rn.f32 %f753, %f704, %f750; + neg.f32 %f754, %f753; + fma.rn.f32 %f755, %f704, %f750, %f754; + fma.rn.f32 %f756, %f704, %f752, %f755; + fma.rn.f32 %f758, %f1610, %f750, %f756; + add.rn.f32 %f759, %f753, %f758; + neg.f32 %f760, %f759; + add.rn.f32 %f761, %f753, %f760; + add.rn.f32 %f762, %f761, %f758; + mov.b32 %r318, %f759; + setp.eq.s32 %p150, %r318, 1118925336; + add.s32 %r319, %r318, -1; + mov.b32 %f763, %r319; + add.f32 %f764, %f762, 0f37000000; + selp.f32 %f170, %f764, %f762, %p150; + selp.f32 %f765, %f763, %f759, %p150; + mov.f32 %f766, 0f3FB8AA3B; + mul.rn.f32 %f767, %f765, %f766; + cvt.rzi.f32.f32 %f768, %f767; + abs.f32 %f769, %f768; + setp.gt.f32 %p151, %f769, 0f42FC0000; + mov.b32 %r320, %f768; + and.b32 %r321, %r320, -2147483648; + or.b32 %r322, %r321, 1123811328; + mov.b32 %f770, %r322; + selp.f32 %f771, %f770, %f768, %p151; + mov.f32 %f772, 0fBF317218; + fma.rn.f32 %f773, %f771, %f772, %f765; + mov.f32 %f774, 0f3102E308; + fma.rn.f32 %f775, %f771, %f774, %f773; + mul.f32 %f776, %f775, 0f3FB8AA3B; + add.f32 %f777, %f771, 0f4B40007F; + mov.b32 %r323, %f777; + shl.b32 %r324, %r323, 23; + mov.b32 %f778, %r324; + ex2.approx.ftz.f32 %f779, %f776; + mul.f32 %f171, %f779, %f778; + setp.lt.f32 %p152, %f167, 0f00000000; + and.pred %p5, %p152, %p147; + add.f32 %f780, %f167, %f167; + selp.f32 %f172, %f780, 0f00000000, %p147; + div.rn.f32 %f173, %f163, %f378; + abs.f32 %f174, %f173; + setp.lt.f32 %p153, %f174, 0f00800000; + mul.f32 %f782, %f174, 0f4B800000; + selp.f32 %f783, %f782, %f174, %p153; + selp.f32 %f784, 0fC3170000, 0fC2FE0000, %p153; + mov.b32 %r325, %f783; + and.b32 %r326, %r325, 8388607; + or.b32 %r327, %r326, 1065353216; + mov.b32 %f785, %r327; + shr.u32 %r328, %r325, 23; + cvt.rn.f32.u32 %f786, %r328; + add.f32 %f787, %f784, %f786; + setp.gt.f32 %p154, %f785, 0f3FB504F3; + mul.f32 %f788, %f785, 0f3F000000; + add.f32 %f789, %f787, 0f3F800000; + selp.f32 %f790, %f789, %f787, %p154; + selp.f32 %f791, %f788, %f785, %p154; + add.f32 %f792, %f791, 0fBF800000; + add.f32 %f793, %f791, 0f3F800000; + rcp.approx.ftz.f32 %f794, %f793; + add.f32 %f795, %f792, %f792; + mul.f32 %f796, %f795, %f794; + mul.f32 %f797, %f796, %f796; + fma.rn.f32 %f798, %f723, %f797, %f722; + fma.rn.f32 %f799, %f798, %f797, %f725; + mul.rn.f32 %f800, %f799, %f797; + mul.rn.f32 %f801, %f800, %f796; + sub.f32 %f802, %f792, %f796; + add.f32 %f803, %f802, %f802; + neg.f32 %f804, %f796; + fma.rn.f32 %f805, %f804, %f792, %f803; + mul.rn.f32 %f806, %f794, %f805; + add.f32 %f807, %f801, %f796; + sub.f32 %f808, %f796, %f807; + add.f32 %f809, %f801, %f808; + add.f32 %f810, %f806, %f809; + add.f32 %f811, %f807, %f810; + sub.f32 %f812, %f807, %f811; + add.f32 %f813, %f810, %f812; + mul.rn.f32 %f814, %f790, %f741; + mul.rn.f32 %f815, %f790, %f743; + add.f32 %f816, %f814, %f811; + sub.f32 %f817, %f814, %f816; + add.f32 %f818, %f811, %f817; + add.f32 %f819, %f813, %f818; + add.f32 %f820, %f815, %f819; + add.f32 %f821, %f816, %f820; + sub.f32 %f822, %f816, %f821; + add.f32 %f823, %f820, %f822; + mul.rn.f32 %f824, %f704, %f821; + neg.f32 %f825, %f824; + fma.rn.f32 %f826, %f704, %f821, %f825; + fma.rn.f32 %f827, %f704, %f823, %f826; + fma.rn.f32 %f828, %f1610, %f821, %f827; + add.rn.f32 %f829, %f824, %f828; + neg.f32 %f830, %f829; + add.rn.f32 %f831, %f824, %f830; + add.rn.f32 %f832, %f831, %f828; + mov.b32 %r329, %f829; + setp.eq.s32 %p155, %r329, 1118925336; + add.s32 %r330, %r329, -1; + mov.b32 %f833, %r330; + add.f32 %f834, %f832, 0f37000000; + selp.f32 %f175, %f834, %f832, %p155; + selp.f32 %f835, %f833, %f829, %p155; + mul.rn.f32 %f836, %f835, %f766; + cvt.rzi.f32.f32 %f837, %f836; + abs.f32 %f838, %f837; + setp.gt.f32 %p156, %f838, 0f42FC0000; + mov.b32 %r331, %f837; + and.b32 %r332, %r331, -2147483648; + or.b32 %r333, %r332, 1123811328; + mov.b32 %f839, %r333; + selp.f32 %f840, %f839, %f837, %p156; + fma.rn.f32 %f841, %f840, %f772, %f835; + fma.rn.f32 %f842, %f840, %f774, %f841; + mul.f32 %f843, %f842, 0f3FB8AA3B; + add.f32 %f844, %f840, 0f4B40007F; + mov.b32 %r334, %f844; + shl.b32 %r335, %r334, 23; + mov.b32 %f845, %r335; + ex2.approx.ftz.f32 %f846, %f843; + mul.f32 %f176, %f846, %f845; + setp.lt.f32 %p157, %f173, 0f00000000; + and.pred %p6, %p157, %p147; + add.f32 %f847, %f173, %f173; + selp.f32 %f179, %f847, 0f00000000, %p147; + mov.f64 %fd215, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r336}, %fd215; + } + and.b32 %r337, %r336, 2146435072; + setp.eq.s32 %p158, %r337, 1073741824; + abs.f64 %fd216, %fd35; + { // callseq 177, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd216; + .param .b64 param1; + st.param.f64 [param1+0], %fd215; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd48, [retval0+0]; + } // callseq 177 + { + .reg .b32 %temp; + mov.b64 {%temp, %r75}, %fd35; + } + setp.lt.s32 %p159, %r75, 0; + and.pred %p7, %p159, %p158; + selp.b32 %r338, %r75, 0, %p158; + setp.lt.s32 %p160, %r336, 0; + or.b32 %r339, %r338, 2146435072; + selp.b32 %r76, %r339, %r338, %p160; + { + .reg .b32 %temp; + mov.b64 {%temp, %r340}, %fd36; + } + and.b32 %r77, %r340, 2146435072; + setp.ne.s32 %p161, %r77, 2146435072; + setp.gtu.f64 %p162, %fd216, 0d7FF0000000000000; + and.b32 %r78, %r336, 2147483647; + setp.gt.f64 %p163, %fd216, 0d3FF0000000000000; + selp.b32 %r341, 2146435072, 0, %p163; + xor.b32 %r342, %r341, 2146435072; + selp.b32 %r343, %r342, %r341, %p160; + setp.eq.f32 %p164, %f378, 0fBF800000; + selp.b32 %r79, 1072693248, %r343, %p164; + setp.gt.s32 %p165, %r336, -1; + selp.b32 %r344, 2146435072, 0, %p165; + setp.ne.s32 %p166, %r78, 1071644672; + and.pred %p167, %p166, %p7; + or.b32 %r345, %r344, -2147483648; + selp.b32 %r81, %r345, %r344, %p167; + { + .reg .b32 %temp; + mov.b64 {%temp, %r82}, %fd207; + } + and.b32 %r84, %r82, 2147483647; + setp.gt.s32 %p168, %r82, -1; + selp.b32 %r85, 2146435072, 0, %p168; + or.b32 %r86, %r85, -2147483648; + or.pred %p10, %p161, %p162; + +$L__BB8_113: + mov.u32 %r641, 0; + mad.lo.s32 %r347, %r640, %r151, %r639; + add.s32 %r348, %r347, %r1; + mul.wide.s32 %rd85, %r348, 4; + add.s64 %rd86, %rd1, %rd85; + ld.global.f32 %f186, [%rd86]; + cvt.rn.f32.s32 %f187, %r640; + mov.f32 %f1752, %f1781; + +$L__BB8_114: + cvt.rn.f32.s32 %f1612, %r639; + mul.wide.s32 %rd87, %r641, 4; + add.s64 %rd88, %rd6, %rd87; + add.s64 %rd89, %rd7, %rd87; + ld.local.f32 %f189, [%rd89]; + ld.local.f32 %f849, [%rd88]; + sub.f32 %f190, %f1612, %f849; + add.f32 %f850, %f190, 0f3F000000; + mul.f32 %f191, %f850, %f160; + abs.f32 %f851, %f191; + setp.ltu.f32 %p169, %f851, 0f3F8060FE; + setp.ge.f32 %p170, %f851, 0f3F8060FE; + mul.f32 %f852, %f191, %f191; + selp.f32 %f853, %f851, %f852, %p170; + selp.f32 %f854, 0f3789CA3C, 0f38B1E96A, %p170; + selp.f32 %f855, 0fB9F560B9, 0fBA574D20, %p170; + fma.rn.f32 %f856, %f854, %f853, %f855; + selp.f32 %f857, 0f3BAC840B, 0f3BAAD5EA, %p170; + fma.rn.f32 %f858, %f856, %f853, %f857; + selp.f32 %f859, 0fBD0C8162, 0fBCDC1BE7, %p170; + fma.rn.f32 %f860, %f858, %f853, %f859; + selp.f32 %f861, 0f3E1CF906, 0f3DE718AF, %p170; + fma.rn.f32 %f862, %f860, %f853, %f861; + selp.f32 %f863, 0f3F6A937E, 0fBEC093AC, %p170; + fma.rn.f32 %f864, %f862, %f853, %f863; + selp.f32 %f865, 0f3F20D842, 0f3E0375D3, %p170; + fma.rn.f32 %f866, %f864, %f853, %f865; + neg.f32 %f867, %f851; + selp.f32 %f868, %f867, %f191, %p170; + fma.rn.f32 %f1753, %f866, %f868, %f868; + @%p169 bra $L__BB8_116; + + mov.f32 %f1663, 0f3F800000; + ex2.approx.ftz.f32 %f869, %f1753; + sub.f32 %f871, %f1663, %f869; + mov.b32 %r349, %f871; + mov.b32 %r350, %f191; + and.b32 %r351, %r350, -2147483648; + or.b32 %r352, %r351, %r349; + mov.b32 %f1753, %r352; + +$L__BB8_116: + add.f32 %f872, %f190, 0fBF000000; + mul.f32 %f195, %f872, %f160; + abs.f32 %f873, %f195; + setp.ltu.f32 %p171, %f873, 0f3F8060FE; + setp.ge.f32 %p172, %f873, 0f3F8060FE; + mul.f32 %f874, %f195, %f195; + selp.f32 %f875, %f873, %f874, %p172; + selp.f32 %f876, 0f3789CA3C, 0f38B1E96A, %p172; + selp.f32 %f877, 0fB9F560B9, 0fBA574D20, %p172; + fma.rn.f32 %f878, %f876, %f875, %f877; + selp.f32 %f879, 0f3BAC840B, 0f3BAAD5EA, %p172; + fma.rn.f32 %f880, %f878, %f875, %f879; + selp.f32 %f881, 0fBD0C8162, 0fBCDC1BE7, %p172; + fma.rn.f32 %f882, %f880, %f875, %f881; + selp.f32 %f883, 0f3E1CF906, 0f3DE718AF, %p172; + fma.rn.f32 %f884, %f882, %f875, %f883; + selp.f32 %f885, 0f3F6A937E, 0fBEC093AC, %p172; + fma.rn.f32 %f886, %f884, %f875, %f885; + selp.f32 %f887, 0f3F20D842, 0f3E0375D3, %p172; + fma.rn.f32 %f888, %f886, %f875, %f887; + neg.f32 %f889, %f873; + selp.f32 %f890, %f889, %f195, %p172; + fma.rn.f32 %f1754, %f888, %f890, %f890; + @%p171 bra $L__BB8_118; + + mov.f32 %f1662, 0f3F800000; + ex2.approx.ftz.f32 %f891, %f1754; + sub.f32 %f893, %f1662, %f891; + mov.b32 %r353, %f893; + mov.b32 %r354, %f195; + and.b32 %r355, %r354, -2147483648; + or.b32 %r356, %r355, %r353; + mov.b32 %f1754, %r356; + +$L__BB8_118: + sub.f32 %f199, %f1753, %f1754; + sub.f32 %f200, %f187, %f189; + add.f32 %f894, %f200, 0f3F000000; + mul.f32 %f201, %f894, %f160; + abs.f32 %f895, %f201; + setp.ltu.f32 %p173, %f895, 0f3F8060FE; + setp.ge.f32 %p174, %f895, 0f3F8060FE; + mul.f32 %f896, %f201, %f201; + selp.f32 %f897, %f895, %f896, %p174; + selp.f32 %f898, 0f3789CA3C, 0f38B1E96A, %p174; + selp.f32 %f899, 0fB9F560B9, 0fBA574D20, %p174; + fma.rn.f32 %f900, %f898, %f897, %f899; + selp.f32 %f901, 0f3BAC840B, 0f3BAAD5EA, %p174; + fma.rn.f32 %f902, %f900, %f897, %f901; + selp.f32 %f903, 0fBD0C8162, 0fBCDC1BE7, %p174; + fma.rn.f32 %f904, %f902, %f897, %f903; + selp.f32 %f905, 0f3E1CF906, 0f3DE718AF, %p174; + fma.rn.f32 %f906, %f904, %f897, %f905; + selp.f32 %f907, 0f3F6A937E, 0fBEC093AC, %p174; + fma.rn.f32 %f908, %f906, %f897, %f907; + selp.f32 %f909, 0f3F20D842, 0f3E0375D3, %p174; + fma.rn.f32 %f910, %f908, %f897, %f909; + neg.f32 %f911, %f895; + selp.f32 %f912, %f911, %f201, %p174; + fma.rn.f32 %f1755, %f910, %f912, %f912; + @%p173 bra $L__BB8_120; + + mov.f32 %f1661, 0f3F800000; + ex2.approx.ftz.f32 %f913, %f1755; + sub.f32 %f915, %f1661, %f913; + mov.b32 %r357, %f915; + mov.b32 %r358, %f201; + and.b32 %r359, %r358, -2147483648; + or.b32 %r360, %r359, %r357; + mov.b32 %f1755, %r360; + +$L__BB8_120: + add.f32 %f916, %f200, 0fBF000000; + mul.f32 %f205, %f916, %f160; + abs.f32 %f917, %f205; + setp.ltu.f32 %p175, %f917, 0f3F8060FE; + setp.ge.f32 %p176, %f917, 0f3F8060FE; + mul.f32 %f918, %f205, %f205; + selp.f32 %f919, %f917, %f918, %p176; + selp.f32 %f920, 0f3789CA3C, 0f38B1E96A, %p176; + selp.f32 %f921, 0fB9F560B9, 0fBA574D20, %p176; + fma.rn.f32 %f922, %f920, %f919, %f921; + selp.f32 %f923, 0f3BAC840B, 0f3BAAD5EA, %p176; + fma.rn.f32 %f924, %f922, %f919, %f923; + selp.f32 %f925, 0fBD0C8162, 0fBCDC1BE7, %p176; + fma.rn.f32 %f926, %f924, %f919, %f925; + selp.f32 %f927, 0f3E1CF906, 0f3DE718AF, %p176; + fma.rn.f32 %f928, %f926, %f919, %f927; + selp.f32 %f929, 0f3F6A937E, 0fBEC093AC, %p176; + fma.rn.f32 %f930, %f928, %f919, %f929; + selp.f32 %f931, 0f3F20D842, 0f3E0375D3, %p176; + fma.rn.f32 %f932, %f930, %f919, %f931; + neg.f32 %f933, %f917; + selp.f32 %f934, %f933, %f205, %p176; + fma.rn.f32 %f1756, %f932, %f934, %f934; + @%p175 bra $L__BB8_122; + + mov.f32 %f1660, 0f3F800000; + ex2.approx.ftz.f32 %f935, %f1756; + sub.f32 %f937, %f1660, %f935; + mov.b32 %r361, %f937; + mov.b32 %r362, %f205; + and.b32 %r363, %r362, -2147483648; + or.b32 %r364, %r363, %r361; + mov.b32 %f1756, %r364; + +$L__BB8_122: + sub.f32 %f938, %f1755, %f1756; + mul.f32 %f939, %f938, 0f3F000000; + mul.f32 %f940, %f199, 0f3F000000; + mul.f32 %f941, %f940, %f379; + fma.rn.f32 %f1752, %f941, %f939, %f1752; + add.s32 %r641, %r641, 1; + setp.lt.u32 %p177, %r641, %r628; + @%p177 bra $L__BB8_114; + + setp.leu.f32 %p178, %f1752, 0f3C23D70A; + mov.f32 %f1758, 0f00000000; + mov.f32 %f1757, %f1758; + @%p178 bra $L__BB8_125; + + div.rn.f32 %f943, %f186, %f1752; + add.f32 %f1757, %f943, 0fBF800000; + +$L__BB8_125: + @%p178 bra $L__BB8_140; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r609}, %fd207; + } + and.b32 %r608, %r609, 2146435072; + setp.eq.s32 %p180, %r608, 1062207488; + cvt.f64.f32 %fd49, %f1752; + { + .reg .b32 %temp; + mov.b64 {%temp, %r90}, %fd49; + } + abs.f64 %fd50, %fd49; + { // callseq 178, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd50; + .param .b64 param1; + st.param.f64 [param1+0], %fd207; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd383, [retval0+0]; + } // callseq 178 + setp.lt.s32 %p181, %r90, 0; + and.pred %p11, %p181, %p180; + not.pred %p182, %p11; + @%p182 bra $L__BB8_128; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r365}, %fd383; + } + xor.b32 %r366, %r365, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r367, %temp}, %fd383; + } + mov.b64 %fd383, {%r367, %r366}; + +$L__BB8_128: + setp.eq.f32 %p183, %f1752, 0f00000000; + @%p183 bra $L__BB8_132; + bra.uni $L__BB8_129; + +$L__BB8_132: + { + .reg .b32 %temp; + mov.b64 {%temp, %r611}, %fd207; + } + setp.lt.s32 %p186, %r611, 0; + mov.u32 %r368, 0; + selp.b32 %r369, %r90, 0, %p180; + or.b32 %r370, %r369, 2146435072; + selp.b32 %r371, %r370, %r369, %p186; + mov.b64 %fd383, {%r368, %r371}; + bra.uni $L__BB8_133; + +$L__BB8_129: + setp.gt.s32 %p184, %r90, -1; + @%p184 bra $L__BB8_133; + + cvt.rzi.f64.f64 %fd220, %fd207; + setp.eq.f64 %p185, %fd220, 0d4000000000000000; + @%p185 bra $L__BB8_133; + + mov.f64 %fd383, 0dFFF8000000000000; + +$L__BB8_133: + add.f64 %fd56, %fd49, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r372}, %fd56; + } + and.b32 %r373, %r372, 2146435072; + setp.ne.s32 %p188, %r373, 2146435072; + mov.f64 %fd384, %fd383; + @%p188 bra $L__BB8_139; + + setp.gtu.f64 %p189, %fd50, 0d7FF0000000000000; + mov.f64 %fd384, %fd56; + @%p189 bra $L__BB8_139; + + setp.eq.s32 %p190, %r84, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r374, %temp}, %fd207; + } + setp.eq.s32 %p191, %r374, 0; + and.pred %p192, %p190, %p191; + @%p192 bra $L__BB8_138; + bra.uni $L__BB8_136; + +$L__BB8_138: + { + .reg .b32 %temp; + mov.b64 {%temp, %r610}, %fd207; + } + setp.lt.s32 %p198, %r610, 0; + mov.u32 %r379, 0; + setp.gt.f64 %p199, %fd50, 0d3FF0000000000000; + selp.b32 %r380, 2146435072, 0, %p199; + xor.b32 %r381, %r380, 2146435072; + selp.b32 %r382, %r381, %r380, %p198; + setp.eq.f32 %p200, %f1752, 0fBF800000; + selp.b32 %r383, 1072693248, %r382, %p200; + mov.b64 %fd384, {%r379, %r383}; + bra.uni $L__BB8_139; + +$L__BB8_136: + { + .reg .b32 %temp; + mov.b64 {%r375, %temp}, %fd49; + } + and.b32 %r376, %r90, 2147483647; + setp.ne.s32 %p193, %r376, 2146435072; + setp.ne.s32 %p194, %r375, 0; + or.pred %p195, %p193, %p194; + mov.f64 %fd384, %fd383; + @%p195 bra $L__BB8_139; + + setp.ne.s32 %p196, %r84, 1071644672; + and.pred %p197, %p196, %p11; + selp.b32 %r377, %r86, %r85, %p197; + mov.u32 %r378, 0; + mov.b64 %fd384, {%r378, %r377}; + +$L__BB8_139: + setp.eq.f32 %p201, %f1752, 0f3F800000; + selp.f64 %fd223, 0d3FF0000000000000, %fd384, %p201; + cvt.f64.f32 %fd224, %f186; + div.rn.f64 %fd225, %fd224, %fd223; + cvt.rn.f32.f64 %f1758, %fd225; + +$L__BB8_140: + cvt.rn.f32.s32 %f1617, %r639; + sub.f32 %f1616, %f1617, %f1774; + add.f32 %f1615, %f1616, 0f3F000000; + mul.f32 %f1614, %f1615, %f160; + abs.f32 %f1613, %f1614; + mov.f32 %f945, 0f47C35000; + min.f32 %f214, %f1757, %f945; + setp.ltu.f32 %p202, %f1613, 0f3F8060FE; + mov.f32 %f1759, %f162; + @%p202 bra $L__BB8_142; + + mov.f32 %f1659, 0f3F800000; + ex2.approx.ftz.f32 %f946, %f162; + sub.f32 %f948, %f1659, %f946; + mov.b32 %r384, %f948; + or.b32 %r385, %r71, %r384; + mov.b32 %f1759, %r385; + +$L__BB8_142: + cvt.rn.f32.s32 %f1622, %r639; + sub.f32 %f1621, %f1622, %f1774; + add.f32 %f1620, %f1621, 0fBF000000; + mul.f32 %f1619, %f1620, %f160; + abs.f32 %f1618, %f1619; + setp.ltu.f32 %p203, %f1618, 0f3F8060FE; + mov.f32 %f1760, %f165; + @%p203 bra $L__BB8_144; + + mov.f32 %f1658, 0f3F800000; + ex2.approx.ftz.f32 %f949, %f165; + sub.f32 %f951, %f1658, %f949; + mov.b32 %r386, %f951; + or.b32 %r387, %r72, %r386; + mov.b32 %f1760, %r387; + +$L__BB8_144: + sub.f32 %f219, %f1759, %f1760; + sub.f32 %f220, %f187, %f1773; + add.f32 %f952, %f220, 0f3F000000; + mul.f32 %f221, %f952, %f160; + abs.f32 %f953, %f221; + setp.ltu.f32 %p204, %f953, 0f3F8060FE; + setp.ge.f32 %p205, %f953, 0f3F8060FE; + mul.f32 %f954, %f221, %f221; + selp.f32 %f955, %f953, %f954, %p205; + selp.f32 %f956, 0f3789CA3C, 0f38B1E96A, %p205; + selp.f32 %f957, 0fB9F560B9, 0fBA574D20, %p205; + fma.rn.f32 %f958, %f956, %f955, %f957; + selp.f32 %f959, 0f3BAC840B, 0f3BAAD5EA, %p205; + fma.rn.f32 %f960, %f958, %f955, %f959; + selp.f32 %f961, 0fBD0C8162, 0fBCDC1BE7, %p205; + fma.rn.f32 %f962, %f960, %f955, %f961; + selp.f32 %f963, 0f3E1CF906, 0f3DE718AF, %p205; + fma.rn.f32 %f964, %f962, %f955, %f963; + selp.f32 %f965, 0f3F6A937E, 0fBEC093AC, %p205; + fma.rn.f32 %f966, %f964, %f955, %f965; + selp.f32 %f967, 0f3F20D842, 0f3E0375D3, %p205; + fma.rn.f32 %f968, %f966, %f955, %f967; + neg.f32 %f969, %f953; + selp.f32 %f970, %f969, %f221, %p205; + fma.rn.f32 %f1761, %f968, %f970, %f970; + @%p204 bra $L__BB8_146; + + mov.f32 %f1657, 0f3F800000; + ex2.approx.ftz.f32 %f971, %f1761; + sub.f32 %f973, %f1657, %f971; + mov.b32 %r388, %f973; + mov.b32 %r389, %f221; + and.b32 %r390, %r389, -2147483648; + or.b32 %r391, %r390, %r388; + mov.b32 %f1761, %r391; + +$L__BB8_146: + cvt.rn.f32.s32 %f1624, %r640; + sub.f32 %f1623, %f1624, %f1773; + add.f32 %f225, %f1623, 0fBF000000; + mul.f32 %f226, %f225, %f160; + abs.f32 %f974, %f226; + setp.ltu.f32 %p206, %f974, 0f3F8060FE; + setp.ge.f32 %p207, %f974, 0f3F8060FE; + mul.f32 %f975, %f226, %f226; + selp.f32 %f976, %f974, %f975, %p207; + selp.f32 %f977, 0f3789CA3C, 0f38B1E96A, %p207; + selp.f32 %f978, 0fB9F560B9, 0fBA574D20, %p207; + fma.rn.f32 %f979, %f977, %f976, %f978; + selp.f32 %f980, 0f3BAC840B, 0f3BAAD5EA, %p207; + fma.rn.f32 %f981, %f979, %f976, %f980; + selp.f32 %f982, 0fBD0C8162, 0fBCDC1BE7, %p207; + fma.rn.f32 %f983, %f981, %f976, %f982; + selp.f32 %f984, 0f3E1CF906, 0f3DE718AF, %p207; + fma.rn.f32 %f985, %f983, %f976, %f984; + selp.f32 %f986, 0f3F6A937E, 0fBEC093AC, %p207; + fma.rn.f32 %f987, %f985, %f976, %f986; + selp.f32 %f988, 0f3F20D842, 0f3E0375D3, %p207; + fma.rn.f32 %f989, %f987, %f976, %f988; + neg.f32 %f990, %f974; + selp.f32 %f991, %f990, %f226, %p207; + fma.rn.f32 %f1762, %f989, %f991, %f991; + @%p206 bra $L__BB8_148; + + mov.f32 %f1656, 0f3F800000; + ex2.approx.ftz.f32 %f992, %f1762; + sub.f32 %f994, %f1656, %f992; + mov.b32 %r392, %f994; + mov.b32 %r393, %f226; + and.b32 %r394, %r393, -2147483648; + or.b32 %r395, %r394, %r392; + mov.b32 %f1762, %r395; + +$L__BB8_148: + sub.f32 %f996, %f1761, %f1762; + mul.f32 %f230, %f996, 0f3F000000; + setp.eq.f32 %p208, %f171, 0f7F800000; + mov.f32 %f1763, 0f7F800000; + @%p208 bra $L__BB8_150; + + fma.rn.f32 %f1763, %f171, %f170, %f171; + +$L__BB8_150: + setp.geu.f32 %p405, %f167, 0f00000000; + mov.b32 %r396, %f1763; + xor.b32 %r397, %r396, -2147483648; + mov.b32 %f997, %r397; + selp.f32 %f233, %f997, %f1763, %p5; + setp.eq.f32 %p209, %f167, 0f00000000; + selp.f32 %f1764, %f172, %f233, %p209; + @%p405 bra $L__BB8_153; + + mov.f32 %f1625, 0f40000000; + cvt.rzi.f32.f32 %f999, %f1625; + setp.eq.f32 %p210, %f999, 0f40000000; + mov.f32 %f1764, %f233; + @%p210 bra $L__BB8_153; + + mov.f32 %f1764, 0f7FFFFFFF; + +$L__BB8_153: + abs.f32 %f1630, %f167; + mov.f32 %f1629, 0f3FB8AA3B; + add.f32 %f1628, %f1630, 0f40000000; + mov.b32 %r599, %f1628; + selp.f32 %f1627, 0fFF800000, 0f7F800000, %p5; + add.f32 %f1626, %f167, 0f40000000; + setp.gtu.f32 %p211, %f1630, 0f7F800000; + mov.f32 %f1765, 0f7F800000; + selp.f32 %f1002, %f1626, %f1764, %p211; + setp.neu.f32 %p212, %f1630, 0f7F800000; + selp.f32 %f1003, %f1002, %f1627, %p212; + setp.gt.s32 %p213, %r599, 2139095039; + selp.f32 %f1004, %f1003, %f1764, %p213; + mul.f32 %f1005, %f1004, 0fBF000000; + setp.eq.f32 %p214, %f167, 0f3F800000; + selp.f32 %f1006, 0fBF000000, %f1005, %p214; + mov.f32 %f1008, 0f3BBB989D; + fma.rn.f32 %f1009, %f1006, %f1008, %f413; + mov.f32 %f1011, 0f437C0000; + cvt.sat.f32.f32 %f1012, %f1009; + mov.f32 %f1013, 0f4B400001; + fma.rm.f32 %f1014, %f1012, %f1011, %f1013; + add.f32 %f1015, %f1014, 0fCB40007F; + neg.f32 %f1016, %f1015; + fma.rn.f32 %f1017, %f1006, %f1629, %f1016; + mov.f32 %f1018, 0f32A57060; + fma.rn.f32 %f1019, %f1006, %f1018, %f1017; + mov.b32 %r398, %f1014; + shl.b32 %r399, %r398, 23; + mov.b32 %f1020, %r399; + ex2.approx.ftz.f32 %f1021, %f1019; + mul.f32 %f236, %f1021, %f1020; + setp.eq.f32 %p215, %f176, 0f7F800000; + @%p215 bra $L__BB8_155; + + fma.rn.f32 %f1765, %f176, %f175, %f176; + +$L__BB8_155: + setp.geu.f32 %p406, %f173, 0f00000000; + mov.b32 %r400, %f1765; + xor.b32 %r401, %r400, -2147483648; + mov.b32 %f1022, %r401; + selp.f32 %f239, %f1022, %f1765, %p6; + setp.eq.f32 %p216, %f173, 0f00000000; + selp.f32 %f1766, %f179, %f239, %p216; + @%p406 bra $L__BB8_158; + + mov.f32 %f1631, 0f40000000; + cvt.rzi.f32.f32 %f1024, %f1631; + setp.eq.f32 %p217, %f1024, 0f40000000; + mov.f32 %f1766, %f239; + @%p217 bra $L__BB8_158; + + mov.f32 %f1766, 0f7FFFFFFF; + +$L__BB8_158: + abs.f32 %f1640, %f173; + mov.f32 %f1639, 0f32A57060; + mov.f32 %f1638, 0f4B400001; + mov.f32 %f1637, 0f437C0000; + mov.f32 %f1636, 0f3BBB989D; + add.f32 %f1635, %f1640, 0f40000000; + mov.b32 %r600, %f1635; + selp.f32 %f1634, 0fFF800000, 0f7F800000, %p6; + add.f32 %f1633, %f173, 0f40000000; + mov.f32 %f1632, 0f3FB8AA3B; + setp.gtu.f32 %p218, %f1640, 0f7F800000; + selp.f32 %f1026, %f1633, %f1766, %p218; + setp.neu.f32 %p219, %f1640, 0f7F800000; + selp.f32 %f1027, %f1026, %f1634, %p219; + setp.gt.s32 %p220, %r600, 2139095039; + selp.f32 %f1028, %f1027, %f1766, %p220; + mul.f32 %f1029, %f1028, 0fBF000000; + setp.eq.f32 %p221, %f173, 0f3F800000; + selp.f32 %f1030, 0fBF000000, %f1029, %p221; + fma.rn.f32 %f1033, %f1030, %f1636, %f413; + cvt.sat.f32.f32 %f1036, %f1033; + fma.rm.f32 %f1038, %f1036, %f1637, %f1638; + add.f32 %f1039, %f1038, 0fCB40007F; + neg.f32 %f1040, %f1039; + fma.rn.f32 %f1041, %f1030, %f1632, %f1040; + fma.rn.f32 %f1043, %f1030, %f1639, %f1041; + mov.b32 %r402, %f1038; + shl.b32 %r403, %r402, 23; + mov.b32 %f1044, %r403; + ex2.approx.ftz.f32 %f1045, %f1043; + mul.f32 %f242, %f1045, %f1044; + sub.f32 %f1046, %f236, %f242; + mul.f32 %f1047, %f49, %f1046; + mul.f32 %f243, %f230, %f1047; + not.pred %p222, %p7; + mov.f64 %fd386, %fd48; + @%p222 bra $L__BB8_160; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r404}, %fd48; + } + xor.b32 %r405, %r404, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r406, %temp}, %fd48; + } + mov.b64 %fd386, {%r406, %r405}; + +$L__BB8_160: + setp.eq.f32 %p223, %f378, 0f00000000; + @%p223 bra $L__BB8_164; + bra.uni $L__BB8_161; + +$L__BB8_164: mov.u32 %r407, 0; - mov.f32 %f2189, 0f00000000; - mov.f32 %f2190, %f2189; - @%p8 bra BB8_89; - -BB8_67: - mov.u32 %r408, 0; - cvt.rn.f32.s32 %f153, %r407; - -BB8_68: - setp.lt.s32 %p74, %r29, 1; - mov.f32 %f2202, %f361; - @%p74 bra BB8_83; + mov.b64 %fd386, {%r407, %r76}; + bra.uni $L__BB8_165; + +$L__BB8_161: + { + .reg .b32 %temp; + mov.b64 {%temp, %r601}, %fd35; + } + setp.gt.s32 %p224, %r601, -1; + @%p224 bra $L__BB8_165; + + mov.f64 %fd347, 0d4008000000000000; + cvt.rzi.f64.f64 %fd227, %fd347; + setp.eq.f64 %p225, %fd227, 0d4008000000000000; + @%p225 bra $L__BB8_165; + + mov.f64 %fd386, 0dFFF8000000000000; + +$L__BB8_165: + { + .reg .b32 %temp; + mov.b64 {%temp, %r603}, %fd36; + } + and.b32 %r602, %r603, 2146435072; + setp.ne.s32 %p407, %r602, 2146435072; + selp.f64 %fd387, %fd386, %fd36, %p407; + @%p10 bra $L__BB8_170; + + mov.f64 %fd346, 0d4008000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r605}, %fd346; + } + and.b32 %r604, %r605, 2147483647; + setp.eq.s32 %p227, %r604, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r408, %temp}, %fd346; + } + setp.eq.s32 %p228, %r408, 0; + and.pred %p229, %p227, %p228; + @%p229 bra $L__BB8_169; + bra.uni $L__BB8_167; + +$L__BB8_169: + mov.u32 %r411, 0; + mov.b64 %fd387, {%r411, %r79}; + bra.uni $L__BB8_170; + +$L__BB8_167: + { + .reg .b32 %temp; + mov.b64 {%temp, %r607}, %fd35; + } + and.b32 %r606, %r607, 2147483647; + setp.ne.s32 %p230, %r606, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r409, %temp}, %fd35; + } + setp.ne.s32 %p231, %r409, 0; + or.pred %p232, %p230, %p231; + mov.f64 %fd387, %fd386; + @%p232 bra $L__BB8_170; - sqrt.rn.f32 %f158, %f40; - cvt.rn.f32.s32 %f159, %r408; - mov.u32 %r409, 0; - mov.f32 %f2202, %f361; - -BB8_70: - mul.wide.s32 %rd64, %r409, 4; - add.s64 %rd65, %rd2, %rd64; - ld.local.f32 %f2178, [%rd65]; - add.s64 %rd66, %rd6, %rd64; - st.local.f32 [%rd66], %f2178; - add.s64 %rd67, %rd3, %rd64; - ld.local.f32 %f2179, [%rd67]; - add.s64 %rd68, %rd7, %rd64; - st.local.f32 [%rd68], %f2179; - sub.f32 %f163, %f153, %f2178; - add.f32 %f861, %f163, 0f3F800000; - mul.f32 %f164, %f861, %f158; - abs.f32 %f165, %f164; - setp.ltu.f32 %p75, %f165, 0f3F800000; - @%p75 bra BB8_72; - bra.uni BB8_71; - -BB8_72: - mul.f32 %f880, %f164, %f164; - mov.f32 %f881, 0f3BA0C9F8; - mov.f32 %f882, 0fBA1268FB; - fma.rn.f32 %f883, %f882, %f880, %f881; - mov.f32 %f884, 0fBCDABFD4; - fma.rn.f32 %f885, %f883, %f880, %f884; - mov.f32 %f886, 0f3DE70331; - fma.rn.f32 %f887, %f885, %f880, %f886; - mov.f32 %f888, 0fBEC09330; - fma.rn.f32 %f889, %f887, %f880, %f888; - mov.f32 %f890, 0f3F906EBA; - fma.rn.f32 %f891, %f889, %f880, %f890; - mul.f32 %f2198, %f164, %f891; - bra.uni BB8_73; - -BB8_71: - mov.f32 %f862, 0f3A03BB71; - mov.f32 %f863, 0fB7B730FB; - fma.rn.f32 %f864, %f863, %f165, %f862; - mov.f32 %f865, 0fBBACA3B3; - fma.rn.f32 %f866, %f864, %f165, %f865; - mov.f32 %f867, 0f3D0A7445; - fma.rn.f32 %f868, %f866, %f165, %f867; - mov.f32 %f869, 0fBE1B3B75; - fma.rn.f32 %f870, %f868, %f165, %f869; - mov.f32 %f871, 0fBF6B385A; - fma.rn.f32 %f872, %f870, %f165, %f871; - mov.f32 %f873, 0fBFD0316E; - fma.rn.f32 %f874, %f872, %f165, %f873; - mov.f32 %f875, 0fBA031CCE; - fma.rn.f32 %f876, %f874, %f165, %f875; - ex2.approx.ftz.f32 %f877, %f876; - mov.f32 %f878, 0f3F800000; - sub.f32 %f879, %f878, %f877; - mov.b32 %r188, %f879; - setp.ltu.f32 %p76, %f165, 0f407AD445; - selp.b32 %r189, %r188, 1065353216, %p76; - mov.b32 %r190, %f164; - and.b32 %r191, %r190, -2147483648; - or.b32 %r192, %r189, %r191; - mov.b32 %f2198, %r192; - -BB8_73: - mul.f32 %f169, %f163, %f158; - abs.f32 %f170, %f169; - setp.ltu.f32 %p77, %f170, 0f3F800000; - @%p77 bra BB8_75; - bra.uni BB8_74; - -BB8_75: - mul.f32 %f910, %f169, %f169; - mov.f32 %f911, 0f3BA0C9F8; - mov.f32 %f912, 0fBA1268FB; - fma.rn.f32 %f913, %f912, %f910, %f911; - mov.f32 %f914, 0fBCDABFD4; - fma.rn.f32 %f915, %f913, %f910, %f914; - mov.f32 %f916, 0f3DE70331; - fma.rn.f32 %f917, %f915, %f910, %f916; - mov.f32 %f918, 0fBEC09330; - fma.rn.f32 %f919, %f917, %f910, %f918; - mov.f32 %f920, 0f3F906EBA; - fma.rn.f32 %f921, %f919, %f910, %f920; - mul.f32 %f2199, %f169, %f921; - bra.uni BB8_76; - -BB8_74: - mov.f32 %f892, 0f3A03BB71; - mov.f32 %f893, 0fB7B730FB; - fma.rn.f32 %f894, %f893, %f170, %f892; - mov.f32 %f895, 0fBBACA3B3; - fma.rn.f32 %f896, %f894, %f170, %f895; - mov.f32 %f897, 0f3D0A7445; - fma.rn.f32 %f898, %f896, %f170, %f897; - mov.f32 %f899, 0fBE1B3B75; - fma.rn.f32 %f900, %f898, %f170, %f899; - mov.f32 %f901, 0fBF6B385A; - fma.rn.f32 %f902, %f900, %f170, %f901; - mov.f32 %f903, 0fBFD0316E; - fma.rn.f32 %f904, %f902, %f170, %f903; - mov.f32 %f905, 0fBA031CCE; - fma.rn.f32 %f906, %f904, %f170, %f905; - ex2.approx.ftz.f32 %f907, %f906; - mov.f32 %f908, 0f3F800000; - sub.f32 %f909, %f908, %f907; - mov.b32 %r193, %f909; - setp.ltu.f32 %p78, %f170, 0f407AD445; - selp.b32 %r194, %r193, 1065353216, %p78; - mov.b32 %r195, %f169; - and.b32 %r196, %r195, -2147483648; - or.b32 %r197, %r194, %r196; - mov.b32 %f2199, %r197; - -BB8_76: - sub.f32 %f174, %f2198, %f2199; - sub.f32 %f175, %f159, %f2179; - add.f32 %f922, %f175, 0f3F800000; - mul.f32 %f176, %f922, %f158; - abs.f32 %f177, %f176; - setp.ltu.f32 %p79, %f177, 0f3F800000; - @%p79 bra BB8_78; - bra.uni BB8_77; - -BB8_78: - mul.f32 %f941, %f176, %f176; - mov.f32 %f942, 0f3BA0C9F8; - mov.f32 %f943, 0fBA1268FB; - fma.rn.f32 %f944, %f943, %f941, %f942; - mov.f32 %f945, 0fBCDABFD4; - fma.rn.f32 %f946, %f944, %f941, %f945; - mov.f32 %f947, 0f3DE70331; - fma.rn.f32 %f948, %f946, %f941, %f947; - mov.f32 %f949, 0fBEC09330; - fma.rn.f32 %f950, %f948, %f941, %f949; - mov.f32 %f951, 0f3F906EBA; - fma.rn.f32 %f952, %f950, %f941, %f951; - mul.f32 %f2200, %f176, %f952; - bra.uni BB8_79; - -BB8_77: - mov.f32 %f923, 0f3A03BB71; - mov.f32 %f924, 0fB7B730FB; - fma.rn.f32 %f925, %f924, %f177, %f923; - mov.f32 %f926, 0fBBACA3B3; - fma.rn.f32 %f927, %f925, %f177, %f926; - mov.f32 %f928, 0f3D0A7445; - fma.rn.f32 %f929, %f927, %f177, %f928; - mov.f32 %f930, 0fBE1B3B75; - fma.rn.f32 %f931, %f929, %f177, %f930; - mov.f32 %f932, 0fBF6B385A; - fma.rn.f32 %f933, %f931, %f177, %f932; - mov.f32 %f934, 0fBFD0316E; - fma.rn.f32 %f935, %f933, %f177, %f934; - mov.f32 %f936, 0fBA031CCE; - fma.rn.f32 %f937, %f935, %f177, %f936; - ex2.approx.ftz.f32 %f938, %f937; - mov.f32 %f939, 0f3F800000; - sub.f32 %f940, %f939, %f938; - mov.b32 %r198, %f940; - setp.ltu.f32 %p80, %f177, 0f407AD445; - selp.b32 %r199, %r198, 1065353216, %p80; - mov.b32 %r200, %f176; - and.b32 %r201, %r200, -2147483648; - or.b32 %r202, %r199, %r201; - mov.b32 %f2200, %r202; - -BB8_79: - mul.f32 %f181, %f175, %f158; - abs.f32 %f182, %f181; - setp.ltu.f32 %p81, %f182, 0f3F800000; - @%p81 bra BB8_81; - bra.uni BB8_80; - -BB8_81: - mul.f32 %f971, %f181, %f181; - mov.f32 %f972, 0f3BA0C9F8; - mov.f32 %f973, 0fBA1268FB; - fma.rn.f32 %f974, %f973, %f971, %f972; - mov.f32 %f975, 0fBCDABFD4; - fma.rn.f32 %f976, %f974, %f971, %f975; - mov.f32 %f977, 0f3DE70331; - fma.rn.f32 %f978, %f976, %f971, %f977; - mov.f32 %f979, 0fBEC09330; - fma.rn.f32 %f980, %f978, %f971, %f979; - mov.f32 %f981, 0f3F906EBA; - fma.rn.f32 %f982, %f980, %f971, %f981; - mul.f32 %f2201, %f181, %f982; - bra.uni BB8_82; - -BB8_80: - mov.f32 %f953, 0f3A03BB71; - mov.f32 %f954, 0fB7B730FB; - fma.rn.f32 %f955, %f954, %f182, %f953; - mov.f32 %f956, 0fBBACA3B3; - fma.rn.f32 %f957, %f955, %f182, %f956; - mov.f32 %f958, 0f3D0A7445; - fma.rn.f32 %f959, %f957, %f182, %f958; - mov.f32 %f960, 0fBE1B3B75; - fma.rn.f32 %f961, %f959, %f182, %f960; - mov.f32 %f962, 0fBF6B385A; - fma.rn.f32 %f963, %f961, %f182, %f962; - mov.f32 %f964, 0fBFD0316E; - fma.rn.f32 %f965, %f963, %f182, %f964; - mov.f32 %f966, 0fBA031CCE; - fma.rn.f32 %f967, %f965, %f182, %f966; - ex2.approx.ftz.f32 %f968, %f967; - mov.f32 %f969, 0f3F800000; - sub.f32 %f970, %f969, %f968; - mov.b32 %r203, %f970; - setp.ltu.f32 %p82, %f182, 0f407AD445; - selp.b32 %r204, %r203, 1065353216, %p82; - mov.b32 %r205, %f181; - and.b32 %r206, %r205, -2147483648; - or.b32 %r207, %r204, %r206; - mov.b32 %f2201, %r207; - -BB8_82: - sub.f32 %f983, %f2200, %f2201; - mul.f32 %f984, %f983, 0f3F000000; - mul.f32 %f985, %f174, 0f3F000000; - mul.f32 %f986, %f985, %f473; - fma.rn.f32 %f2202, %f986, %f984, %f2202; - add.s32 %r409, %r409, 1; - setp.lt.s32 %p83, %r409, %r29; - @%p83 bra BB8_70; - -BB8_83: - mad.lo.s32 %r208, %r408, %r102, %r407; - add.s32 %r209, %r208, %r1; - mul.wide.s32 %rd69, %r209, 4; - add.s64 %rd70, %rd1, %rd69; - ld.global.f32 %f190, [%rd70]; - mov.f32 %f2206, 0f00000000; - setp.leu.f32 %p84, %f2202, 0f3C23D70A; - mov.f32 %f2205, %f2206; - @%p84 bra BB8_85; - - div.rn.f32 %f988, %f190, %f2202; - add.f32 %f2205, %f988, 0fBF800000; - -BB8_85: - @%p84 bra BB8_87; - - mul.f32 %f990, %f2202, %f2202; - div.rn.f32 %f2206, %f190, %f990; - -BB8_87: - mov.f32 %f991, 0f47C35000; - min.f32 %f992, %f2206, %f991; - sub.f32 %f2189, %f2189, %f992; - min.f32 %f993, %f2205, %f991; - add.f32 %f2190, %f2190, %f993; - add.s32 %r408, %r408, 1; - setp.lt.s32 %p86, %r408, %r102; - @%p86 bra BB8_68; - - add.s32 %r407, %r407, 1; - setp.lt.s32 %p87, %r407, %r102; - @%p87 bra BB8_67; - -BB8_89: - cvt.rn.f32.s32 %f2048, %r29; - div.rn.f32 %f994, %f2190, %f2189; - mov.f32 %f995, 0fBF800000; - max.f32 %f996, %f994, %f995; - mov.f32 %f997, 0f3F800000; - min.f32 %f998, %f996, %f997; - div.rn.f32 %f999, %f998, %f2048; - fma.rn.f32 %f1000, %f999, 0fBF000000, %f361; - mov.f32 %f1001, 0f3A83126F; - max.f32 %f361, %f1000, %f1001; mov.u32 %r410, 0; - setp.lt.s32 %p88, %r29, 1; - @%p88 bra BB8_156; - -BB8_90: - mov.f32 %f2213, 0f00000000; - mov.f32 %f2214, %f2213; - mov.f32 %f2215, %f2213; - mov.f32 %f2216, %f2213; - @%p8 bra BB8_155; - - mov.u32 %r411, 0; - mov.f32 %f2213, 0f00000000; - mov.f32 %f2214, %f2213; - mov.f32 %f2215, %f2213; - mov.f32 %f2216, %f2213; - -BB8_92: - mul.wide.s32 %rd161, %r410, 4; - mov.u32 %r412, 0; - mov.f32 %f2049, 0f3F800000; - add.s64 %rd160, %rd6, %rd161; - add.s64 %rd159, %rd7, %rd161; - ld.local.f32 %f2179, [%rd159]; - cvt.rn.f32.s32 %f209, %r411; - ld.local.f32 %f2178, [%rd160]; - sub.f32 %f211, %f209, %f2178; - add.f32 %f1010, %f211, 0f3F800000; - sqrt.rn.f32 %f212, %f40; - mul.f32 %f213, %f1010, %f212; - abs.f32 %f214, %f213; - mul.f32 %f215, %f213, %f213; - mul.f32 %f216, %f211, %f212; - abs.f32 %f217, %f216; - add.f32 %f1011, %f209, 0f3F800000; - sub.f32 %f219, %f1011, %f2178; - div.rn.f32 %f220, %f219, %f472; - cvt.rzi.f32.f32 %f1013, %f2049; - add.f32 %f1014, %f1013, %f1013; - mov.f32 %f1015, 0f40000000; - sub.f32 %f1016, %f1015, %f1014; - abs.f32 %f221, %f1016; - setp.eq.f32 %p90, %f221, 0f3F800000; - abs.f32 %f222, %f220; - setp.lt.f32 %p91, %f222, 0f00800000; - mul.f32 %f1017, %f222, 0f4B800000; - selp.f32 %f1018, 0fC3170000, 0fC2FE0000, %p91; - selp.f32 %f1019, %f1017, %f222, %p91; - mov.b32 %r213, %f1019; - and.b32 %r214, %r213, 8388607; - or.b32 %r215, %r214, 1065353216; - mov.b32 %f1020, %r215; - shr.u32 %r216, %r213, 23; - cvt.rn.f32.u32 %f1021, %r216; - add.f32 %f1022, %f1018, %f1021; - setp.gt.f32 %p92, %f1020, 0f3FB504F3; - mul.f32 %f1023, %f1020, 0f3F000000; - add.f32 %f1024, %f1022, 0f3F800000; - selp.f32 %f1025, %f1023, %f1020, %p92; - selp.f32 %f1026, %f1024, %f1022, %p92; - add.f32 %f223, %f1025, 0fBF800000; - add.f32 %f224, %f1025, 0f3F800000; - add.f32 %f225, %f223, %f223; - mov.f32 %f1027, 0f3F317200; - mul.rn.f32 %f226, %f1026, %f1027; - mov.f32 %f1028, 0f35BFBE8E; - mul.rn.f32 %f227, %f1026, %f1028; - setp.lt.f32 %p93, %f220, 0f00000000; - and.pred %p1, %p93, %p90; - add.f32 %f1029, %f220, %f220; - selp.f32 %f228, %f1029, 0f00000000, %p90; - div.rn.f32 %f231, %f211, %f472; - abs.f32 %f232, %f231; - setp.lt.f32 %p94, %f232, 0f00800000; - mul.f32 %f1031, %f232, 0f4B800000; - selp.f32 %f1032, 0fC3170000, 0fC2FE0000, %p94; - selp.f32 %f1033, %f1031, %f232, %p94; - mov.b32 %r217, %f1033; - and.b32 %r218, %r217, 8388607; - or.b32 %r219, %r218, 1065353216; - mov.b32 %f1034, %r219; - shr.u32 %r220, %r217, 23; - cvt.rn.f32.u32 %f1035, %r220; - add.f32 %f1036, %f1032, %f1035; - setp.gt.f32 %p95, %f1034, 0f3FB504F3; - mul.f32 %f1037, %f1034, 0f3F000000; - add.f32 %f1038, %f1036, 0f3F800000; - selp.f32 %f1039, %f1037, %f1034, %p95; - selp.f32 %f1040, %f1038, %f1036, %p95; - add.f32 %f233, %f1039, 0fBF800000; - add.f32 %f234, %f1039, 0f3F800000; - add.f32 %f235, %f233, %f233; - mul.rn.f32 %f236, %f1040, %f1027; - mul.rn.f32 %f237, %f1040, %f1028; - setp.lt.f32 %p96, %f231, 0f00000000; - and.pred %p2, %p96, %p90; - add.f32 %f1041, %f231, %f231; - selp.f32 %f238, %f1041, 0f00000000, %p90; - mov.b32 %r222, %f213; - and.b32 %r53, %r222, -2147483648; - -BB8_93: - mov.u32 %r413, 0; - mad.lo.s32 %r224, %r412, %r102, %r411; - add.s32 %r225, %r224, %r1; - mul.wide.s32 %rd72, %r225, 4; - add.s64 %rd73, %rd1, %rd72; - ld.global.f32 %f245, [%rd73]; - cvt.rn.f32.s32 %f246, %r412; - mov.f32 %f2217, %f361; - -BB8_94: - cvt.rn.f32.s32 %f2050, %r411; - mul.wide.s32 %rd74, %r413, 4; - add.s64 %rd75, %rd6, %rd74; - add.s64 %rd76, %rd7, %rd74; - ld.local.f32 %f248, [%rd76]; - ld.local.f32 %f1043, [%rd75]; - sub.f32 %f249, %f2050, %f1043; - add.f32 %f1044, %f249, 0f3F800000; - mul.f32 %f250, %f1044, %f212; - abs.f32 %f251, %f250; - setp.ltu.f32 %p97, %f251, 0f3F800000; - @%p97 bra BB8_96; - bra.uni BB8_95; - -BB8_96: - mul.f32 %f1063, %f250, %f250; - mov.f32 %f1064, 0f3BA0C9F8; - mov.f32 %f1065, 0fBA1268FB; - fma.rn.f32 %f1066, %f1065, %f1063, %f1064; - mov.f32 %f1067, 0fBCDABFD4; - fma.rn.f32 %f1068, %f1066, %f1063, %f1067; - mov.f32 %f1069, 0f3DE70331; - fma.rn.f32 %f1070, %f1068, %f1063, %f1069; - mov.f32 %f1071, 0fBEC09330; - fma.rn.f32 %f1072, %f1070, %f1063, %f1071; - mov.f32 %f1073, 0f3F906EBA; - fma.rn.f32 %f1074, %f1072, %f1063, %f1073; - mul.f32 %f2218, %f250, %f1074; - bra.uni BB8_97; - -BB8_95: - mov.f32 %f2051, 0f3F800000; - mov.f32 %f1045, 0f3A03BB71; - mov.f32 %f1046, 0fB7B730FB; - fma.rn.f32 %f1047, %f1046, %f251, %f1045; - mov.f32 %f1048, 0fBBACA3B3; - fma.rn.f32 %f1049, %f1047, %f251, %f1048; - mov.f32 %f1050, 0f3D0A7445; - fma.rn.f32 %f1051, %f1049, %f251, %f1050; - mov.f32 %f1052, 0fBE1B3B75; - fma.rn.f32 %f1053, %f1051, %f251, %f1052; - mov.f32 %f1054, 0fBF6B385A; - fma.rn.f32 %f1055, %f1053, %f251, %f1054; - mov.f32 %f1056, 0fBFD0316E; - fma.rn.f32 %f1057, %f1055, %f251, %f1056; - mov.f32 %f1058, 0fBA031CCE; - fma.rn.f32 %f1059, %f1057, %f251, %f1058; - ex2.approx.ftz.f32 %f1060, %f1059; - sub.f32 %f1062, %f2051, %f1060; - mov.b32 %r226, %f1062; - setp.ltu.f32 %p98, %f251, 0f407AD445; - selp.b32 %r227, %r226, 1065353216, %p98; - mov.b32 %r228, %f250; - and.b32 %r229, %r228, -2147483648; - or.b32 %r230, %r227, %r229; - mov.b32 %f2218, %r230; - -BB8_97: - mul.f32 %f255, %f249, %f212; - abs.f32 %f256, %f255; - setp.ltu.f32 %p99, %f256, 0f3F800000; - @%p99 bra BB8_99; - bra.uni BB8_98; - -BB8_99: - mul.f32 %f1093, %f255, %f255; - mov.f32 %f1094, 0f3BA0C9F8; - mov.f32 %f1095, 0fBA1268FB; - fma.rn.f32 %f1096, %f1095, %f1093, %f1094; - mov.f32 %f1097, 0fBCDABFD4; - fma.rn.f32 %f1098, %f1096, %f1093, %f1097; - mov.f32 %f1099, 0f3DE70331; - fma.rn.f32 %f1100, %f1098, %f1093, %f1099; - mov.f32 %f1101, 0fBEC09330; - fma.rn.f32 %f1102, %f1100, %f1093, %f1101; - mov.f32 %f1103, 0f3F906EBA; - fma.rn.f32 %f1104, %f1102, %f1093, %f1103; - mul.f32 %f2219, %f255, %f1104; - bra.uni BB8_100; - -BB8_98: - mov.f32 %f2052, 0f3F800000; - mov.f32 %f1075, 0f3A03BB71; - mov.f32 %f1076, 0fB7B730FB; - fma.rn.f32 %f1077, %f1076, %f256, %f1075; - mov.f32 %f1078, 0fBBACA3B3; - fma.rn.f32 %f1079, %f1077, %f256, %f1078; - mov.f32 %f1080, 0f3D0A7445; - fma.rn.f32 %f1081, %f1079, %f256, %f1080; - mov.f32 %f1082, 0fBE1B3B75; - fma.rn.f32 %f1083, %f1081, %f256, %f1082; - mov.f32 %f1084, 0fBF6B385A; - fma.rn.f32 %f1085, %f1083, %f256, %f1084; - mov.f32 %f1086, 0fBFD0316E; - fma.rn.f32 %f1087, %f1085, %f256, %f1086; - mov.f32 %f1088, 0fBA031CCE; - fma.rn.f32 %f1089, %f1087, %f256, %f1088; - ex2.approx.ftz.f32 %f1090, %f1089; - sub.f32 %f1092, %f2052, %f1090; - mov.b32 %r231, %f1092; - setp.ltu.f32 %p100, %f256, 0f407AD445; - selp.b32 %r232, %r231, 1065353216, %p100; - mov.b32 %r233, %f255; - and.b32 %r234, %r233, -2147483648; - or.b32 %r235, %r232, %r234; - mov.b32 %f2219, %r235; - -BB8_100: - sub.f32 %f260, %f2218, %f2219; - sub.f32 %f261, %f246, %f248; - add.f32 %f1105, %f261, 0f3F800000; - mul.f32 %f262, %f1105, %f212; - abs.f32 %f263, %f262; - setp.ltu.f32 %p101, %f263, 0f3F800000; - @%p101 bra BB8_102; - bra.uni BB8_101; - -BB8_102: - mul.f32 %f1124, %f262, %f262; - mov.f32 %f1125, 0f3BA0C9F8; - mov.f32 %f1126, 0fBA1268FB; - fma.rn.f32 %f1127, %f1126, %f1124, %f1125; - mov.f32 %f1128, 0fBCDABFD4; - fma.rn.f32 %f1129, %f1127, %f1124, %f1128; - mov.f32 %f1130, 0f3DE70331; - fma.rn.f32 %f1131, %f1129, %f1124, %f1130; - mov.f32 %f1132, 0fBEC09330; - fma.rn.f32 %f1133, %f1131, %f1124, %f1132; - mov.f32 %f1134, 0f3F906EBA; - fma.rn.f32 %f1135, %f1133, %f1124, %f1134; - mul.f32 %f2220, %f262, %f1135; - bra.uni BB8_103; - -BB8_101: - mov.f32 %f2053, 0f3F800000; - mov.f32 %f1106, 0f3A03BB71; - mov.f32 %f1107, 0fB7B730FB; - fma.rn.f32 %f1108, %f1107, %f263, %f1106; - mov.f32 %f1109, 0fBBACA3B3; - fma.rn.f32 %f1110, %f1108, %f263, %f1109; - mov.f32 %f1111, 0f3D0A7445; - fma.rn.f32 %f1112, %f1110, %f263, %f1111; - mov.f32 %f1113, 0fBE1B3B75; - fma.rn.f32 %f1114, %f1112, %f263, %f1113; - mov.f32 %f1115, 0fBF6B385A; - fma.rn.f32 %f1116, %f1114, %f263, %f1115; - mov.f32 %f1117, 0fBFD0316E; - fma.rn.f32 %f1118, %f1116, %f263, %f1117; - mov.f32 %f1119, 0fBA031CCE; - fma.rn.f32 %f1120, %f1118, %f263, %f1119; - ex2.approx.ftz.f32 %f1121, %f1120; - sub.f32 %f1123, %f2053, %f1121; - mov.b32 %r236, %f1123; - setp.ltu.f32 %p102, %f263, 0f407AD445; - selp.b32 %r237, %r236, 1065353216, %p102; - mov.b32 %r238, %f262; - and.b32 %r239, %r238, -2147483648; - or.b32 %r240, %r237, %r239; - mov.b32 %f2220, %r240; - -BB8_103: - mul.f32 %f267, %f261, %f212; - abs.f32 %f268, %f267; - setp.ltu.f32 %p103, %f268, 0f3F800000; - @%p103 bra BB8_105; - bra.uni BB8_104; - -BB8_105: - mul.f32 %f1154, %f267, %f267; - mov.f32 %f1155, 0f3BA0C9F8; - mov.f32 %f1156, 0fBA1268FB; - fma.rn.f32 %f1157, %f1156, %f1154, %f1155; - mov.f32 %f1158, 0fBCDABFD4; - fma.rn.f32 %f1159, %f1157, %f1154, %f1158; - mov.f32 %f1160, 0f3DE70331; - fma.rn.f32 %f1161, %f1159, %f1154, %f1160; - mov.f32 %f1162, 0fBEC09330; - fma.rn.f32 %f1163, %f1161, %f1154, %f1162; - mov.f32 %f1164, 0f3F906EBA; - fma.rn.f32 %f1165, %f1163, %f1154, %f1164; - mul.f32 %f2221, %f267, %f1165; - bra.uni BB8_106; - -BB8_104: - mov.f32 %f2054, 0f3F800000; - mov.f32 %f1136, 0f3A03BB71; - mov.f32 %f1137, 0fB7B730FB; - fma.rn.f32 %f1138, %f1137, %f268, %f1136; - mov.f32 %f1139, 0fBBACA3B3; - fma.rn.f32 %f1140, %f1138, %f268, %f1139; - mov.f32 %f1141, 0f3D0A7445; - fma.rn.f32 %f1142, %f1140, %f268, %f1141; - mov.f32 %f1143, 0fBE1B3B75; - fma.rn.f32 %f1144, %f1142, %f268, %f1143; - mov.f32 %f1145, 0fBF6B385A; - fma.rn.f32 %f1146, %f1144, %f268, %f1145; - mov.f32 %f1147, 0fBFD0316E; - fma.rn.f32 %f1148, %f1146, %f268, %f1147; - mov.f32 %f1149, 0fBA031CCE; - fma.rn.f32 %f1150, %f1148, %f268, %f1149; - ex2.approx.ftz.f32 %f1151, %f1150; - sub.f32 %f1153, %f2054, %f1151; - mov.b32 %r241, %f1153; - setp.ltu.f32 %p104, %f268, 0f407AD445; - selp.b32 %r242, %r241, 1065353216, %p104; - mov.b32 %r243, %f267; - and.b32 %r244, %r243, -2147483648; - or.b32 %r245, %r242, %r244; - mov.b32 %f2221, %r245; - -BB8_106: - sub.f32 %f1166, %f2220, %f2221; - mul.f32 %f1167, %f1166, 0f3F000000; - mul.f32 %f1168, %f260, 0f3F000000; - mul.f32 %f1169, %f1168, %f473; - fma.rn.f32 %f2217, %f1169, %f1167, %f2217; - add.s32 %r413, %r413, 1; - setp.lt.s32 %p105, %r413, %r29; - @%p105 bra BB8_94; - - mov.f32 %f2223, 0f00000000; - setp.leu.f32 %p106, %f2217, 0f3C23D70A; - mov.f32 %f2222, %f2223; - @%p106 bra BB8_109; - - div.rn.f32 %f1171, %f245, %f2217; - add.f32 %f2222, %f1171, 0fBF800000; - -BB8_109: - @%p106 bra BB8_111; - - mul.f32 %f1173, %f2217, %f2217; - div.rn.f32 %f2223, %f245, %f1173; - -BB8_111: - mov.f32 %f1174, 0f47C35000; - min.f32 %f277, %f2222, %f1174; - min.f32 %f278, %f2223, %f1174; - setp.ltu.f32 %p108, %f214, 0f3F800000; - @%p108 bra BB8_113; - bra.uni BB8_112; - -BB8_113: - cvt.rn.f32.s32 %f2105, %r411; - sub.f32 %f2104, %f2105, %f2178; - add.f32 %f2103, %f2104, 0f3F800000; - mul.f32 %f2102, %f2103, %f212; - mov.f32 %f1193, 0f3BA0C9F8; - mov.f32 %f1194, 0fBA1268FB; - fma.rn.f32 %f1195, %f1194, %f215, %f1193; - mov.f32 %f1196, 0fBCDABFD4; - fma.rn.f32 %f1197, %f1195, %f215, %f1196; - mov.f32 %f1198, 0f3DE70331; - fma.rn.f32 %f1199, %f1197, %f215, %f1198; - mov.f32 %f1200, 0fBEC09330; - fma.rn.f32 %f1201, %f1199, %f215, %f1200; - mov.f32 %f1202, 0f3F906EBA; - fma.rn.f32 %f1203, %f1201, %f215, %f1202; - mul.f32 %f2224, %f2102, %f1203; - bra.uni BB8_114; - -BB8_112: - mov.f32 %f2055, 0f3F800000; - setp.ltu.f32 %p109, %f214, 0f407AD445; - mov.f32 %f1175, 0f3A03BB71; - mov.f32 %f1176, 0fB7B730FB; - fma.rn.f32 %f1177, %f1176, %f214, %f1175; - mov.f32 %f1178, 0fBBACA3B3; - fma.rn.f32 %f1179, %f1177, %f214, %f1178; - mov.f32 %f1180, 0f3D0A7445; - fma.rn.f32 %f1181, %f1179, %f214, %f1180; - mov.f32 %f1182, 0fBE1B3B75; - fma.rn.f32 %f1183, %f1181, %f214, %f1182; - mov.f32 %f1184, 0fBF6B385A; - fma.rn.f32 %f1185, %f1183, %f214, %f1184; - mov.f32 %f1186, 0fBFD0316E; - fma.rn.f32 %f1187, %f1185, %f214, %f1186; - mov.f32 %f1188, 0fBA031CCE; - fma.rn.f32 %f1189, %f1187, %f214, %f1188; - ex2.approx.ftz.f32 %f1190, %f1189; - sub.f32 %f1192, %f2055, %f1190; - mov.b32 %r246, %f1192; - selp.b32 %r247, %r246, 1065353216, %p109; - or.b32 %r248, %r247, %r53; - mov.b32 %f2224, %r248; - -BB8_114: - setp.ltu.f32 %p110, %f217, 0f3F800000; - @%p110 bra BB8_116; - bra.uni BB8_115; - -BB8_116: - cvt.rn.f32.s32 %f2100, %r411; - sub.f32 %f2099, %f2100, %f2178; - mul.f32 %f2098, %f2099, %f212; - mul.f32 %f2097, %f2098, %f2098; - mov.f32 %f1222, 0f3BA0C9F8; - mov.f32 %f1223, 0fBA1268FB; - fma.rn.f32 %f1224, %f1223, %f2097, %f1222; - mov.f32 %f1225, 0fBCDABFD4; - fma.rn.f32 %f1226, %f1224, %f2097, %f1225; - mov.f32 %f1227, 0f3DE70331; - fma.rn.f32 %f1228, %f1226, %f2097, %f1227; - mov.f32 %f1229, 0fBEC09330; - fma.rn.f32 %f1230, %f1228, %f2097, %f1229; - mov.f32 %f1231, 0f3F906EBA; - fma.rn.f32 %f1232, %f1230, %f2097, %f1231; - mul.f32 %f2225, %f2098, %f1232; - bra.uni BB8_117; - -BB8_115: - cvt.rn.f32.s32 %f2059, %r411; - sub.f32 %f2058, %f2059, %f2178; - mul.f32 %f2057, %f2058, %f212; - mov.b32 %r384, %f2057; - and.b32 %r383, %r384, -2147483648; - mov.f32 %f2056, 0f3F800000; - setp.ltu.f32 %p111, %f217, 0f407AD445; - mov.f32 %f1204, 0f3A03BB71; - mov.f32 %f1205, 0fB7B730FB; - fma.rn.f32 %f1206, %f1205, %f217, %f1204; - mov.f32 %f1207, 0fBBACA3B3; - fma.rn.f32 %f1208, %f1206, %f217, %f1207; - mov.f32 %f1209, 0f3D0A7445; - fma.rn.f32 %f1210, %f1208, %f217, %f1209; - mov.f32 %f1211, 0fBE1B3B75; - fma.rn.f32 %f1212, %f1210, %f217, %f1211; - mov.f32 %f1213, 0fBF6B385A; - fma.rn.f32 %f1214, %f1212, %f217, %f1213; - mov.f32 %f1215, 0fBFD0316E; - fma.rn.f32 %f1216, %f1214, %f217, %f1215; - mov.f32 %f1217, 0fBA031CCE; - fma.rn.f32 %f1218, %f1216, %f217, %f1217; - ex2.approx.ftz.f32 %f1219, %f1218; - sub.f32 %f1221, %f2056, %f1219; - mov.b32 %r249, %f1221; - selp.b32 %r250, %r249, 1065353216, %p111; - or.b32 %r251, %r250, %r383; - mov.b32 %f2225, %r251; - -BB8_117: - cvt.rn.f32.s32 %f2060, %r412; - sub.f32 %f285, %f2224, %f2225; - sub.f32 %f287, %f2060, %f2179; - add.f32 %f1233, %f287, 0f3F800000; - mul.f32 %f288, %f1233, %f212; - abs.f32 %f289, %f288; - setp.ltu.f32 %p112, %f289, 0f3F800000; - @%p112 bra BB8_119; - bra.uni BB8_118; - -BB8_119: - mul.f32 %f1252, %f288, %f288; - mov.f32 %f1253, 0f3BA0C9F8; - mov.f32 %f1254, 0fBA1268FB; - fma.rn.f32 %f1255, %f1254, %f1252, %f1253; - mov.f32 %f1256, 0fBCDABFD4; - fma.rn.f32 %f1257, %f1255, %f1252, %f1256; - mov.f32 %f1258, 0f3DE70331; - fma.rn.f32 %f1259, %f1257, %f1252, %f1258; - mov.f32 %f1260, 0fBEC09330; - fma.rn.f32 %f1261, %f1259, %f1252, %f1260; - mov.f32 %f1262, 0f3F906EBA; - fma.rn.f32 %f1263, %f1261, %f1252, %f1262; - mul.f32 %f2226, %f288, %f1263; - bra.uni BB8_120; - -BB8_118: - mov.f32 %f2061, 0f3F800000; - mov.f32 %f1234, 0f3A03BB71; - mov.f32 %f1235, 0fB7B730FB; - fma.rn.f32 %f1236, %f1235, %f289, %f1234; - mov.f32 %f1237, 0fBBACA3B3; - fma.rn.f32 %f1238, %f1236, %f289, %f1237; - mov.f32 %f1239, 0f3D0A7445; - fma.rn.f32 %f1240, %f1238, %f289, %f1239; - mov.f32 %f1241, 0fBE1B3B75; - fma.rn.f32 %f1242, %f1240, %f289, %f1241; - mov.f32 %f1243, 0fBF6B385A; - fma.rn.f32 %f1244, %f1242, %f289, %f1243; - mov.f32 %f1245, 0fBFD0316E; - fma.rn.f32 %f1246, %f1244, %f289, %f1245; - mov.f32 %f1247, 0fBA031CCE; - fma.rn.f32 %f1248, %f1246, %f289, %f1247; - ex2.approx.ftz.f32 %f1249, %f1248; - sub.f32 %f1251, %f2061, %f1249; - mov.b32 %r252, %f1251; - setp.ltu.f32 %p113, %f289, 0f407AD445; - selp.b32 %r253, %r252, 1065353216, %p113; - mov.b32 %r254, %f288; - and.b32 %r255, %r254, -2147483648; - or.b32 %r256, %r253, %r255; - mov.b32 %f2226, %r256; - -BB8_120: - sub.f32 %f2062, %f2060, %f2179; - mul.f32 %f293, %f2062, %f212; - abs.f32 %f294, %f293; - setp.ltu.f32 %p114, %f294, 0f3F800000; - @%p114 bra BB8_122; - bra.uni BB8_121; - -BB8_122: - mul.f32 %f1282, %f293, %f293; - mov.f32 %f1283, 0f3BA0C9F8; - mov.f32 %f1284, 0fBA1268FB; - fma.rn.f32 %f1285, %f1284, %f1282, %f1283; - mov.f32 %f1286, 0fBCDABFD4; - fma.rn.f32 %f1287, %f1285, %f1282, %f1286; - mov.f32 %f1288, 0f3DE70331; - fma.rn.f32 %f1289, %f1287, %f1282, %f1288; - mov.f32 %f1290, 0fBEC09330; - fma.rn.f32 %f1291, %f1289, %f1282, %f1290; - mov.f32 %f1292, 0f3F906EBA; - fma.rn.f32 %f1293, %f1291, %f1282, %f1292; - mul.f32 %f2227, %f293, %f1293; - bra.uni BB8_123; - -BB8_121: - mov.f32 %f2064, 0f3F800000; - mov.f32 %f1264, 0f3A03BB71; - mov.f32 %f1265, 0fB7B730FB; - fma.rn.f32 %f1266, %f1265, %f294, %f1264; - mov.f32 %f1267, 0fBBACA3B3; - fma.rn.f32 %f1268, %f1266, %f294, %f1267; - mov.f32 %f1269, 0f3D0A7445; - fma.rn.f32 %f1270, %f1268, %f294, %f1269; - mov.f32 %f1271, 0fBE1B3B75; - fma.rn.f32 %f1272, %f1270, %f294, %f1271; - mov.f32 %f1273, 0fBF6B385A; - fma.rn.f32 %f1274, %f1272, %f294, %f1273; - mov.f32 %f1275, 0fBFD0316E; - fma.rn.f32 %f1276, %f1274, %f294, %f1275; - mov.f32 %f1277, 0fBA031CCE; - fma.rn.f32 %f1278, %f1276, %f294, %f1277; - ex2.approx.ftz.f32 %f1279, %f1278; - sub.f32 %f1281, %f2064, %f1279; - mov.b32 %r257, %f1281; - setp.ltu.f32 %p115, %f294, 0f407AD445; - selp.b32 %r258, %r257, 1065353216, %p115; - mov.b32 %r259, %f293; - and.b32 %r260, %r259, -2147483648; - or.b32 %r261, %r258, %r260; - mov.b32 %f2227, %r261; - -BB8_123: - mov.f32 %f2065, 0f40000000; - mul.f32 %f298, %f285, 0f3F000000; - sub.f32 %f1296, %f2226, %f2227; - mul.f32 %f299, %f1296, 0f3F000000; - // inline asm - rcp.approx.ftz.f32 %f1294,%f224; - // inline asm - mul.f32 %f1297, %f1294, %f225; - mul.f32 %f1298, %f1297, %f1297; - mov.f32 %f1299, 0f3C4CAF63; - mov.f32 %f1300, 0f3B18F0FE; - fma.rn.f32 %f1301, %f1300, %f1298, %f1299; - mov.f32 %f1302, 0f3DAAAABD; - fma.rn.f32 %f1303, %f1301, %f1298, %f1302; - mul.rn.f32 %f1304, %f1303, %f1298; - mul.rn.f32 %f1305, %f1304, %f1297; - sub.f32 %f1306, %f223, %f1297; - neg.f32 %f1307, %f1297; - add.f32 %f1308, %f1306, %f1306; - fma.rn.f32 %f1309, %f1307, %f223, %f1308; - mul.rn.f32 %f1310, %f1294, %f1309; - add.f32 %f1311, %f1305, %f1297; - sub.f32 %f1312, %f1297, %f1311; - add.f32 %f1313, %f1305, %f1312; - add.f32 %f1314, %f1310, %f1313; - add.f32 %f1315, %f1311, %f1314; - sub.f32 %f1316, %f1311, %f1315; - add.f32 %f1317, %f1314, %f1316; - add.f32 %f1318, %f226, %f1315; - sub.f32 %f1319, %f226, %f1318; - add.f32 %f1320, %f1315, %f1319; - add.f32 %f1321, %f1317, %f1320; - add.f32 %f1322, %f227, %f1321; - add.f32 %f1323, %f1318, %f1322; - sub.f32 %f1324, %f1318, %f1323; - add.f32 %f1325, %f1322, %f1324; - mul.rn.f32 %f1327, %f2065, %f1323; - neg.f32 %f1328, %f1327; - fma.rn.f32 %f1329, %f2065, %f1323, %f1328; - fma.rn.f32 %f1330, %f2065, %f1325, %f1329; - mov.f32 %f1331, 0f00000000; - fma.rn.f32 %f1332, %f1331, %f1323, %f1330; - add.rn.f32 %f1333, %f1327, %f1332; - neg.f32 %f1334, %f1333; - add.rn.f32 %f1335, %f1327, %f1334; - add.rn.f32 %f1336, %f1335, %f1332; - mov.b32 %r262, %f1333; - setp.eq.s32 %p116, %r262, 1118925336; - add.s32 %r263, %r262, -1; - mov.b32 %f1337, %r263; - add.f32 %f1338, %f1336, 0f37000000; - selp.f32 %f1339, %f1337, %f1333, %p116; - selp.f32 %f300, %f1338, %f1336, %p116; - mul.f32 %f1340, %f1339, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1341, %f1340; - mov.f32 %f1342, 0fBF317200; - fma.rn.f32 %f1343, %f1341, %f1342, %f1339; - mov.f32 %f1344, 0fB5BFBE8E; - fma.rn.f32 %f1345, %f1341, %f1344, %f1343; - mul.f32 %f1346, %f1345, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1347, %f1346; - add.f32 %f1348, %f1341, 0f00000000; - ex2.approx.f32 %f1349, %f1348; - mul.f32 %f1350, %f1347, %f1349; - setp.lt.f32 %p117, %f1339, 0fC2D20000; - selp.f32 %f1351, 0f00000000, %f1350, %p117; - setp.gt.f32 %p118, %f1339, 0f42D20000; - selp.f32 %f2228, 0f7F800000, %f1351, %p118; - setp.eq.f32 %p119, %f2228, 0f7F800000; - @%p119 bra BB8_125; - - fma.rn.f32 %f2228, %f2228, %f300, %f2228; - -BB8_125: - setp.geu.f32 %p256, %f220, 0f00000000; - mov.b32 %r264, %f2228; - xor.b32 %r265, %r264, -2147483648; - mov.b32 %f1352, %r265; - selp.f32 %f304, %f1352, %f2228, %p1; - setp.eq.f32 %p120, %f220, 0f00000000; - selp.f32 %f2229, %f228, %f304, %p120; - @%p256 bra BB8_127; - - mov.f32 %f2066, 0f40000000; - cvt.rzi.f32.f32 %f1354, %f2066; - setp.neu.f32 %p121, %f1354, 0f40000000; - selp.f32 %f2229, 0f7FFFFFFF, %f304, %p121; - -BB8_127: - abs.f32 %f2077, %f220; - mov.f32 %f2076, 0f00000000; - mov.f32 %f2075, 0f3DAAAABD; - mov.f32 %f2074, 0f3C4CAF63; - mov.f32 %f2073, 0f3B18F0FE; - mov.f32 %f2072, 0fB5BFBE8E; - mov.f32 %f2071, 0fBF317200; - add.f32 %f2070, %f2077, 0f40000000; - mov.b32 %r385, %f2070; - selp.f32 %f2069, 0fFF800000, 0f7F800000, %p1; - add.f32 %f2068, %f220, 0f40000000; - mov.f32 %f2067, 0f40000000; - setp.gtu.f32 %p122, %f2077, 0f7F800000; - selp.f32 %f1357, %f2068, %f2229, %p122; - setp.neu.f32 %p123, %f2077, 0f7F800000; - selp.f32 %f1358, %f1357, %f2069, %p123; - setp.gt.s32 %p124, %r385, 2139095039; - selp.f32 %f1359, %f1358, %f2229, %p124; - mul.f32 %f1360, %f1359, 0fBF000000; - setp.eq.f32 %p125, %f220, 0f3F800000; - selp.f32 %f1361, 0fBF000000, %f1360, %p125; - mul.f32 %f1362, %f1361, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1363, %f1362; - fma.rn.f32 %f1365, %f1363, %f2071, %f1361; - fma.rn.f32 %f1367, %f1363, %f2072, %f1365; - mul.f32 %f1368, %f1367, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1369, %f1368; - add.f32 %f1370, %f1363, 0f00000000; - ex2.approx.f32 %f1371, %f1370; - mul.f32 %f1372, %f1369, %f1371; - setp.lt.f32 %p126, %f1361, 0fC2D20000; - selp.f32 %f1373, 0f00000000, %f1372, %p126; - setp.gt.f32 %p127, %f1361, 0f42D20000; - selp.f32 %f308, 0f7F800000, %f1373, %p127; - // inline asm - rcp.approx.ftz.f32 %f1355,%f234; - // inline asm - mul.f32 %f1374, %f1355, %f235; - mul.f32 %f1375, %f1374, %f1374; - fma.rn.f32 %f1378, %f2073, %f1375, %f2074; - fma.rn.f32 %f1380, %f1378, %f1375, %f2075; - mul.rn.f32 %f1381, %f1380, %f1375; - mul.rn.f32 %f1382, %f1381, %f1374; - sub.f32 %f1383, %f233, %f1374; - neg.f32 %f1384, %f1374; - add.f32 %f1385, %f1383, %f1383; - fma.rn.f32 %f1386, %f1384, %f233, %f1385; - mul.rn.f32 %f1387, %f1355, %f1386; - add.f32 %f1388, %f1382, %f1374; - sub.f32 %f1389, %f1374, %f1388; - add.f32 %f1390, %f1382, %f1389; - add.f32 %f1391, %f1387, %f1390; - add.f32 %f1392, %f1388, %f1391; - sub.f32 %f1393, %f1388, %f1392; - add.f32 %f1394, %f1391, %f1393; - add.f32 %f1395, %f236, %f1392; - sub.f32 %f1396, %f236, %f1395; - add.f32 %f1397, %f1392, %f1396; - add.f32 %f1398, %f1394, %f1397; - add.f32 %f1399, %f237, %f1398; - add.f32 %f1400, %f1395, %f1399; - sub.f32 %f1401, %f1395, %f1400; - add.f32 %f1402, %f1399, %f1401; - mul.rn.f32 %f1404, %f2067, %f1400; - neg.f32 %f1405, %f1404; - fma.rn.f32 %f1406, %f2067, %f1400, %f1405; - fma.rn.f32 %f1407, %f2067, %f1402, %f1406; - fma.rn.f32 %f1409, %f2076, %f1400, %f1407; - add.rn.f32 %f1410, %f1404, %f1409; - neg.f32 %f1411, %f1410; - add.rn.f32 %f1412, %f1404, %f1411; - add.rn.f32 %f1413, %f1412, %f1409; - mov.b32 %r266, %f1410; - setp.eq.s32 %p128, %r266, 1118925336; - add.s32 %r267, %r266, -1; - mov.b32 %f1414, %r267; - add.f32 %f1415, %f1413, 0f37000000; - selp.f32 %f1416, %f1414, %f1410, %p128; - selp.f32 %f309, %f1415, %f1413, %p128; - mul.f32 %f1417, %f1416, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1418, %f1417; - fma.rn.f32 %f1419, %f1418, %f2071, %f1416; - fma.rn.f32 %f1420, %f1418, %f2072, %f1419; - mul.f32 %f1421, %f1420, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1422, %f1421; - add.f32 %f1423, %f1418, 0f00000000; - ex2.approx.f32 %f1424, %f1423; - mul.f32 %f1425, %f1422, %f1424; - setp.lt.f32 %p129, %f1416, 0fC2D20000; - selp.f32 %f1426, 0f00000000, %f1425, %p129; - setp.gt.f32 %p130, %f1416, 0f42D20000; - selp.f32 %f2230, 0f7F800000, %f1426, %p130; - setp.eq.f32 %p131, %f2230, 0f7F800000; - @%p131 bra BB8_129; - - fma.rn.f32 %f2230, %f2230, %f309, %f2230; - -BB8_129: - setp.geu.f32 %p257, %f231, 0f00000000; - mov.b32 %r268, %f2230; - xor.b32 %r269, %r268, -2147483648; - mov.b32 %f1427, %r269; - selp.f32 %f313, %f1427, %f2230, %p2; - setp.eq.f32 %p132, %f231, 0f00000000; - selp.f32 %f2231, %f238, %f313, %p132; - @%p257 bra BB8_131; - - mov.f32 %f2078, 0f40000000; - cvt.rzi.f32.f32 %f1429, %f2078; - setp.neu.f32 %p133, %f1429, 0f40000000; - selp.f32 %f2231, 0f7FFFFFFF, %f313, %p133; - -BB8_131: - cvt.rn.f32.s32 %f2096, %r411; - abs.f32 %f2095, %f231; - cvt.rn.f32.s32 %f2094, %r412; - mov.f32 %f2093, 0f35BFBE8E; - mov.f32 %f2092, 0f3F317200; - add.f32 %f2091, %f2096, 0f3F800000; - sub.f32 %f2090, %f2091, %f2178; - sub.f32 %f2089, %f2096, %f2178; - add.f32 %f2088, %f2095, 0f40000000; - mov.b32 %r386, %f2088; - selp.f32 %f2087, 0fFF800000, 0f7F800000, %p2; - add.f32 %f2086, %f231, 0f40000000; - mov.f32 %f2085, 0f00000000; - mov.f32 %f2084, 0f3DAAAABD; - mov.f32 %f2083, 0f3C4CAF63; - mov.f32 %f2082, 0f3B18F0FE; - mov.f32 %f2081, 0fB5BFBE8E; - mov.f32 %f2080, 0fBF317200; - mov.f32 %f2079, 0f40000000; - setp.gtu.f32 %p134, %f2095, 0f7F800000; - selp.f32 %f1432, %f2086, %f2231, %p134; - setp.neu.f32 %p135, %f2095, 0f7F800000; - selp.f32 %f1433, %f1432, %f2087, %p135; - setp.gt.s32 %p136, %r386, 2139095039; - selp.f32 %f1434, %f1433, %f2231, %p136; - mul.f32 %f1435, %f1434, 0fBF000000; - setp.eq.f32 %p137, %f231, 0f3F800000; - selp.f32 %f1436, 0fBF000000, %f1435, %p137; - mul.f32 %f1437, %f1436, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1438, %f1437; - fma.rn.f32 %f1440, %f1438, %f2080, %f1436; - fma.rn.f32 %f1442, %f1438, %f2081, %f1440; - mul.f32 %f1443, %f1442, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1444, %f1443; - add.f32 %f1445, %f1438, 0f00000000; - ex2.approx.f32 %f1446, %f1445; - mul.f32 %f1447, %f1444, %f1446; - setp.lt.f32 %p138, %f1436, 0fC2D20000; - selp.f32 %f1448, 0f00000000, %f1447, %p138; - setp.gt.f32 %p139, %f1436, 0f42D20000; - selp.f32 %f1449, 0f7F800000, %f1448, %p139; - sub.f32 %f1450, %f308, %f1449; - mul.f32 %f1451, %f85, %f1450; - mul.f32 %f317, %f299, %f1451; - mul.f32 %f1452, %f2089, %f1449; - mul.f32 %f1453, %f2090, %f308; - sub.f32 %f1454, %f1453, %f1452; - mul.f32 %f1455, %f1454, %f87; - mul.f32 %f318, %f299, %f1455; - add.f32 %f1456, %f2094, 0f3F800000; - sub.f32 %f319, %f1456, %f2179; - div.rn.f32 %f320, %f319, %f472; - abs.f32 %f321, %f320; - setp.lt.f32 %p140, %f321, 0f00800000; - mul.f32 %f1457, %f321, 0f4B800000; - selp.f32 %f1458, 0fC3170000, 0fC2FE0000, %p140; - selp.f32 %f1459, %f1457, %f321, %p140; - mov.b32 %r270, %f1459; - and.b32 %r271, %r270, 8388607; - or.b32 %r272, %r271, 1065353216; - mov.b32 %f1460, %r272; - shr.u32 %r273, %r270, 23; - cvt.rn.f32.u32 %f1461, %r273; - add.f32 %f1462, %f1458, %f1461; - setp.gt.f32 %p141, %f1460, 0f3FB504F3; - mul.f32 %f1463, %f1460, 0f3F000000; - add.f32 %f1464, %f1462, 0f3F800000; - selp.f32 %f1465, %f1463, %f1460, %p141; - selp.f32 %f1466, %f1464, %f1462, %p141; - add.f32 %f1467, %f1465, 0fBF800000; - add.f32 %f1431, %f1465, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1430,%f1431; - // inline asm - add.f32 %f1468, %f1467, %f1467; - mul.f32 %f1469, %f1430, %f1468; - mul.f32 %f1470, %f1469, %f1469; - fma.rn.f32 %f1473, %f2082, %f1470, %f2083; - fma.rn.f32 %f1475, %f1473, %f1470, %f2084; - mul.rn.f32 %f1476, %f1475, %f1470; - mul.rn.f32 %f1477, %f1476, %f1469; - sub.f32 %f1478, %f1467, %f1469; - neg.f32 %f1479, %f1469; - add.f32 %f1480, %f1478, %f1478; - fma.rn.f32 %f1481, %f1479, %f1467, %f1480; - mul.rn.f32 %f1482, %f1430, %f1481; - add.f32 %f1483, %f1477, %f1469; - sub.f32 %f1484, %f1469, %f1483; - add.f32 %f1485, %f1477, %f1484; - add.f32 %f1486, %f1482, %f1485; - add.f32 %f1487, %f1483, %f1486; - sub.f32 %f1488, %f1483, %f1487; - add.f32 %f1489, %f1486, %f1488; - mul.rn.f32 %f1491, %f1466, %f2092; - mul.rn.f32 %f1493, %f1466, %f2093; - add.f32 %f1494, %f1491, %f1487; - sub.f32 %f1495, %f1491, %f1494; - add.f32 %f1496, %f1487, %f1495; - add.f32 %f1497, %f1489, %f1496; - add.f32 %f1498, %f1493, %f1497; - add.f32 %f1499, %f1494, %f1498; - sub.f32 %f1500, %f1494, %f1499; - add.f32 %f1501, %f1498, %f1500; - mul.rn.f32 %f1503, %f2079, %f1499; - neg.f32 %f1504, %f1503; - fma.rn.f32 %f1505, %f2079, %f1499, %f1504; - fma.rn.f32 %f1506, %f2079, %f1501, %f1505; - fma.rn.f32 %f1508, %f2085, %f1499, %f1506; - add.rn.f32 %f1509, %f1503, %f1508; - neg.f32 %f1510, %f1509; - add.rn.f32 %f1511, %f1503, %f1510; - add.rn.f32 %f1512, %f1511, %f1508; - mov.b32 %r274, %f1509; - setp.eq.s32 %p142, %r274, 1118925336; - add.s32 %r275, %r274, -1; - mov.b32 %f1513, %r275; - add.f32 %f1514, %f1512, 0f37000000; - selp.f32 %f1515, %f1513, %f1509, %p142; - selp.f32 %f322, %f1514, %f1512, %p142; - mul.f32 %f1516, %f1515, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1517, %f1516; - fma.rn.f32 %f1518, %f1517, %f2080, %f1515; - fma.rn.f32 %f1519, %f1517, %f2081, %f1518; - mul.f32 %f1520, %f1519, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1521, %f1520; - add.f32 %f1522, %f1517, 0f00000000; - ex2.approx.f32 %f1523, %f1522; - mul.f32 %f1524, %f1521, %f1523; - setp.lt.f32 %p143, %f1515, 0fC2D20000; - selp.f32 %f1525, 0f00000000, %f1524, %p143; - setp.gt.f32 %p144, %f1515, 0f42D20000; - selp.f32 %f2232, 0f7F800000, %f1525, %p144; - setp.eq.f32 %p145, %f2232, 0f7F800000; - @%p145 bra BB8_133; - - fma.rn.f32 %f2232, %f2232, %f322, %f2232; - -BB8_133: - setp.lt.f32 %p146, %f320, 0f00000000; - and.pred %p5, %p146, %p90; - mov.b32 %r276, %f2232; - xor.b32 %r277, %r276, -2147483648; - mov.b32 %f1526, %r277; - selp.f32 %f2234, %f1526, %f2232, %p5; - setp.eq.f32 %p148, %f320, 0f00000000; - @%p148 bra BB8_136; - bra.uni BB8_134; - -BB8_136: - add.f32 %f1529, %f320, %f320; - selp.f32 %f2234, %f1529, 0f00000000, %p90; - bra.uni BB8_137; - -BB8_134: - setp.geu.f32 %p149, %f320, 0f00000000; - @%p149 bra BB8_137; - - mov.f32 %f2109, 0f40000000; - cvt.rzi.f32.f32 %f1528, %f2109; - setp.neu.f32 %p150, %f1528, 0f40000000; - selp.f32 %f2234, 0f7FFFFFFF, %f2234, %p150; - -BB8_137: - abs.f32 %f2024, %f320; - add.f32 %f1530, %f2024, 0f40000000; - mov.b32 %r278, %f1530; - setp.lt.s32 %p152, %r278, 2139095040; - @%p152 bra BB8_142; - - abs.f32 %f2107, %f320; - setp.gtu.f32 %p153, %f2107, 0f7F800000; - @%p153 bra BB8_141; - bra.uni BB8_139; - -BB8_141: - add.f32 %f2234, %f320, 0f40000000; - bra.uni BB8_142; - -BB8_139: - abs.f32 %f2108, %f320; - setp.neu.f32 %p154, %f2108, 0f7F800000; - @%p154 bra BB8_142; - - selp.f32 %f2234, 0fFF800000, 0f7F800000, %p5; - -BB8_142: - mov.f32 %f2035, 0f35BFBE8E; - mov.f32 %f2034, 0f3F317200; - mov.f32 %f2033, 0f00000000; - mov.f32 %f2032, 0f3DAAAABD; - mov.f32 %f2031, 0f3C4CAF63; - mov.f32 %f2030, 0f3B18F0FE; - mov.f32 %f2029, 0fB5BFBE8E; - mov.f32 %f2028, 0fBF317200; - mov.f32 %f2027, 0f40000000; - cvt.rn.f32.s32 %f2026, %r412; - sub.f32 %f2025, %f2026, %f2179; - mul.f32 %f1533, %f2234, 0fBF000000; - setp.eq.f32 %p155, %f320, 0f3F800000; - selp.f32 %f1534, 0fBF000000, %f1533, %p155; - mul.f32 %f1535, %f1534, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1536, %f1535; - fma.rn.f32 %f1538, %f1536, %f2028, %f1534; - fma.rn.f32 %f1540, %f1536, %f2029, %f1538; - mul.f32 %f1541, %f1540, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1542, %f1541; - add.f32 %f1543, %f1536, 0f00000000; - ex2.approx.f32 %f1544, %f1543; - mul.f32 %f1545, %f1542, %f1544; - setp.lt.f32 %p156, %f1534, 0fC2D20000; - selp.f32 %f1546, 0f00000000, %f1545, %p156; - setp.gt.f32 %p157, %f1534, 0f42D20000; - selp.f32 %f333, 0f7F800000, %f1546, %p157; - div.rn.f32 %f334, %f2025, %f472; - abs.f32 %f335, %f334; - setp.lt.f32 %p158, %f335, 0f00800000; - mul.f32 %f1547, %f335, 0f4B800000; - selp.f32 %f1548, 0fC3170000, 0fC2FE0000, %p158; - selp.f32 %f1549, %f1547, %f335, %p158; - mov.b32 %r279, %f1549; - and.b32 %r280, %r279, 8388607; - or.b32 %r281, %r280, 1065353216; - mov.b32 %f1550, %r281; - shr.u32 %r282, %r279, 23; - cvt.rn.f32.u32 %f1551, %r282; - add.f32 %f1552, %f1548, %f1551; - setp.gt.f32 %p159, %f1550, 0f3FB504F3; - mul.f32 %f1553, %f1550, 0f3F000000; - add.f32 %f1554, %f1552, 0f3F800000; - selp.f32 %f1555, %f1553, %f1550, %p159; - selp.f32 %f1556, %f1554, %f1552, %p159; - add.f32 %f1557, %f1555, 0fBF800000; - add.f32 %f1532, %f1555, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f1531,%f1532; - // inline asm - add.f32 %f1558, %f1557, %f1557; - mul.f32 %f1559, %f1531, %f1558; - mul.f32 %f1560, %f1559, %f1559; - fma.rn.f32 %f1563, %f2030, %f1560, %f2031; - fma.rn.f32 %f1565, %f1563, %f1560, %f2032; - mul.rn.f32 %f1566, %f1565, %f1560; - mul.rn.f32 %f1567, %f1566, %f1559; - sub.f32 %f1568, %f1557, %f1559; - neg.f32 %f1569, %f1559; - add.f32 %f1570, %f1568, %f1568; - fma.rn.f32 %f1571, %f1569, %f1557, %f1570; - mul.rn.f32 %f1572, %f1531, %f1571; - add.f32 %f1573, %f1567, %f1559; - sub.f32 %f1574, %f1559, %f1573; - add.f32 %f1575, %f1567, %f1574; - add.f32 %f1576, %f1572, %f1575; - add.f32 %f1577, %f1573, %f1576; - sub.f32 %f1578, %f1573, %f1577; - add.f32 %f1579, %f1576, %f1578; - mul.rn.f32 %f1581, %f1556, %f2034; - mul.rn.f32 %f1583, %f1556, %f2035; - add.f32 %f1584, %f1581, %f1577; - sub.f32 %f1585, %f1581, %f1584; - add.f32 %f1586, %f1577, %f1585; - add.f32 %f1587, %f1579, %f1586; - add.f32 %f1588, %f1583, %f1587; - add.f32 %f1589, %f1584, %f1588; - sub.f32 %f1590, %f1584, %f1589; - add.f32 %f1591, %f1588, %f1590; - mul.rn.f32 %f1593, %f2027, %f1589; - neg.f32 %f1594, %f1593; - fma.rn.f32 %f1595, %f2027, %f1589, %f1594; - fma.rn.f32 %f1596, %f2027, %f1591, %f1595; - fma.rn.f32 %f1598, %f2033, %f1589, %f1596; - add.rn.f32 %f1599, %f1593, %f1598; - neg.f32 %f1600, %f1599; - add.rn.f32 %f1601, %f1593, %f1600; - add.rn.f32 %f1602, %f1601, %f1598; - mov.b32 %r283, %f1599; - setp.eq.s32 %p160, %r283, 1118925336; - add.s32 %r284, %r283, -1; - mov.b32 %f1603, %r284; - add.f32 %f1604, %f1602, 0f37000000; - selp.f32 %f1605, %f1603, %f1599, %p160; - selp.f32 %f336, %f1604, %f1602, %p160; - mul.f32 %f1606, %f1605, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1607, %f1606; - fma.rn.f32 %f1608, %f1607, %f2028, %f1605; - fma.rn.f32 %f1609, %f1607, %f2029, %f1608; - mul.f32 %f1610, %f1609, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1611, %f1610; - add.f32 %f1612, %f1607, 0f00000000; - ex2.approx.f32 %f1613, %f1612; - mul.f32 %f1614, %f1611, %f1613; - setp.lt.f32 %p161, %f1605, 0fC2D20000; - selp.f32 %f1615, 0f00000000, %f1614, %p161; - setp.gt.f32 %p162, %f1605, 0f42D20000; - selp.f32 %f2235, 0f7F800000, %f1615, %p162; - setp.eq.f32 %p163, %f2235, 0f7F800000; - @%p163 bra BB8_144; - - fma.rn.f32 %f2235, %f2235, %f336, %f2235; - -BB8_144: - setp.lt.f32 %p164, %f334, 0f00000000; - and.pred %p6, %p164, %p90; - mov.b32 %r285, %f2235; - xor.b32 %r286, %r285, -2147483648; - mov.b32 %f1616, %r286; - selp.f32 %f2237, %f1616, %f2235, %p6; - setp.eq.f32 %p166, %f334, 0f00000000; - @%p166 bra BB8_147; - bra.uni BB8_145; - -BB8_147: - add.f32 %f1619, %f334, %f334; - selp.f32 %f2237, %f1619, 0f00000000, %p90; - bra.uni BB8_148; - -BB8_145: - setp.geu.f32 %p167, %f334, 0f00000000; - @%p167 bra BB8_148; - - mov.f32 %f2106, 0f40000000; - cvt.rzi.f32.f32 %f1618, %f2106; - setp.neu.f32 %p168, %f1618, 0f40000000; - selp.f32 %f2237, 0f7FFFFFFF, %f2237, %p168; - -BB8_148: - abs.f32 %f2110, %f334; - add.f32 %f1620, %f2110, 0f40000000; - mov.b32 %r287, %f1620; - setp.lt.s32 %p170, %r287, 2139095040; - @%p170 bra BB8_153; - - abs.f32 %f2111, %f334; - setp.gtu.f32 %p171, %f2111, 0f7F800000; - @%p171 bra BB8_152; - bra.uni BB8_150; - -BB8_152: - add.f32 %f2237, %f334, 0f40000000; - bra.uni BB8_153; - -BB8_150: - abs.f32 %f2112, %f334; - setp.neu.f32 %p172, %f2112, 0f7F800000; - @%p172 bra BB8_153; - - selp.f32 %f2237, 0fFF800000, 0f7F800000, %p6; - -BB8_153: - cvt.rn.f32.s32 %f2041, %r412; - add.f32 %f2040, %f2041, 0f3F800000; - sub.f32 %f2039, %f2040, %f2179; - mov.f32 %f2038, 0fB5BFBE8E; - mov.f32 %f2037, 0fBF317200; - sub.f32 %f2036, %f2041, %f2179; - mul.f32 %f1621, %f2237, 0fBF000000; - setp.eq.f32 %p173, %f334, 0f3F800000; - selp.f32 %f1622, 0fBF000000, %f1621, %p173; - mul.f32 %f1623, %f1622, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1624, %f1623; - fma.rn.f32 %f1626, %f1624, %f2037, %f1622; - fma.rn.f32 %f1628, %f1624, %f2038, %f1626; - mul.f32 %f1629, %f1628, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1630, %f1629; - add.f32 %f1631, %f1624, 0f00000000; - ex2.approx.f32 %f1632, %f1631; - mul.f32 %f1633, %f1630, %f1632; - setp.lt.f32 %p174, %f1622, 0fC2D20000; - selp.f32 %f1634, 0f00000000, %f1633, %p174; - setp.gt.f32 %p175, %f1622, 0f42D20000; - selp.f32 %f1635, 0f7F800000, %f1634, %p175; - sub.f32 %f1636, %f333, %f1635; - mul.f32 %f1637, %f85, %f1636; - mul.f32 %f1638, %f298, %f1637; - mul.f32 %f1639, %f2036, %f1635; - mul.f32 %f1640, %f2039, %f333; - sub.f32 %f1641, %f1640, %f1639; - mul.f32 %f1642, %f87, %f1641; - mul.f32 %f1643, %f298, %f1642; - mul.f32 %f1644, %f317, %f317; - mul.f32 %f1645, %f278, %f1644; - mul.f32 %f1646, %f277, %f318; - sub.f32 %f1647, %f1646, %f1645; - add.f32 %f2214, %f2214, %f1647; - mul.f32 %f1648, %f277, %f1643; - mul.f32 %f1649, %f1638, %f1638; - mul.f32 %f1650, %f278, %f1649; - sub.f32 %f1651, %f1648, %f1650; - add.f32 %f2213, %f2213, %f1651; - fma.rn.f32 %f2216, %f277, %f317, %f2216; - fma.rn.f32 %f2215, %f277, %f1638, %f2215; - add.s32 %r412, %r412, 1; - setp.lt.s32 %p176, %r412, %r102; - @%p176 bra BB8_93; - - add.s32 %r411, %r411, 1; - setp.lt.s32 %p177, %r411, %r102; - @%p177 bra BB8_92; - -BB8_155: - mov.f32 %f2044, 0fBF800000; - mov.f32 %f2043, 0f3F800000; - cvt.rn.f32.s32 %f2042, %r29; - div.rn.f32 %f1652, %f2216, %f2214; - max.f32 %f1654, %f1652, %f2044; - min.f32 %f1656, %f1654, %f2043; - div.rn.f32 %f1657, %f1656, %f2042; - mul.f32 %f1658, %f1657, 0f3F000000; - sub.f32 %f2178, %f2178, %f1658; - div.rn.f32 %f1659, %f2215, %f2213; - max.f32 %f1660, %f1659, %f2044; - min.f32 %f1661, %f1660, %f2043; - div.rn.f32 %f1662, %f1661, %f2042; - mul.f32 %f1663, %f1662, 0f3F000000; - sub.f32 %f2179, %f2179, %f1663; - mul.wide.s32 %rd77, %r410, 4; - add.s64 %rd78, %rd2, %rd77; - st.local.f32 [%rd78], %f2178; - add.s64 %rd79, %rd3, %rd77; - st.local.f32 [%rd79], %f2179; - add.s32 %r410, %r410, 1; - setp.lt.s32 %p178, %r410, %r29; - @%p178 bra BB8_90; - -BB8_156: - add.s32 %r406, %r406, 1; - setp.lt.s32 %p179, %r406, %r103; - @%p179 bra BB8_66; - bra.uni BB8_157; - -BB8_64: - mov.f32 %f361, %f82; - -BB8_157: - mov.f32 %f2272, 0f447A0000; - mov.f32 %f2271, 0fC47A0000; - mov.f32 %f2254, 0f00000000; - @%p8 bra BB8_158; - - mov.u32 %r288, 0; - mov.f32 %f2272, 0f447A0000; - mov.f32 %f2271, 0fC47A0000; - mov.f32 %f2254, 0f00000000; - mov.f32 %f2273, %f2271; - mov.f32 %f2274, %f2272; - mov.u32 %r414, %r288; - -BB8_160: - cvt.rn.f32.s32 %f371, %r414; - mov.u32 %r415, %r288; - -BB8_161: - setp.lt.s32 %p181, %r29, 1; - @%p181 bra BB8_162; - - sqrt.rn.f32 %f379, %f40; - cvt.rn.f32.s32 %f380, %r415; - mov.u32 %r416, 0; - mov.f32 %f2270, %f361; - -BB8_164: - mul.wide.s32 %rd80, %r416, 4; - add.s64 %rd81, %rd2, %rd80; - add.s64 %rd82, %rd3, %rd80; - ld.local.f32 %f2178, [%rd81]; - setp.gt.f32 %p182, %f2178, %f2273; - selp.f32 %f2273, %f2178, %f2273, %p182; - setp.lt.f32 %p183, %f2178, %f2274; - selp.f32 %f2274, %f2178, %f2274, %p183; - ld.local.f32 %f2179, [%rd82]; - setp.gt.f32 %p184, %f2179, %f2271; - selp.f32 %f2271, %f2179, %f2271, %p184; - setp.lt.f32 %p185, %f2179, %f2272; - selp.f32 %f2272, %f2179, %f2272, %p185; - sub.f32 %f392, %f371, %f2178; - add.f32 %f1674, %f392, 0f3F800000; - mul.f32 %f393, %f1674, %f379; - abs.f32 %f394, %f393; - setp.ltu.f32 %p186, %f394, 0f3F800000; - @%p186 bra BB8_166; - bra.uni BB8_165; - -BB8_166: - mul.f32 %f1693, %f393, %f393; - mov.f32 %f1694, 0f3BA0C9F8; - mov.f32 %f1695, 0fBA1268FB; - fma.rn.f32 %f1696, %f1695, %f1693, %f1694; - mov.f32 %f1697, 0fBCDABFD4; - fma.rn.f32 %f1698, %f1696, %f1693, %f1697; - mov.f32 %f1699, 0f3DE70331; - fma.rn.f32 %f1700, %f1698, %f1693, %f1699; - mov.f32 %f1701, 0fBEC09330; - fma.rn.f32 %f1702, %f1700, %f1693, %f1701; - mov.f32 %f1703, 0f3F906EBA; - fma.rn.f32 %f1704, %f1702, %f1693, %f1703; - mul.f32 %f2266, %f393, %f1704; - bra.uni BB8_167; - -BB8_165: - mov.f32 %f1675, 0f3A03BB71; - mov.f32 %f1676, 0fB7B730FB; - fma.rn.f32 %f1677, %f1676, %f394, %f1675; - mov.f32 %f1678, 0fBBACA3B3; - fma.rn.f32 %f1679, %f1677, %f394, %f1678; - mov.f32 %f1680, 0f3D0A7445; - fma.rn.f32 %f1681, %f1679, %f394, %f1680; - mov.f32 %f1682, 0fBE1B3B75; - fma.rn.f32 %f1683, %f1681, %f394, %f1682; - mov.f32 %f1684, 0fBF6B385A; - fma.rn.f32 %f1685, %f1683, %f394, %f1684; - mov.f32 %f1686, 0fBFD0316E; - fma.rn.f32 %f1687, %f1685, %f394, %f1686; - mov.f32 %f1688, 0fBA031CCE; - fma.rn.f32 %f1689, %f1687, %f394, %f1688; - ex2.approx.ftz.f32 %f1690, %f1689; - mov.f32 %f1691, 0f3F800000; - sub.f32 %f1692, %f1691, %f1690; - mov.b32 %r291, %f1692; - setp.ltu.f32 %p187, %f394, 0f407AD445; - selp.b32 %r292, %r291, 1065353216, %p187; - mov.b32 %r293, %f393; - and.b32 %r294, %r293, -2147483648; - or.b32 %r295, %r292, %r294; - mov.b32 %f2266, %r295; - -BB8_167: - mul.f32 %f398, %f392, %f379; - abs.f32 %f399, %f398; - setp.ltu.f32 %p188, %f399, 0f3F800000; - @%p188 bra BB8_169; - bra.uni BB8_168; - -BB8_169: - mul.f32 %f1723, %f398, %f398; - mov.f32 %f1724, 0f3BA0C9F8; - mov.f32 %f1725, 0fBA1268FB; - fma.rn.f32 %f1726, %f1725, %f1723, %f1724; - mov.f32 %f1727, 0fBCDABFD4; - fma.rn.f32 %f1728, %f1726, %f1723, %f1727; - mov.f32 %f1729, 0f3DE70331; - fma.rn.f32 %f1730, %f1728, %f1723, %f1729; - mov.f32 %f1731, 0fBEC09330; - fma.rn.f32 %f1732, %f1730, %f1723, %f1731; - mov.f32 %f1733, 0f3F906EBA; - fma.rn.f32 %f1734, %f1732, %f1723, %f1733; - mul.f32 %f2267, %f398, %f1734; - bra.uni BB8_170; - -BB8_168: - mov.f32 %f1705, 0f3A03BB71; - mov.f32 %f1706, 0fB7B730FB; - fma.rn.f32 %f1707, %f1706, %f399, %f1705; - mov.f32 %f1708, 0fBBACA3B3; - fma.rn.f32 %f1709, %f1707, %f399, %f1708; - mov.f32 %f1710, 0f3D0A7445; - fma.rn.f32 %f1711, %f1709, %f399, %f1710; - mov.f32 %f1712, 0fBE1B3B75; - fma.rn.f32 %f1713, %f1711, %f399, %f1712; - mov.f32 %f1714, 0fBF6B385A; - fma.rn.f32 %f1715, %f1713, %f399, %f1714; - mov.f32 %f1716, 0fBFD0316E; - fma.rn.f32 %f1717, %f1715, %f399, %f1716; - mov.f32 %f1718, 0fBA031CCE; - fma.rn.f32 %f1719, %f1717, %f399, %f1718; - ex2.approx.ftz.f32 %f1720, %f1719; - mov.f32 %f1721, 0f3F800000; - sub.f32 %f1722, %f1721, %f1720; - mov.b32 %r296, %f1722; - setp.ltu.f32 %p189, %f399, 0f407AD445; - selp.b32 %r297, %r296, 1065353216, %p189; - mov.b32 %r298, %f398; - and.b32 %r299, %r298, -2147483648; - or.b32 %r300, %r297, %r299; - mov.b32 %f2267, %r300; - -BB8_170: - sub.f32 %f403, %f2266, %f2267; - sub.f32 %f404, %f380, %f2179; - add.f32 %f1735, %f404, 0f3F800000; - mul.f32 %f405, %f1735, %f379; - abs.f32 %f406, %f405; - setp.ltu.f32 %p190, %f406, 0f3F800000; - @%p190 bra BB8_172; - bra.uni BB8_171; - -BB8_172: - mul.f32 %f1754, %f405, %f405; - mov.f32 %f1755, 0f3BA0C9F8; - mov.f32 %f1756, 0fBA1268FB; - fma.rn.f32 %f1757, %f1756, %f1754, %f1755; - mov.f32 %f1758, 0fBCDABFD4; - fma.rn.f32 %f1759, %f1757, %f1754, %f1758; - mov.f32 %f1760, 0f3DE70331; - fma.rn.f32 %f1761, %f1759, %f1754, %f1760; - mov.f32 %f1762, 0fBEC09330; - fma.rn.f32 %f1763, %f1761, %f1754, %f1762; - mov.f32 %f1764, 0f3F906EBA; - fma.rn.f32 %f1765, %f1763, %f1754, %f1764; - mul.f32 %f2268, %f405, %f1765; - bra.uni BB8_173; - -BB8_171: - mov.f32 %f1736, 0f3A03BB71; - mov.f32 %f1737, 0fB7B730FB; - fma.rn.f32 %f1738, %f1737, %f406, %f1736; - mov.f32 %f1739, 0fBBACA3B3; - fma.rn.f32 %f1740, %f1738, %f406, %f1739; - mov.f32 %f1741, 0f3D0A7445; - fma.rn.f32 %f1742, %f1740, %f406, %f1741; - mov.f32 %f1743, 0fBE1B3B75; - fma.rn.f32 %f1744, %f1742, %f406, %f1743; - mov.f32 %f1745, 0fBF6B385A; - fma.rn.f32 %f1746, %f1744, %f406, %f1745; - mov.f32 %f1747, 0fBFD0316E; - fma.rn.f32 %f1748, %f1746, %f406, %f1747; - mov.f32 %f1749, 0fBA031CCE; - fma.rn.f32 %f1750, %f1748, %f406, %f1749; - ex2.approx.ftz.f32 %f1751, %f1750; - mov.f32 %f1752, 0f3F800000; - sub.f32 %f1753, %f1752, %f1751; - mov.b32 %r301, %f1753; - setp.ltu.f32 %p191, %f406, 0f407AD445; - selp.b32 %r302, %r301, 1065353216, %p191; - mov.b32 %r303, %f405; - and.b32 %r304, %r303, -2147483648; - or.b32 %r305, %r302, %r304; - mov.b32 %f2268, %r305; - -BB8_173: - mul.f32 %f410, %f404, %f379; - abs.f32 %f411, %f410; - setp.ltu.f32 %p192, %f411, 0f3F800000; - @%p192 bra BB8_175; - bra.uni BB8_174; - -BB8_175: - mul.f32 %f1784, %f410, %f410; - mov.f32 %f1785, 0f3BA0C9F8; - mov.f32 %f1786, 0fBA1268FB; - fma.rn.f32 %f1787, %f1786, %f1784, %f1785; - mov.f32 %f1788, 0fBCDABFD4; - fma.rn.f32 %f1789, %f1787, %f1784, %f1788; - mov.f32 %f1790, 0f3DE70331; - fma.rn.f32 %f1791, %f1789, %f1784, %f1790; - mov.f32 %f1792, 0fBEC09330; - fma.rn.f32 %f1793, %f1791, %f1784, %f1792; - mov.f32 %f1794, 0f3F906EBA; - fma.rn.f32 %f1795, %f1793, %f1784, %f1794; - mul.f32 %f2269, %f410, %f1795; - bra.uni BB8_176; - -BB8_174: - mov.f32 %f1766, 0f3A03BB71; - mov.f32 %f1767, 0fB7B730FB; - fma.rn.f32 %f1768, %f1767, %f411, %f1766; - mov.f32 %f1769, 0fBBACA3B3; - fma.rn.f32 %f1770, %f1768, %f411, %f1769; - mov.f32 %f1771, 0f3D0A7445; - fma.rn.f32 %f1772, %f1770, %f411, %f1771; - mov.f32 %f1773, 0fBE1B3B75; - fma.rn.f32 %f1774, %f1772, %f411, %f1773; - mov.f32 %f1775, 0fBF6B385A; - fma.rn.f32 %f1776, %f1774, %f411, %f1775; - mov.f32 %f1777, 0fBFD0316E; - fma.rn.f32 %f1778, %f1776, %f411, %f1777; - mov.f32 %f1779, 0fBA031CCE; - fma.rn.f32 %f1780, %f1778, %f411, %f1779; - ex2.approx.ftz.f32 %f1781, %f1780; - mov.f32 %f1782, 0f3F800000; - sub.f32 %f1783, %f1782, %f1781; - mov.b32 %r306, %f1783; - setp.ltu.f32 %p193, %f411, 0f407AD445; - selp.b32 %r307, %r306, 1065353216, %p193; - mov.b32 %r308, %f410; - and.b32 %r309, %r308, -2147483648; - or.b32 %r310, %r307, %r309; - mov.b32 %f2269, %r310; - -BB8_176: - sub.f32 %f1796, %f2268, %f2269; - mul.f32 %f1797, %f1796, 0f3F000000; - mul.f32 %f1798, %f403, 0f3F000000; - mul.f32 %f1799, %f1798, %f473; - fma.rn.f32 %f2270, %f1799, %f1797, %f2270; - add.s32 %r416, %r416, 1; - setp.lt.s32 %p194, %r416, %r29; - @%p194 bra BB8_164; - bra.uni BB8_177; - -BB8_162: - mov.f32 %f2270, %f361; - -BB8_177: - mad.lo.s32 %r311, %r415, %r102, %r414; - add.s32 %r312, %r311, %r1; - mul.wide.s32 %rd83, %r312, 4; - add.s64 %rd84, %rd1, %rd83; - ld.global.f32 %f423, [%rd84]; - mul.f32 %f1800, %f2270, 0f4B000000; - setp.lt.f32 %p195, %f2270, 0f00800000; - selp.f32 %f424, %f1800, %f2270, %p195; - selp.f32 %f1801, 0fC1B80000, 0f00000000, %p195; - mov.b32 %r313, %f424; - add.s32 %r314, %r313, -1059760811; - and.b32 %r315, %r314, -8388608; - sub.s32 %r316, %r313, %r315; - mov.b32 %f1802, %r316; - cvt.rn.f32.s32 %f1803, %r315; - mov.f32 %f1804, 0f34000000; - fma.rn.f32 %f1805, %f1803, %f1804, %f1801; - add.f32 %f1806, %f1802, 0fBF800000; - mov.f32 %f1807, 0f3E1039F6; - mov.f32 %f1808, 0fBE055027; - fma.rn.f32 %f1809, %f1808, %f1806, %f1807; - mov.f32 %f1810, 0fBDF8CDCC; - fma.rn.f32 %f1811, %f1809, %f1806, %f1810; - mov.f32 %f1812, 0f3E0F2955; - fma.rn.f32 %f1813, %f1811, %f1806, %f1812; - mov.f32 %f1814, 0fBE2AD8B9; - fma.rn.f32 %f1815, %f1813, %f1806, %f1814; - mov.f32 %f1816, 0f3E4CED0B; - fma.rn.f32 %f1817, %f1815, %f1806, %f1816; - mov.f32 %f1818, 0fBE7FFF22; - fma.rn.f32 %f1819, %f1817, %f1806, %f1818; - mov.f32 %f1820, 0f3EAAAA78; - fma.rn.f32 %f1821, %f1819, %f1806, %f1820; - mov.f32 %f1822, 0fBF000000; - fma.rn.f32 %f1823, %f1821, %f1806, %f1822; - mul.f32 %f1824, %f1806, %f1823; - fma.rn.f32 %f1825, %f1824, %f1806, %f1806; - mov.f32 %f1826, 0f3F317218; - fma.rn.f32 %f2277, %f1805, %f1826, %f1825; - setp.lt.u32 %p196, %r313, 2139095040; - @%p196 bra BB8_179; - - mov.f32 %f1827, 0f7F800000; - fma.rn.f32 %f2277, %f424, %f1827, %f1827; - -BB8_179: - setp.eq.f32 %p197, %f424, 0f00000000; - selp.f32 %f428, 0fFF800000, %f2277, %p197; - mul.f32 %f1828, %f2270, 0f40C90FD8; - setp.lt.f32 %p198, %f1828, 0f00800000; - mul.f32 %f1829, %f1828, 0f4B000000; - selp.f32 %f429, %f1829, %f1828, %p198; - selp.f32 %f1830, 0fC1B80000, 0f00000000, %p198; - mov.b32 %r317, %f429; - add.s32 %r318, %r317, -1059760811; - and.b32 %r319, %r318, -8388608; - sub.s32 %r320, %r317, %r319; - mov.b32 %f1831, %r320; - cvt.rn.f32.s32 %f1832, %r319; - fma.rn.f32 %f1834, %f1832, %f1804, %f1830; - add.f32 %f1835, %f1831, 0fBF800000; - fma.rn.f32 %f1838, %f1808, %f1835, %f1807; - fma.rn.f32 %f1840, %f1838, %f1835, %f1810; - fma.rn.f32 %f1842, %f1840, %f1835, %f1812; - fma.rn.f32 %f1844, %f1842, %f1835, %f1814; - fma.rn.f32 %f1846, %f1844, %f1835, %f1816; - fma.rn.f32 %f1848, %f1846, %f1835, %f1818; - fma.rn.f32 %f1850, %f1848, %f1835, %f1820; - fma.rn.f32 %f1852, %f1850, %f1835, %f1822; - mul.f32 %f1853, %f1835, %f1852; - fma.rn.f32 %f1854, %f1853, %f1835, %f1835; - fma.rn.f32 %f2278, %f1834, %f1826, %f1854; - setp.lt.u32 %p199, %r317, 2139095040; - @%p199 bra BB8_181; - - mov.f32 %f1856, 0f7F800000; - fma.rn.f32 %f2278, %f429, %f1856, %f1856; - -BB8_181: - setp.gt.f32 %p200, %f423, 0f00000000; - @%p200 bra BB8_183; - bra.uni BB8_182; - -BB8_183: - mul.f32 %f1857, %f423, 0f4B000000; - setp.lt.f32 %p201, %f423, 0f00800000; - selp.f32 %f434, %f1857, %f423, %p201; - selp.f32 %f1858, 0fC1B80000, 0f00000000, %p201; - mov.b32 %r321, %f434; - add.s32 %r322, %r321, -1059760811; - and.b32 %r323, %r322, -8388608; - sub.s32 %r324, %r321, %r323; - mov.b32 %f1859, %r324; - cvt.rn.f32.s32 %f1860, %r323; - fma.rn.f32 %f1862, %f1860, %f1804, %f1858; - add.f32 %f1863, %f1859, 0fBF800000; - fma.rn.f32 %f1866, %f1808, %f1863, %f1807; - fma.rn.f32 %f1868, %f1866, %f1863, %f1810; - fma.rn.f32 %f1870, %f1868, %f1863, %f1812; - fma.rn.f32 %f1872, %f1870, %f1863, %f1814; - fma.rn.f32 %f1874, %f1872, %f1863, %f1816; - fma.rn.f32 %f1876, %f1874, %f1863, %f1818; - fma.rn.f32 %f1878, %f1876, %f1863, %f1820; - fma.rn.f32 %f1880, %f1878, %f1863, %f1822; - mul.f32 %f1881, %f1863, %f1880; - fma.rn.f32 %f1882, %f1881, %f1863, %f1863; - fma.rn.f32 %f2279, %f1862, %f1826, %f1882; - setp.lt.u32 %p202, %r321, 2139095040; - @%p202 bra BB8_185; - - mov.f32 %f1884, 0f7F800000; - fma.rn.f32 %f2279, %f434, %f1884, %f1884; - -BB8_185: - setp.eq.f32 %p203, %f434, 0f00000000; - selp.f32 %f1885, 0fFF800000, %f2279, %p203; - mul.f32 %f1886, %f423, %f1885; - mul.f32 %f1887, %f423, %f428; - sub.f32 %f1888, %f1887, %f2270; - sub.f32 %f1889, %f1888, %f1886; - add.f32 %f438, %f423, %f1889; - mul.f32 %f1890, %f423, 0f40C90FD8; - setp.lt.f32 %p204, %f1890, 0f00800000; - mul.f32 %f1891, %f1890, 0f4B000000; - selp.f32 %f439, %f1891, %f1890, %p204; - selp.f32 %f1892, 0fC1B80000, 0f00000000, %p204; - mov.b32 %r325, %f439; - add.s32 %r326, %r325, -1059760811; - and.b32 %r327, %r326, -8388608; - sub.s32 %r328, %r325, %r327; - mov.b32 %f1893, %r328; - cvt.rn.f32.s32 %f1894, %r327; - fma.rn.f32 %f1896, %f1894, %f1804, %f1892; - add.f32 %f1897, %f1893, 0fBF800000; - fma.rn.f32 %f1900, %f1808, %f1897, %f1807; - fma.rn.f32 %f1902, %f1900, %f1897, %f1810; - fma.rn.f32 %f1904, %f1902, %f1897, %f1812; - fma.rn.f32 %f1906, %f1904, %f1897, %f1814; - fma.rn.f32 %f1908, %f1906, %f1897, %f1816; - fma.rn.f32 %f1910, %f1908, %f1897, %f1818; - fma.rn.f32 %f1912, %f1910, %f1897, %f1820; - fma.rn.f32 %f1914, %f1912, %f1897, %f1822; - mul.f32 %f1915, %f1897, %f1914; - fma.rn.f32 %f1916, %f1915, %f1897, %f1897; - fma.rn.f32 %f2280, %f1896, %f1826, %f1916; - setp.lt.u32 %p205, %r325, 2139095040; - @%p205 bra BB8_187; - - mov.f32 %f1918, 0f7F800000; - fma.rn.f32 %f2280, %f439, %f1918, %f1918; - -BB8_187: - mul.f32 %f1919, %f2280, 0f3F000000; - setp.eq.f32 %p206, %f439, 0f00000000; - selp.f32 %f1920, 0fFF800000, %f1919, %p206; - sub.f32 %f2281, %f438, %f1920; - bra.uni BB8_188; - -BB8_182: - neg.f32 %f2281, %f2270; - -BB8_188: - mul.f32 %f1921, %f2270, %f428; - sub.f32 %f1922, %f2281, %f1921; - add.f32 %f1923, %f2270, %f1922; - add.f32 %f1924, %f1921, %f1923; - sub.f32 %f1925, %f1924, %f2270; - setp.eq.f32 %p207, %f429, 0f00000000; - mul.f32 %f1926, %f2278, 0f3F000000; - selp.f32 %f1927, 0fFF800000, %f1926, %p207; - add.f32 %f1928, %f1927, %f1925; - fma.rn.f32 %f2254, %f1928, 0fC0000000, %f2254; - add.s32 %r415, %r415, 1; - setp.lt.s32 %p208, %r415, %r102; - @%p208 bra BB8_161; - - add.s32 %r414, %r414, 1; - setp.lt.s32 %p209, %r414, %r102; - @%p209 bra BB8_160; - bra.uni BB8_190; - -BB8_158: - mov.f32 %f2273, %f2271; - mov.f32 %f2274, %f2272; - -BB8_190: - mad.lo.s32 %r329, %r102, %r102, -1; - shl.b32 %r330, %r29, 1; - sub.s32 %r331, %r329, %r330; - cvt.rn.f32.s32 %f1929, %r331; - sqrt.rn.f32 %f1930, %f1929; - sqrt.rn.f32 %f1931, %f2254; - sub.f32 %f1932, %f1931, %f1930; - setp.lt.f32 %p210, %f1932, 0f00000000; - mul.f32 %f1933, %f1932, %f1932; - mul.f32 %f1934, %f1933, 0fBF7DB8BB; - fma.rn.f32 %f1935, %f1932, 0f3F4E353F, %f1934; - mul.f32 %f1936, %f1935, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1937, %f1936; - mov.f32 %f1938, 0fBF317200; - fma.rn.f32 %f1939, %f1937, %f1938, %f1935; - mov.f32 %f1940, 0fB5BFBE8E; - fma.rn.f32 %f1941, %f1937, %f1940, %f1939; - mul.f32 %f1942, %f1941, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1943, %f1942; - add.f32 %f1944, %f1937, 0f00000000; - ex2.approx.f32 %f1945, %f1944; - mul.f32 %f1946, %f1943, %f1945; - setp.lt.f32 %p211, %f1935, 0fC2D20000; - setp.gt.f32 %p212, %f1935, 0f42D20000; - mul.f32 %f1947, %f1946, 0f3F000000; - mov.f32 %f1948, 0f3F800000; - sub.f32 %f1949, %f1948, %f1947; - selp.f32 %f1950, 0f3F800000, %f1949, %p211; - selp.f32 %f1951, 0fFF800000, %f1950, %p212; - selp.f32 %f1952, %f1951, 0f00000000, %p210; - setp.gt.f32 %p213, %f1932, 0f00000000; - mul.f32 %f1953, %f1933, 0fBF2D21FF; - fma.rn.f32 %f1954, %f1932, 0fBF9F5F70, %f1953; - mul.f32 %f1955, %f1954, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f1956, %f1955; - fma.rn.f32 %f1957, %f1956, %f1938, %f1954; - fma.rn.f32 %f1958, %f1956, %f1940, %f1957; - mul.f32 %f1959, %f1958, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f1960, %f1959; - add.f32 %f1961, %f1956, 0f00000000; - ex2.approx.f32 %f1962, %f1961; - mul.f32 %f1963, %f1960, %f1962; - setp.lt.f32 %p214, %f1954, 0fC2D20000; - setp.gt.f32 %p215, %f1954, 0f42D20000; - mul.f32 %f1964, %f1963, 0f3F000000; - selp.f32 %f1965, 0f00000000, %f1964, %p214; - selp.f32 %f1966, 0f7F800000, %f1965, %p215; - selp.f32 %f1967, %f1966, 0f00000000, %p213; - add.f32 %f453, %f1952, %f1967; - setp.leu.f32 %p216, %f453, %f2291; - @%p216 bra BB8_205; - - abs.f32 %f1968, %f83; - mov.b32 %r332, %f83; - and.b32 %r333, %r332, -2147483648; - or.b32 %r334, %r333, 1056964608; - mov.b32 %f1969, %r334; - add.f32 %f1970, %f83, %f1969; - cvt.rzi.f32.f32 %f1971, %f1970; - setp.gt.f32 %p217, %f1968, 0f4B000000; - selp.f32 %f2289, %f83, %f1971, %p217; - setp.geu.f32 %p218, %f1968, 0f3F000000; - @%p218 bra BB8_193; - - cvt.rzi.f32.f32 %f2289, %f83; - -BB8_193: - neg.f32 %f457, %f2289; - setp.leu.f32 %p219, %f2274, %f457; - @%p219 bra BB8_205; - - cvt.rn.f32.s32 %f2047, %r155; - add.f32 %f1972, %f2047, %f2289; - setp.geu.f32 %p220, %f2273, %f1972; - setp.leu.f32 %p221, %f2272, %f457; - or.pred %p222, %p220, %p221; - setp.geu.f32 %p223, %f2271, %f1972; - or.pred %p224, %p222, %p223; - @%p224 bra BB8_205; - - setp.lt.s32 %p225, %r29, 1; - @%p225 bra BB8_204; - - and.b32 %r67, %r29, 3; - setp.eq.s32 %p226, %r67, 0; - mov.u32 %r420, 0; - @%p226 bra BB8_202; - - setp.eq.s32 %p227, %r67, 1; - mov.u32 %r418, 0; - @%p227 bra BB8_201; - - setp.eq.s32 %p228, %r67, 2; - mov.u32 %r417, 0; - @%p228 bra BB8_200; - - ld.local.f32 %f1973, [%rd2]; - st.local.f32 [%rd4], %f1973; - ld.local.f32 %f1974, [%rd3]; - st.local.f32 [%rd5], %f1974; - mov.u32 %r417, 1; - -BB8_200: - mul.wide.u32 %rd85, %r417, 4; - add.s64 %rd86, %rd2, %rd85; - ld.local.f32 %f1975, [%rd86]; - add.s64 %rd87, %rd4, %rd85; - st.local.f32 [%rd87], %f1975; - add.s64 %rd88, %rd3, %rd85; - ld.local.f32 %f1976, [%rd88]; - add.s64 %rd89, %rd5, %rd85; - st.local.f32 [%rd89], %f1976; - add.s32 %r418, %r417, 1; - -BB8_201: - mul.wide.s32 %rd90, %r418, 4; + mov.b64 %fd387, {%r410, %r81}; + +$L__BB8_170: + cvt.rn.f32.s32 %f1655, %r639; + cvt.rn.f32.s32 %f1654, %r640; + mov.f32 %f1653, 0f3102E308; + mov.f32 %f1652, 0fBF317218; + mov.f32 %f1651, 0f35BFBE8E; + mov.f32 %f1650, 0f3F317200; + mov.f32 %f1649, 0f3DAAAABD; + mov.f32 %f1648, 0f3C4CAF63; + mov.f32 %f1647, 0f3B18F0FE; + add.f32 %f1646, %f1655, 0f3F000000; + sub.f32 %f1645, %f1646, %f1774; + sub.f32 %f1644, %f1655, %f1774; + add.f32 %f1643, %f1644, 0fBF000000; + mov.f32 %f1642, 0f3FB8AA3B; + mov.f32 %f1641, 0f40000000; + mul.f32 %f244, %f219, 0f3F000000; + setp.eq.f32 %p233, %f378, 0f3F800000; + selp.f64 %fd230, 0d3FF0000000000000, %fd387, %p233; + div.rn.f64 %fd68, %fd34, %fd230; + mul.f32 %f1049, %f1643, %f242; + mul.f32 %f1050, %f1645, %f236; + sub.f32 %f1051, %f1050, %f1049; + cvt.f64.f32 %fd231, %f1051; + mul.f64 %fd232, %fd68, %fd231; + cvt.f64.f32 %fd233, %f230; + mul.f64 %fd234, %fd232, %fd233; + cvt.rn.f32.f64 %f245, %fd234; + add.f32 %f1052, %f1654, 0f3F000000; + sub.f32 %f246, %f1052, %f1773; + div.rn.f32 %f247, %f246, %f378; + abs.f32 %f248, %f247; + setp.lt.f32 %p234, %f248, 0f00800000; + mul.f32 %f1053, %f248, 0f4B800000; + selp.f32 %f1054, %f1053, %f248, %p234; + selp.f32 %f1055, 0fC3170000, 0fC2FE0000, %p234; + mov.b32 %r412, %f1054; + and.b32 %r413, %r412, 8388607; + or.b32 %r414, %r413, 1065353216; + mov.b32 %f1056, %r414; + shr.u32 %r415, %r412, 23; + cvt.rn.f32.u32 %f1057, %r415; + add.f32 %f1058, %f1055, %f1057; + setp.gt.f32 %p235, %f1056, 0f3FB504F3; + mul.f32 %f1059, %f1056, 0f3F000000; + add.f32 %f1060, %f1058, 0f3F800000; + selp.f32 %f1061, %f1060, %f1058, %p235; + selp.f32 %f1062, %f1059, %f1056, %p235; + add.f32 %f1063, %f1062, 0fBF800000; + add.f32 %f1064, %f1062, 0f3F800000; + rcp.approx.ftz.f32 %f1065, %f1064; + add.f32 %f1066, %f1063, %f1063; + mul.f32 %f1068, %f1066, %f1065; + mul.f32 %f1069, %f1068, %f1068; + fma.rn.f32 %f1072, %f1647, %f1069, %f1648; + fma.rn.f32 %f1074, %f1072, %f1069, %f1649; + mul.rn.f32 %f1075, %f1074, %f1069; + mul.rn.f32 %f1076, %f1075, %f1068; + sub.f32 %f1077, %f1063, %f1068; + add.f32 %f1078, %f1077, %f1077; + neg.f32 %f1079, %f1068; + fma.rn.f32 %f1080, %f1079, %f1063, %f1078; + mul.rn.f32 %f1081, %f1065, %f1080; + add.f32 %f1082, %f1076, %f1068; + sub.f32 %f1083, %f1068, %f1082; + add.f32 %f1084, %f1076, %f1083; + add.f32 %f1085, %f1081, %f1084; + add.f32 %f1086, %f1082, %f1085; + sub.f32 %f1087, %f1082, %f1086; + add.f32 %f1088, %f1085, %f1087; + mul.rn.f32 %f1090, %f1061, %f1650; + mul.rn.f32 %f1092, %f1061, %f1651; + add.f32 %f1093, %f1090, %f1086; + sub.f32 %f1094, %f1090, %f1093; + add.f32 %f1095, %f1086, %f1094; + add.f32 %f1096, %f1088, %f1095; + add.f32 %f1097, %f1092, %f1096; + add.f32 %f1098, %f1093, %f1097; + sub.f32 %f1099, %f1093, %f1098; + add.f32 %f1100, %f1097, %f1099; + mul.rn.f32 %f1101, %f1641, %f1098; + neg.f32 %f1102, %f1101; + fma.rn.f32 %f1103, %f1641, %f1098, %f1102; + fma.rn.f32 %f1104, %f1641, %f1100, %f1103; + mov.f32 %f1105, 0f00000000; + fma.rn.f32 %f1106, %f1105, %f1098, %f1104; + add.rn.f32 %f1107, %f1101, %f1106; + neg.f32 %f1108, %f1107; + add.rn.f32 %f1109, %f1101, %f1108; + add.rn.f32 %f1110, %f1109, %f1106; + mov.b32 %r416, %f1107; + setp.eq.s32 %p236, %r416, 1118925336; + add.s32 %r417, %r416, -1; + mov.b32 %f1111, %r417; + add.f32 %f1112, %f1110, 0f37000000; + selp.f32 %f249, %f1112, %f1110, %p236; + selp.f32 %f1113, %f1111, %f1107, %p236; + mul.rn.f32 %f1115, %f1113, %f1642; + cvt.rzi.f32.f32 %f1116, %f1115; + abs.f32 %f1117, %f1116; + setp.gt.f32 %p237, %f1117, 0f42FC0000; + mov.b32 %r418, %f1116; + and.b32 %r419, %r418, -2147483648; + or.b32 %r420, %r419, 1123811328; + mov.b32 %f1118, %r420; + selp.f32 %f1119, %f1118, %f1116, %p237; + fma.rn.f32 %f1121, %f1119, %f1652, %f1113; + fma.rn.f32 %f1123, %f1119, %f1653, %f1121; + mul.f32 %f1124, %f1123, 0f3FB8AA3B; + add.f32 %f1125, %f1119, 0f4B40007F; + mov.b32 %r421, %f1125; + shl.b32 %r422, %r421, 23; + mov.b32 %f1126, %r422; + ex2.approx.ftz.f32 %f1127, %f1124; + mul.f32 %f250, %f1127, %f1126; + setp.eq.f32 %p238, %f250, 0f7F800000; + mov.f32 %f1767, 0f7F800000; + @%p238 bra $L__BB8_172; + + fma.rn.f32 %f1767, %f250, %f249, %f250; + +$L__BB8_172: + setp.lt.f32 %p239, %f247, 0f00000000; + and.pred %p12, %p239, %p147; + setp.eq.f32 %p241, %f247, 0f00000000; + @%p241 bra $L__BB8_176; + bra.uni $L__BB8_173; + +$L__BB8_176: + add.f32 %f1132, %f247, %f247; + selp.f32 %f1769, %f1132, 0f00000000, %p147; + bra.uni $L__BB8_177; + +$L__BB8_173: + mov.b32 %r423, %f1767; + xor.b32 %r424, %r423, -2147483648; + mov.b32 %f1128, %r424; + selp.f32 %f1769, %f1128, %f1767, %p12; + setp.geu.f32 %p242, %f247, 0f00000000; + @%p242 bra $L__BB8_177; + + mov.f32 %f1667, 0f40000000; + cvt.rzi.f32.f32 %f1130, %f1667; + setp.eq.f32 %p243, %f1130, 0f40000000; + @%p243 bra $L__BB8_177; + + mov.f32 %f1769, 0f7FFFFFFF; + +$L__BB8_177: + abs.f32 %f1573, %f247; + add.f32 %f1133, %f1573, 0f40000000; + mov.b32 %r425, %f1133; + setp.lt.s32 %p245, %r425, 2139095040; + @%p245 bra $L__BB8_182; + + abs.f32 %f1665, %f247; + setp.gtu.f32 %p246, %f1665, 0f7F800000; + @%p246 bra $L__BB8_181; + bra.uni $L__BB8_179; + +$L__BB8_181: + add.f32 %f1769, %f247, 0f40000000; + bra.uni $L__BB8_182; + +$L__BB8_179: + abs.f32 %f1666, %f247; + setp.neu.f32 %p247, %f1666, 0f7F800000; + @%p247 bra $L__BB8_182; + + selp.f32 %f1769, 0fFF800000, 0f7F800000, %p12; + +$L__BB8_182: + mov.f32 %f1590, 0f00000000; + cvt.rn.f32.s32 %f1589, %r640; + sub.f32 %f1588, %f1589, %f1773; + add.f32 %f1587, %f1588, 0fBF000000; + mov.f32 %f1586, 0f3102E308; + mov.f32 %f1585, 0fBF317218; + mov.f32 %f1584, 0f35BFBE8E; + mov.f32 %f1583, 0f3F317200; + mov.f32 %f1582, 0f3DAAAABD; + mov.f32 %f1581, 0f3C4CAF63; + mov.f32 %f1580, 0f3B18F0FE; + mov.f32 %f1579, 0f32A57060; + mov.f32 %f1578, 0f4B400001; + mov.f32 %f1577, 0f437C0000; + mov.f32 %f1576, 0f3BBB989D; + mov.f32 %f1575, 0f3FB8AA3B; + mov.f32 %f1574, 0f40000000; + mul.f32 %f1135, %f1769, 0fBF000000; + setp.eq.f32 %p248, %f247, 0f3F800000; + selp.f32 %f1136, 0fBF000000, %f1135, %p248; + fma.rn.f32 %f1139, %f1136, %f1576, %f413; + cvt.sat.f32.f32 %f1142, %f1139; + fma.rm.f32 %f1144, %f1142, %f1577, %f1578; + add.f32 %f1145, %f1144, 0fCB40007F; + neg.f32 %f1146, %f1145; + fma.rn.f32 %f1147, %f1136, %f1575, %f1146; + fma.rn.f32 %f1149, %f1136, %f1579, %f1147; + mov.b32 %r426, %f1144; + shl.b32 %r427, %r426, 23; + mov.b32 %f1150, %r427; + ex2.approx.ftz.f32 %f1151, %f1149; + mul.f32 %f259, %f1151, %f1150; + div.rn.f32 %f260, %f1587, %f378; + abs.f32 %f261, %f260; + setp.lt.f32 %p249, %f261, 0f00800000; + mul.f32 %f1152, %f261, 0f4B800000; + selp.f32 %f1153, %f1152, %f261, %p249; + selp.f32 %f1154, 0fC3170000, 0fC2FE0000, %p249; + mov.b32 %r428, %f1153; + and.b32 %r429, %r428, 8388607; + or.b32 %r430, %r429, 1065353216; + mov.b32 %f1155, %r430; + shr.u32 %r431, %r428, 23; + cvt.rn.f32.u32 %f1156, %r431; + add.f32 %f1157, %f1154, %f1156; + setp.gt.f32 %p250, %f1155, 0f3FB504F3; + mul.f32 %f1158, %f1155, 0f3F000000; + add.f32 %f1159, %f1157, 0f3F800000; + selp.f32 %f1160, %f1159, %f1157, %p250; + selp.f32 %f1161, %f1158, %f1155, %p250; + add.f32 %f1162, %f1161, 0fBF800000; + add.f32 %f1163, %f1161, 0f3F800000; + rcp.approx.ftz.f32 %f1164, %f1163; + add.f32 %f1165, %f1162, %f1162; + mul.f32 %f1167, %f1165, %f1164; + mul.f32 %f1168, %f1167, %f1167; + fma.rn.f32 %f1171, %f1580, %f1168, %f1581; + fma.rn.f32 %f1173, %f1171, %f1168, %f1582; + mul.rn.f32 %f1174, %f1173, %f1168; + mul.rn.f32 %f1175, %f1174, %f1167; + sub.f32 %f1176, %f1162, %f1167; + add.f32 %f1177, %f1176, %f1176; + neg.f32 %f1178, %f1167; + fma.rn.f32 %f1179, %f1178, %f1162, %f1177; + mul.rn.f32 %f1180, %f1164, %f1179; + add.f32 %f1181, %f1175, %f1167; + sub.f32 %f1182, %f1167, %f1181; + add.f32 %f1183, %f1175, %f1182; + add.f32 %f1184, %f1180, %f1183; + add.f32 %f1185, %f1181, %f1184; + sub.f32 %f1186, %f1181, %f1185; + add.f32 %f1187, %f1184, %f1186; + mul.rn.f32 %f1189, %f1160, %f1583; + mul.rn.f32 %f1191, %f1160, %f1584; + add.f32 %f1192, %f1189, %f1185; + sub.f32 %f1193, %f1189, %f1192; + add.f32 %f1194, %f1185, %f1193; + add.f32 %f1195, %f1187, %f1194; + add.f32 %f1196, %f1191, %f1195; + add.f32 %f1197, %f1192, %f1196; + sub.f32 %f1198, %f1192, %f1197; + add.f32 %f1199, %f1196, %f1198; + mul.rn.f32 %f1200, %f1574, %f1197; + neg.f32 %f1201, %f1200; + fma.rn.f32 %f1202, %f1574, %f1197, %f1201; + fma.rn.f32 %f1203, %f1574, %f1199, %f1202; + fma.rn.f32 %f1205, %f1590, %f1197, %f1203; + add.rn.f32 %f1206, %f1200, %f1205; + neg.f32 %f1207, %f1206; + add.rn.f32 %f1208, %f1200, %f1207; + add.rn.f32 %f1209, %f1208, %f1205; + mov.b32 %r432, %f1206; + setp.eq.s32 %p251, %r432, 1118925336; + add.s32 %r433, %r432, -1; + mov.b32 %f1210, %r433; + add.f32 %f1211, %f1209, 0f37000000; + selp.f32 %f262, %f1211, %f1209, %p251; + selp.f32 %f1212, %f1210, %f1206, %p251; + mul.rn.f32 %f1213, %f1212, %f1575; + cvt.rzi.f32.f32 %f1214, %f1213; + abs.f32 %f1215, %f1214; + setp.gt.f32 %p252, %f1215, 0f42FC0000; + mov.b32 %r434, %f1214; + and.b32 %r435, %r434, -2147483648; + or.b32 %r436, %r435, 1123811328; + mov.b32 %f1216, %r436; + selp.f32 %f1217, %f1216, %f1214, %p252; + fma.rn.f32 %f1219, %f1217, %f1585, %f1212; + fma.rn.f32 %f1221, %f1217, %f1586, %f1219; + mul.f32 %f1222, %f1221, 0f3FB8AA3B; + add.f32 %f1223, %f1217, 0f4B40007F; + mov.b32 %r437, %f1223; + shl.b32 %r438, %r437, 23; + mov.b32 %f1224, %r438; + ex2.approx.ftz.f32 %f1225, %f1222; + mul.f32 %f263, %f1225, %f1224; + setp.eq.f32 %p253, %f263, 0f7F800000; + mov.f32 %f1770, 0f7F800000; + @%p253 bra $L__BB8_184; + + fma.rn.f32 %f1770, %f263, %f262, %f263; + +$L__BB8_184: + setp.lt.f32 %p254, %f260, 0f00000000; + and.pred %p13, %p254, %p147; + setp.eq.f32 %p256, %f260, 0f00000000; + @%p256 bra $L__BB8_188; + bra.uni $L__BB8_185; + +$L__BB8_188: + add.f32 %f1230, %f260, %f260; + selp.f32 %f1772, %f1230, 0f00000000, %p147; + bra.uni $L__BB8_189; + +$L__BB8_185: + mov.b32 %r439, %f1770; + xor.b32 %r440, %r439, -2147483648; + mov.b32 %f1226, %r440; + selp.f32 %f1772, %f1226, %f1770, %p13; + setp.geu.f32 %p257, %f260, 0f00000000; + @%p257 bra $L__BB8_189; + + mov.f32 %f1664, 0f40000000; + cvt.rzi.f32.f32 %f1228, %f1664; + setp.eq.f32 %p258, %f1228, 0f40000000; + @%p258 bra $L__BB8_189; + + mov.f32 %f1772, 0f7FFFFFFF; + +$L__BB8_189: + abs.f32 %f1668, %f260; + add.f32 %f1231, %f1668, 0f40000000; + mov.b32 %r441, %f1231; + setp.lt.s32 %p260, %r441, 2139095040; + @%p260 bra $L__BB8_194; + + abs.f32 %f1669, %f260; + setp.gtu.f32 %p261, %f1669, 0f7F800000; + @%p261 bra $L__BB8_193; + bra.uni $L__BB8_191; + +$L__BB8_193: + add.f32 %f1772, %f260, 0f40000000; + bra.uni $L__BB8_194; + +$L__BB8_191: + abs.f32 %f1670, %f260; + setp.neu.f32 %p262, %f1670, 0f7F800000; + @%p262 bra $L__BB8_194; + + selp.f32 %f1772, 0fFF800000, 0f7F800000, %p13; + +$L__BB8_194: + cvt.rn.f32.s32 %f1600, %r640; + add.f32 %f1599, %f1600, 0f3F000000; + sub.f32 %f1598, %f1599, %f1773; + sub.f32 %f1597, %f1600, %f1773; + add.f32 %f1596, %f1597, 0fBF000000; + mov.f32 %f1595, 0f32A57060; + mov.f32 %f1594, 0f4B400001; + mov.f32 %f1593, 0f437C0000; + mov.f32 %f1592, 0f3BBB989D; + mov.f32 %f1591, 0f3FB8AA3B; + { + .reg .b32 %temp; + mov.b64 {%temp, %r595}, %fd207; + } + and.b32 %r594, %r595, 2146435072; + mul.f32 %f1232, %f1772, 0fBF000000; + setp.eq.f32 %p263, %f260, 0f3F800000; + selp.f32 %f1233, 0fBF000000, %f1232, %p263; + fma.rn.f32 %f1236, %f1233, %f1592, %f413; + cvt.sat.f32.f32 %f1239, %f1236; + fma.rm.f32 %f1241, %f1239, %f1593, %f1594; + add.f32 %f1242, %f1241, 0fCB40007F; + neg.f32 %f1243, %f1242; + fma.rn.f32 %f1244, %f1233, %f1591, %f1243; + fma.rn.f32 %f1246, %f1233, %f1595, %f1244; + mov.b32 %r442, %f1241; + shl.b32 %r443, %r442, 23; + mov.b32 %f1247, %r443; + ex2.approx.ftz.f32 %f1248, %f1246; + mul.f32 %f1249, %f1248, %f1247; + sub.f32 %f1250, %f259, %f1249; + mul.f32 %f1251, %f49, %f1250; + mul.f32 %f272, %f244, %f1251; + mul.f32 %f1252, %f1596, %f1249; + mul.f32 %f1253, %f1598, %f259; + sub.f32 %f1254, %f1253, %f1252; + cvt.f64.f32 %fd235, %f1254; + mul.f64 %fd236, %fd68, %fd235; + cvt.f64.f32 %fd237, %f244; + mul.f64 %fd69, %fd236, %fd237; + cvt.f64.f32 %fd70, %f243; + { + .reg .b32 %temp; + mov.b64 {%temp, %r91}, %fd70; + } + abs.f64 %fd71, %fd70; + { // callseq 179, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd71; + .param .b64 param1; + st.param.f64 [param1+0], %fd207; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd389, [retval0+0]; + } // callseq 179 + setp.lt.s32 %p264, %r91, 0; + setp.eq.s32 %p265, %r594, 1062207488; + and.pred %p14, %p264, %p265; + not.pred %p266, %p14; + @%p266 bra $L__BB8_196; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r444}, %fd389; + } + xor.b32 %r445, %r444, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r446, %temp}, %fd389; + } + mov.b64 %fd389, {%r446, %r445}; + +$L__BB8_196: + setp.eq.f32 %p267, %f243, 0f00000000; + @%p267 bra $L__BB8_200; + bra.uni $L__BB8_197; + +$L__BB8_200: + { + .reg .b32 %temp; + mov.b64 {%temp, %r615}, %fd207; + } + setp.lt.s32 %p270, %r615, 0; + mov.u32 %r447, 0; + selp.b32 %r448, %r91, 0, %p265; + or.b32 %r449, %r448, 2146435072; + selp.b32 %r450, %r449, %r448, %p270; + mov.b64 %fd389, {%r447, %r450}; + bra.uni $L__BB8_201; + +$L__BB8_197: + setp.gt.s32 %p268, %r91, -1; + @%p268 bra $L__BB8_201; + + cvt.rzi.f64.f64 %fd240, %fd207; + setp.eq.f64 %p269, %fd240, 0d4000000000000000; + @%p269 bra $L__BB8_201; + + mov.f64 %fd389, 0dFFF8000000000000; + +$L__BB8_201: + add.f64 %fd77, %fd70, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r451}, %fd77; + } + and.b32 %r452, %r451, 2146435072; + setp.ne.s32 %p272, %r452, 2146435072; + mov.f64 %fd390, %fd389; + @%p272 bra $L__BB8_207; + + setp.gtu.f64 %p273, %fd71, 0d7FF0000000000000; + mov.f64 %fd390, %fd77; + @%p273 bra $L__BB8_207; + + setp.eq.s32 %p274, %r84, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r453, %temp}, %fd207; + } + setp.eq.s32 %p275, %r453, 0; + and.pred %p276, %p274, %p275; + @%p276 bra $L__BB8_206; + bra.uni $L__BB8_204; + +$L__BB8_206: + { + .reg .b32 %temp; + mov.b64 {%temp, %r614}, %fd207; + } + setp.lt.s32 %p282, %r614, 0; + mov.u32 %r458, 0; + setp.gt.f64 %p283, %fd71, 0d3FF0000000000000; + selp.b32 %r459, 2146435072, 0, %p283; + xor.b32 %r460, %r459, 2146435072; + selp.b32 %r461, %r460, %r459, %p282; + setp.eq.f32 %p284, %f243, 0fBF800000; + selp.b32 %r462, 1072693248, %r461, %p284; + mov.b64 %fd390, {%r458, %r462}; + bra.uni $L__BB8_207; + +$L__BB8_204: + { + .reg .b32 %temp; + mov.b64 {%r454, %temp}, %fd70; + } + and.b32 %r455, %r91, 2147483647; + setp.ne.s32 %p277, %r455, 2146435072; + setp.ne.s32 %p278, %r454, 0; + or.pred %p279, %p277, %p278; + mov.f64 %fd390, %fd389; + @%p279 bra $L__BB8_207; + + setp.ne.s32 %p280, %r84, 1071644672; + and.pred %p281, %p280, %p14; + selp.b32 %r456, %r86, %r85, %p281; + mov.u32 %r457, 0; + mov.b64 %fd390, {%r457, %r456}; + +$L__BB8_207: + mov.f32 %f1601, 0f47C35000; + setp.eq.f32 %p285, %f243, 0f3F800000; + selp.f64 %fd243, 0d3FF0000000000000, %fd390, %p285; + min.f32 %f1256, %f1758, %f1601; + cvt.f64.f32 %fd81, %f1256; + mul.f64 %fd244, %fd243, %fd81; + mul.f32 %f1257, %f214, %f245; + cvt.f64.f32 %fd245, %f1257; + sub.f64 %fd246, %fd245, %fd244; + cvt.f64.f32 %fd247, %f1749; + add.f64 %fd248, %fd246, %fd247; + cvt.rn.f32.f64 %f1749, %fd248; + cvt.f64.f32 %fd82, %f272; + { + .reg .b32 %temp; + mov.b64 {%temp, %r92}, %fd82; + } + abs.f64 %fd83, %fd82; + { // callseq 180, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd83; + .param .b64 param1; + st.param.f64 [param1+0], %fd207; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd392, [retval0+0]; + } // callseq 180 + setp.lt.s32 %p286, %r92, 0; + and.pred %p15, %p286, %p265; + not.pred %p288, %p15; + @%p288 bra $L__BB8_209; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r463}, %fd392; + } + xor.b32 %r464, %r463, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r465, %temp}, %fd392; + } + mov.b64 %fd392, {%r465, %r464}; + +$L__BB8_209: + setp.eq.f32 %p289, %f272, 0f00000000; + @%p289 bra $L__BB8_213; + bra.uni $L__BB8_210; + +$L__BB8_213: + { + .reg .b32 %temp; + mov.b64 {%temp, %r613}, %fd207; + } + setp.lt.s32 %p292, %r613, 0; + mov.u32 %r466, 0; + selp.b32 %r467, %r92, 0, %p265; + or.b32 %r468, %r467, 2146435072; + selp.b32 %r469, %r468, %r467, %p292; + mov.b64 %fd392, {%r466, %r469}; + bra.uni $L__BB8_214; + +$L__BB8_210: + setp.gt.s32 %p290, %r92, -1; + @%p290 bra $L__BB8_214; + + cvt.rzi.f64.f64 %fd251, %fd207; + setp.eq.f64 %p291, %fd251, 0d4000000000000000; + @%p291 bra $L__BB8_214; + + mov.f64 %fd392, 0dFFF8000000000000; + +$L__BB8_214: + add.f64 %fd89, %fd82, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r470}, %fd89; + } + and.b32 %r471, %r470, 2146435072; + setp.ne.s32 %p294, %r471, 2146435072; + mov.f64 %fd393, %fd392; + @%p294 bra $L__BB8_220; + + setp.gtu.f64 %p295, %fd83, 0d7FF0000000000000; + mov.f64 %fd393, %fd89; + @%p295 bra $L__BB8_220; + + setp.eq.s32 %p296, %r84, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r472, %temp}, %fd207; + } + setp.eq.s32 %p297, %r472, 0; + and.pred %p298, %p296, %p297; + @%p298 bra $L__BB8_219; + bra.uni $L__BB8_217; + +$L__BB8_219: + { + .reg .b32 %temp; + mov.b64 {%temp, %r612}, %fd207; + } + setp.lt.s32 %p304, %r612, 0; + mov.u32 %r477, 0; + setp.gt.f64 %p305, %fd83, 0d3FF0000000000000; + selp.b32 %r478, 2146435072, 0, %p305; + xor.b32 %r479, %r478, 2146435072; + selp.b32 %r480, %r479, %r478, %p304; + setp.eq.f32 %p306, %f272, 0fBF800000; + selp.b32 %r481, 1072693248, %r480, %p306; + mov.b64 %fd393, {%r477, %r481}; + bra.uni $L__BB8_220; + +$L__BB8_217: + { + .reg .b32 %temp; + mov.b64 {%r473, %temp}, %fd82; + } + and.b32 %r474, %r92, 2147483647; + setp.ne.s32 %p299, %r474, 2146435072; + setp.ne.s32 %p300, %r473, 0; + or.pred %p301, %p299, %p300; + mov.f64 %fd393, %fd392; + @%p301 bra $L__BB8_220; + + setp.ne.s32 %p302, %r84, 1071644672; + and.pred %p303, %p302, %p15; + selp.b32 %r475, %r86, %r85, %p303; + mov.u32 %r476, 0; + mov.b64 %fd393, {%r476, %r475}; + +$L__BB8_220: + setp.eq.f32 %p307, %f272, 0f3F800000; + selp.f64 %fd254, 0d3FF0000000000000, %fd393, %p307; + mul.f64 %fd255, %fd254, %fd81; + cvt.rn.f32.f64 %f1258, %fd69; + mul.f32 %f1259, %f214, %f1258; + cvt.f64.f32 %fd256, %f1259; + sub.f64 %fd257, %fd256, %fd255; + cvt.f64.f32 %fd258, %f1748; + add.f64 %fd259, %fd257, %fd258; + cvt.rn.f32.f64 %f1748, %fd259; + fma.rn.f32 %f1750, %f214, %f243, %f1750; + fma.rn.f32 %f1751, %f214, %f272, %f1751; + add.s32 %r640, %r640, 1; + setp.lt.s32 %p308, %r640, %r151; + @%p308 bra $L__BB8_113; + + add.s32 %r639, %r639, 1; + setp.lt.s32 %p309, %r639, %r151; + @%p309 bra $L__BB8_112; + +$L__BB8_222: + mov.f32 %f1604, 0fBF800000; + mov.f32 %f1603, 0f3F800000; + cvt.rn.f32.s32 %f1602, %r628; + div.rn.f32 %f1260, %f1750, %f1749; + max.f32 %f1262, %f1260, %f1604; + min.f32 %f1264, %f1262, %f1603; + div.rn.f32 %f1265, %f1264, %f1602; + mul.f32 %f1266, %f1265, 0f3F000000; + sub.f32 %f1774, %f1774, %f1266; + div.rn.f32 %f1267, %f1751, %f1748; + max.f32 %f1268, %f1267, %f1604; + min.f32 %f1269, %f1268, %f1603; + div.rn.f32 %f1270, %f1269, %f1602; + mul.f32 %f1271, %f1270, 0f3F000000; + sub.f32 %f1773, %f1773, %f1271; + shl.b64 %rd90, %rd14, 2; add.s64 %rd91, %rd2, %rd90; - ld.local.f32 %f1977, [%rd91]; - add.s64 %rd92, %rd4, %rd90; - st.local.f32 [%rd92], %f1977; - add.s64 %rd93, %rd3, %rd90; - ld.local.f32 %f1978, [%rd93]; - add.s64 %rd94, %rd5, %rd90; - st.local.f32 [%rd94], %f1978; - add.s32 %r420, %r418, 1; - -BB8_202: - setp.lt.u32 %p229, %r29, 4; - @%p229 bra BB8_204; - -BB8_203: - mul.wide.s32 %rd95, %r420, 4; - add.s64 %rd96, %rd2, %rd95; - ld.local.f32 %f1979, [%rd96]; - add.s64 %rd97, %rd4, %rd95; - ld.local.f32 %f1980, [%rd96+4]; - ld.local.f32 %f1981, [%rd96+8]; - ld.local.f32 %f1982, [%rd96+12]; - st.local.f32 [%rd97], %f1979; - add.s64 %rd98, %rd3, %rd95; - st.local.f32 [%rd97+4], %f1980; - st.local.f32 [%rd97+8], %f1981; - st.local.f32 [%rd97+12], %f1982; - ld.local.f32 %f1983, [%rd98]; - add.s64 %rd99, %rd5, %rd95; - ld.local.f32 %f1984, [%rd98+4]; - ld.local.f32 %f1985, [%rd98+8]; - ld.local.f32 %f1986, [%rd98+12]; - st.local.f32 [%rd99], %f1983; - st.local.f32 [%rd99+4], %f1984; - st.local.f32 [%rd99+8], %f1985; - st.local.f32 [%rd99+12], %f1986; - add.s32 %r420, %r420, 4; - setp.lt.s32 %p230, %r420, %r29; - @%p230 bra BB8_203; - -BB8_204: - mov.u32 %r421, %r29; - mov.f32 %f2290, %f361; - mov.f32 %f2291, %f453; - -BB8_205: - add.s32 %r400, %r29, 1; - setp.lt.s32 %p231, %r29, %r104; - @%p231 bra BB8_36; - -BB8_206: - ld.local.f32 %f462, [%rd4]; - setp.eq.f32 %p232, %f462, 0f00000000; - @%p232 bra BB8_217; - - ld.param.f32 %f2046, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_6]; - cvt.f64.f32 %fd1, %f2291; - cvt.f64.f32 %fd2, %f2046; - mul.f64 %fd3, %fd2, 0d3F847AE147AE147B; - setp.leu.f64 %p233, %fd1, %fd3; - setp.lt.s32 %p234, %r421, 1; - or.pred %p235, %p233, %p234; - @%p235 bra BB8_217; - - and.b32 %r78, %r421, 3; - setp.eq.s32 %p236, %r78, 0; - mov.u32 %r426, 0; - @%p236 bra BB8_214; - - setp.eq.s32 %p237, %r78, 1; - mov.u32 %r424, 0; - mov.f32 %f2295, %f462; - @%p237 bra BB8_213; - - setp.eq.s32 %p238, %r78, 2; - mov.u32 %r423, 0; - mov.f32 %f2294, %f462; - @%p238 bra BB8_212; - - st.local.f32 [%rd2], %f462; - ld.local.f32 %f1987, [%rd5]; - st.local.f32 [%rd3], %f1987; - ld.local.f32 %f2294, [%rd4+4]; - mov.u32 %r423, 1; - -BB8_212: - mul.wide.u32 %rd100, %r423, 4; - add.s64 %rd101, %rd2, %rd100; - st.local.f32 [%rd101], %f2294; - add.s64 %rd102, %rd5, %rd100; - ld.local.f32 %f1988, [%rd102]; - add.s64 %rd103, %rd3, %rd100; - st.local.f32 [%rd103], %f1988; - add.s32 %r424, %r423, 1; - mul.wide.u32 %rd104, %r424, 4; - add.s64 %rd105, %rd4, %rd104; - ld.local.f32 %f2295, [%rd105]; - -BB8_213: - mul.wide.s32 %rd106, %r424, 4; - add.s64 %rd107, %rd2, %rd106; - st.local.f32 [%rd107], %f2295; - add.s64 %rd108, %rd5, %rd106; - ld.local.f32 %f1989, [%rd108]; - add.s64 %rd109, %rd3, %rd106; - st.local.f32 [%rd109], %f1989; - add.s32 %r426, %r424, 1; - -BB8_214: - setp.lt.u32 %p239, %r421, 4; - @%p239 bra BB8_217; - - mul.wide.s32 %rd162, %r426, 4; - -BB8_216: - add.s64 %rd110, %rd4, %rd162; - ld.local.f32 %f1990, [%rd110]; - add.s64 %rd111, %rd2, %rd162; - ld.local.f32 %f1991, [%rd110+4]; - ld.local.f32 %f1992, [%rd110+8]; - ld.local.f32 %f1993, [%rd110+12]; - st.local.f32 [%rd111], %f1990; - add.s64 %rd112, %rd5, %rd162; - st.local.f32 [%rd111+4], %f1991; - st.local.f32 [%rd111+8], %f1992; - st.local.f32 [%rd111+12], %f1993; - ld.local.f32 %f1994, [%rd112]; - add.s64 %rd113, %rd3, %rd162; - ld.local.f32 %f1995, [%rd112+4]; - ld.local.f32 %f1996, [%rd112+8]; - ld.local.f32 %f1997, [%rd112+12]; - st.local.f32 [%rd113], %f1994; - st.local.f32 [%rd113+4], %f1995; - st.local.f32 [%rd113+8], %f1996; - st.local.f32 [%rd113+12], %f1997; - add.s64 %rd162, %rd162, 16; - add.s32 %r426, %r426, 4; - setp.lt.s32 %p240, %r426, %r421; - @%p240 bra BB8_216; - -BB8_217: - setp.lt.s32 %p254, %r104, 1; - ld.param.f32 %f2045, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_6]; - setp.gt.f32 %p241, %f2291, %f2045; - selp.f32 %f467, 0f3F800000, 0f00000000, %p241; - @%p254 bra BB8_237; - - mul.lo.s32 %r86, %r109, %r104; - and.b32 %r346, %r104, 3; - mov.u32 %r427, 0; - setp.eq.s32 %p243, %r346, 0; - @%p243 bra BB8_224; - - setp.eq.s32 %p244, %r346, 1; - @%p244 bra BB8_223; - - setp.eq.s32 %p245, %r346, 2; - @%p245 bra BB8_222; - - ld.param.u64 %rd151, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_7]; - mul.f32 %f1998, %f467, %f462; - cvta.to.global.u64 %rd114, %rd151; - mul.wide.s32 %rd115, %r86, 4; - add.s64 %rd116, %rd114, %rd115; - st.global.f32 [%rd116], %f1998; - ld.local.f32 %f462, [%rd4+4]; - mov.u32 %r427, 1; - -BB8_222: - ld.param.u64 %rd152, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_7]; - add.s32 %r352, %r86, %r427; - cvta.to.global.u64 %rd117, %rd152; - mul.wide.s32 %rd118, %r352, 4; - add.s64 %rd119, %rd117, %rd118; - mul.f32 %f1999, %f467, %f462; - st.global.f32 [%rd119], %f1999; - add.s32 %r427, %r427, 1; - mul.wide.u32 %rd120, %r427, 4; - add.s64 %rd121, %rd4, %rd120; - ld.local.f32 %f462, [%rd121]; - -BB8_223: - ld.param.u64 %rd153, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_7]; - add.s32 %r353, %r86, %r427; - cvta.to.global.u64 %rd122, %rd153; - mul.wide.s32 %rd123, %r353, 4; - add.s64 %rd124, %rd122, %rd123; - mul.f32 %f2000, %f467, %f462; - st.global.f32 [%rd124], %f2000; - add.s32 %r427, %r427, 1; - -BB8_224: - setp.lt.u32 %p246, %r104, 4; - @%p246 bra BB8_227; - - ld.param.u64 %rd154, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_7]; - mad.lo.s32 %r358, %r104, %r109, %r427; - cvta.to.global.u64 %rd125, %rd154; - mul.wide.s32 %rd126, %r358, 4; - add.s64 %rd164, %rd125, %rd126; - mul.wide.s32 %rd127, %r427, 4; - add.s64 %rd163, %rd4, %rd127; - -BB8_226: - ld.local.f32 %f2001, [%rd163]; - mul.f32 %f2002, %f467, %f2001; - ld.local.f32 %f2003, [%rd163+4]; - ld.local.f32 %f2004, [%rd163+8]; - ld.local.f32 %f2005, [%rd163+12]; - st.global.f32 [%rd164], %f2002; - mul.f32 %f2006, %f467, %f2003; - st.global.f32 [%rd164+4], %f2006; - mul.f32 %f2007, %f467, %f2004; - st.global.f32 [%rd164+8], %f2007; - mul.f32 %f2008, %f467, %f2005; - st.global.f32 [%rd164+12], %f2008; - add.s64 %rd164, %rd164, 16; - add.s64 %rd163, %rd163, 16; - add.s32 %r427, %r427, 4; - setp.lt.s32 %p247, %r427, %r104; - @%p247 bra BB8_226; - -BB8_227: - setp.lt.s32 %p255, %r104, 1; - @%p255 bra BB8_237; - - mov.u32 %r431, 0; - @%p243 bra BB8_234; - - setp.eq.s32 %p250, %r346, 1; - @%p250 bra BB8_233; - - setp.eq.s32 %p251, %r346, 2; - @%p251 bra BB8_232; - - ld.param.u64 %rd155, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_8]; - ld.local.f32 %f2009, [%rd5]; - mul.f32 %f2010, %f467, %f2009; - cvta.to.global.u64 %rd128, %rd155; - mul.wide.s32 %rd129, %r86, 4; - add.s64 %rd130, %rd128, %rd129; - st.global.f32 [%rd130], %f2010; - mov.u32 %r431, 1; - -BB8_232: - ld.param.u64 %rd156, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_8]; - mul.wide.u32 %rd131, %r431, 4; - add.s64 %rd132, %rd5, %rd131; - ld.local.f32 %f2011, [%rd132]; - mul.f32 %f2012, %f467, %f2011; - add.s32 %r368, %r86, %r431; - cvta.to.global.u64 %rd133, %rd156; - mul.wide.s32 %rd134, %r368, 4; - add.s64 %rd135, %rd133, %rd134; - st.global.f32 [%rd135], %f2012; - add.s32 %r431, %r431, 1; - -BB8_233: - ld.param.u64 %rd157, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_8]; - mul.wide.s32 %rd136, %r431, 4; - add.s64 %rd137, %rd5, %rd136; - ld.local.f32 %f2013, [%rd137]; - mul.f32 %f2014, %f467, %f2013; - add.s32 %r369, %r86, %r431; - cvta.to.global.u64 %rd138, %rd157; - mul.wide.s32 %rd139, %r369, 4; - add.s64 %rd140, %rd138, %rd139; - st.global.f32 [%rd140], %f2014; - add.s32 %r431, %r431, 1; - -BB8_234: - @%p246 bra BB8_237; - - ld.param.u64 %rd158, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_8]; - mad.lo.s32 %r374, %r104, %r109, %r431; - cvta.to.global.u64 %rd141, %rd158; - mul.wide.s32 %rd142, %r374, 4; - add.s64 %rd166, %rd141, %rd142; - mul.wide.s32 %rd143, %r431, 4; - add.s64 %rd165, %rd5, %rd143; - -BB8_236: - ld.local.f32 %f2015, [%rd165]; - mul.f32 %f2016, %f467, %f2015; - ld.local.f32 %f2017, [%rd165+4]; - ld.local.f32 %f2018, [%rd165+8]; - ld.local.f32 %f2019, [%rd165+12]; - st.global.f32 [%rd166], %f2016; - mul.f32 %f2020, %f467, %f2017; - st.global.f32 [%rd166+4], %f2020; - mul.f32 %f2021, %f467, %f2018; - st.global.f32 [%rd166+8], %f2021; - mul.f32 %f2022, %f467, %f2019; - st.global.f32 [%rd166+12], %f2022; - add.s64 %rd166, %rd166, 16; - add.s64 %rd165, %rd165, 16; - add.s32 %r431, %r431, 4; - setp.lt.s32 %p253, %r431, %r104; - @%p253 bra BB8_236; - -BB8_237: - ld.param.u64 %rd150, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_10]; - ld.param.u64 %rd149, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_9]; - mul.f32 %f2023, %f2290, %f467; - cvta.to.global.u64 %rd144, %rd149; - mul.wide.s32 %rd145, %r109, 4; - add.s64 %rd146, %rd144, %rd145; - st.global.f32 [%rd146], %f2023; - cvta.to.global.u64 %rd147, %rd150; - add.s64 %rd148, %rd147, %rd145; - st.global.f32 [%rd148], %f2291; - -BB8_238: + st.local.f32 [%rd91], %f1774; + add.s64 %rd92, %rd3, %rd90; + st.local.f32 [%rd92], %f1773; + cvt.u32.u64 %r482, %rd14; + add.s32 %r638, %r482, 1; + setp.lt.u32 %p310, %r638, %r628; + @%p310 bra $L__BB8_110; + + ld.param.u32 %r596, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_3]; + add.s32 %r634, %r634, 1; + setp.lt.s32 %p311, %r634, %r596; + @%p311 bra $L__BB8_77; + +$L__BB8_224: + mov.f32 %f1787, 0f00000000; + mov.f32 %f1790, 0fC47A0000; + mov.f32 %f1791, 0f447A0000; + mov.f32 %f1789, %f1791; + mov.f32 %f1788, %f1790; + @%p18 bra $L__BB8_250; + + mov.u32 %r483, 0; + mov.u32 %r642, %r483; + +$L__BB8_226: + cvt.rn.f32.s32 %f293, %r642; + sqrt.rn.f32 %f294, %f34; + mov.u32 %r643, %r483; + +$L__BB8_227: + cvt.rn.f32.s32 %f300, %r643; + mov.f32 %f1792, %f1781; + mov.u32 %r644, %r483; + +$L__BB8_228: + mul.wide.s32 %rd93, %r644, 4; + add.s64 %rd94, %rd2, %rd93; + add.s64 %rd95, %rd3, %rd93; + ld.local.f32 %f1774, [%rd94]; + setp.gt.f32 %p313, %f1774, %f1790; + selp.f32 %f1790, %f1774, %f1790, %p313; + setp.lt.f32 %p314, %f1774, %f1791; + selp.f32 %f1791, %f1774, %f1791, %p314; + ld.local.f32 %f1773, [%rd95]; + setp.gt.f32 %p315, %f1773, %f1788; + selp.f32 %f1788, %f1773, %f1788, %p315; + setp.lt.f32 %p316, %f1773, %f1789; + selp.f32 %f1789, %f1773, %f1789, %p316; + sub.f32 %f312, %f293, %f1774; + add.f32 %f1282, %f312, 0f3F000000; + mul.f32 %f313, %f1282, %f294; + abs.f32 %f1283, %f313; + setp.ltu.f32 %p317, %f1283, 0f3F8060FE; + setp.ge.f32 %p318, %f1283, 0f3F8060FE; + mul.f32 %f1284, %f313, %f313; + selp.f32 %f1285, %f1283, %f1284, %p318; + selp.f32 %f1286, 0f3789CA3C, 0f38B1E96A, %p318; + selp.f32 %f1287, 0fB9F560B9, 0fBA574D20, %p318; + fma.rn.f32 %f1288, %f1286, %f1285, %f1287; + selp.f32 %f1289, 0f3BAC840B, 0f3BAAD5EA, %p318; + fma.rn.f32 %f1290, %f1288, %f1285, %f1289; + selp.f32 %f1291, 0fBD0C8162, 0fBCDC1BE7, %p318; + fma.rn.f32 %f1292, %f1290, %f1285, %f1291; + selp.f32 %f1293, 0f3E1CF906, 0f3DE718AF, %p318; + fma.rn.f32 %f1294, %f1292, %f1285, %f1293; + selp.f32 %f1295, 0f3F6A937E, 0fBEC093AC, %p318; + fma.rn.f32 %f1296, %f1294, %f1285, %f1295; + selp.f32 %f1297, 0f3F20D842, 0f3E0375D3, %p318; + fma.rn.f32 %f1298, %f1296, %f1285, %f1297; + neg.f32 %f1299, %f1283; + selp.f32 %f1300, %f1299, %f313, %p318; + fma.rn.f32 %f1797, %f1298, %f1300, %f1300; + @%p317 bra $L__BB8_230; + + ex2.approx.ftz.f32 %f1301, %f1797; + mov.f32 %f1302, 0f3F800000; + sub.f32 %f1303, %f1302, %f1301; + mov.b32 %r486, %f1303; + mov.b32 %r487, %f313; + and.b32 %r488, %r487, -2147483648; + or.b32 %r489, %r488, %r486; + mov.b32 %f1797, %r489; + +$L__BB8_230: + add.f32 %f1304, %f312, 0fBF000000; + mul.f32 %f317, %f1304, %f294; + abs.f32 %f1305, %f317; + setp.ltu.f32 %p319, %f1305, 0f3F8060FE; + setp.ge.f32 %p320, %f1305, 0f3F8060FE; + mul.f32 %f1306, %f317, %f317; + selp.f32 %f1307, %f1305, %f1306, %p320; + selp.f32 %f1308, 0f3789CA3C, 0f38B1E96A, %p320; + selp.f32 %f1309, 0fB9F560B9, 0fBA574D20, %p320; + fma.rn.f32 %f1310, %f1308, %f1307, %f1309; + selp.f32 %f1311, 0f3BAC840B, 0f3BAAD5EA, %p320; + fma.rn.f32 %f1312, %f1310, %f1307, %f1311; + selp.f32 %f1313, 0fBD0C8162, 0fBCDC1BE7, %p320; + fma.rn.f32 %f1314, %f1312, %f1307, %f1313; + selp.f32 %f1315, 0f3E1CF906, 0f3DE718AF, %p320; + fma.rn.f32 %f1316, %f1314, %f1307, %f1315; + selp.f32 %f1317, 0f3F6A937E, 0fBEC093AC, %p320; + fma.rn.f32 %f1318, %f1316, %f1307, %f1317; + selp.f32 %f1319, 0f3F20D842, 0f3E0375D3, %p320; + fma.rn.f32 %f1320, %f1318, %f1307, %f1319; + neg.f32 %f1321, %f1305; + selp.f32 %f1322, %f1321, %f317, %p320; + fma.rn.f32 %f1798, %f1320, %f1322, %f1322; + @%p319 bra $L__BB8_232; + + ex2.approx.ftz.f32 %f1323, %f1798; + mov.f32 %f1324, 0f3F800000; + sub.f32 %f1325, %f1324, %f1323; + mov.b32 %r490, %f1325; + mov.b32 %r491, %f317; + and.b32 %r492, %r491, -2147483648; + or.b32 %r493, %r492, %r490; + mov.b32 %f1798, %r493; + +$L__BB8_232: + sub.f32 %f321, %f1797, %f1798; + sub.f32 %f322, %f300, %f1773; + add.f32 %f1326, %f322, 0f3F000000; + mul.f32 %f323, %f1326, %f294; + abs.f32 %f1327, %f323; + setp.ltu.f32 %p321, %f1327, 0f3F8060FE; + setp.ge.f32 %p322, %f1327, 0f3F8060FE; + mul.f32 %f1328, %f323, %f323; + selp.f32 %f1329, %f1327, %f1328, %p322; + selp.f32 %f1330, 0f3789CA3C, 0f38B1E96A, %p322; + selp.f32 %f1331, 0fB9F560B9, 0fBA574D20, %p322; + fma.rn.f32 %f1332, %f1330, %f1329, %f1331; + selp.f32 %f1333, 0f3BAC840B, 0f3BAAD5EA, %p322; + fma.rn.f32 %f1334, %f1332, %f1329, %f1333; + selp.f32 %f1335, 0fBD0C8162, 0fBCDC1BE7, %p322; + fma.rn.f32 %f1336, %f1334, %f1329, %f1335; + selp.f32 %f1337, 0f3E1CF906, 0f3DE718AF, %p322; + fma.rn.f32 %f1338, %f1336, %f1329, %f1337; + selp.f32 %f1339, 0f3F6A937E, 0fBEC093AC, %p322; + fma.rn.f32 %f1340, %f1338, %f1329, %f1339; + selp.f32 %f1341, 0f3F20D842, 0f3E0375D3, %p322; + fma.rn.f32 %f1342, %f1340, %f1329, %f1341; + neg.f32 %f1343, %f1327; + selp.f32 %f1344, %f1343, %f323, %p322; + fma.rn.f32 %f1799, %f1342, %f1344, %f1344; + @%p321 bra $L__BB8_234; + + ex2.approx.ftz.f32 %f1345, %f1799; + mov.f32 %f1346, 0f3F800000; + sub.f32 %f1347, %f1346, %f1345; + mov.b32 %r494, %f1347; + mov.b32 %r495, %f323; + and.b32 %r496, %r495, -2147483648; + or.b32 %r497, %r496, %r494; + mov.b32 %f1799, %r497; + +$L__BB8_234: + add.f32 %f1348, %f322, 0fBF000000; + mul.f32 %f327, %f1348, %f294; + abs.f32 %f1349, %f327; + setp.ltu.f32 %p323, %f1349, 0f3F8060FE; + setp.ge.f32 %p324, %f1349, 0f3F8060FE; + mul.f32 %f1350, %f327, %f327; + selp.f32 %f1351, %f1349, %f1350, %p324; + selp.f32 %f1352, 0f3789CA3C, 0f38B1E96A, %p324; + selp.f32 %f1353, 0fB9F560B9, 0fBA574D20, %p324; + fma.rn.f32 %f1354, %f1352, %f1351, %f1353; + selp.f32 %f1355, 0f3BAC840B, 0f3BAAD5EA, %p324; + fma.rn.f32 %f1356, %f1354, %f1351, %f1355; + selp.f32 %f1357, 0fBD0C8162, 0fBCDC1BE7, %p324; + fma.rn.f32 %f1358, %f1356, %f1351, %f1357; + selp.f32 %f1359, 0f3E1CF906, 0f3DE718AF, %p324; + fma.rn.f32 %f1360, %f1358, %f1351, %f1359; + selp.f32 %f1361, 0f3F6A937E, 0fBEC093AC, %p324; + fma.rn.f32 %f1362, %f1360, %f1351, %f1361; + selp.f32 %f1363, 0f3F20D842, 0f3E0375D3, %p324; + fma.rn.f32 %f1364, %f1362, %f1351, %f1363; + neg.f32 %f1365, %f1349; + selp.f32 %f1366, %f1365, %f327, %p324; + fma.rn.f32 %f1800, %f1364, %f1366, %f1366; + @%p323 bra $L__BB8_236; + + ex2.approx.ftz.f32 %f1367, %f1800; + mov.f32 %f1368, 0f3F800000; + sub.f32 %f1369, %f1368, %f1367; + mov.b32 %r498, %f1369; + mov.b32 %r499, %f327; + and.b32 %r500, %r499, -2147483648; + or.b32 %r501, %r500, %r498; + mov.b32 %f1800, %r501; + +$L__BB8_236: + sub.f32 %f1370, %f1799, %f1800; + mul.f32 %f1371, %f1370, 0f3F000000; + mul.f32 %f1372, %f321, 0f3F000000; + mul.f32 %f1373, %f1372, %f379; + fma.rn.f32 %f1792, %f1373, %f1371, %f1792; + add.s32 %r644, %r644, 1; + setp.lt.u32 %p325, %r644, %r628; + @%p325 bra $L__BB8_228; + + mad.lo.s32 %r502, %r643, %r151, %r642; + add.s32 %r503, %r502, %r1; + mul.wide.s32 %rd96, %r503, 4; + add.s64 %rd97, %rd1, %rd96; + ld.global.f32 %f332, [%rd97]; + mul.f32 %f1374, %f1792, 0f4B000000; + setp.lt.f32 %p326, %f1792, 0f00800000; + selp.f32 %f333, %f1374, %f1792, %p326; + selp.f32 %f1375, 0fC1B80000, 0f00000000, %p326; + mov.b32 %r504, %f333; + add.s32 %r505, %r504, -1059760811; + and.b32 %r506, %r505, -8388608; + sub.s32 %r507, %r504, %r506; + mov.b32 %f1376, %r507; + cvt.rn.f32.s32 %f1377, %r506; + mov.f32 %f1378, 0f34000000; + fma.rn.f32 %f1379, %f1377, %f1378, %f1375; + add.f32 %f1380, %f1376, 0fBF800000; + mov.f32 %f1381, 0f3E1039F6; + mov.f32 %f1382, 0fBE055027; + fma.rn.f32 %f1383, %f1382, %f1380, %f1381; + mov.f32 %f1384, 0fBDF8CDCC; + fma.rn.f32 %f1385, %f1383, %f1380, %f1384; + mov.f32 %f1386, 0f3E0F2955; + fma.rn.f32 %f1387, %f1385, %f1380, %f1386; + mov.f32 %f1388, 0fBE2AD8B9; + fma.rn.f32 %f1389, %f1387, %f1380, %f1388; + mov.f32 %f1390, 0f3E4CED0B; + fma.rn.f32 %f1391, %f1389, %f1380, %f1390; + mov.f32 %f1392, 0fBE7FFF22; + fma.rn.f32 %f1393, %f1391, %f1380, %f1392; + mov.f32 %f1394, 0f3EAAAA78; + fma.rn.f32 %f1395, %f1393, %f1380, %f1394; + mov.f32 %f1396, 0fBF000000; + fma.rn.f32 %f1397, %f1395, %f1380, %f1396; + mul.f32 %f1398, %f1380, %f1397; + fma.rn.f32 %f1399, %f1398, %f1380, %f1380; + mov.f32 %f1400, 0f3F317218; + fma.rn.f32 %f1801, %f1379, %f1400, %f1399; + setp.lt.u32 %p327, %r504, 2139095040; + @%p327 bra $L__BB8_239; + + mov.f32 %f1401, 0f7F800000; + fma.rn.f32 %f1801, %f333, %f1401, %f1401; + +$L__BB8_239: + setp.eq.f32 %p328, %f333, 0f00000000; + selp.f32 %f337, 0fFF800000, %f1801, %p328; + mul.f32 %f1402, %f1792, 0f40C90FD8; + setp.lt.f32 %p329, %f1402, 0f00800000; + mul.f32 %f1403, %f1402, 0f4B000000; + selp.f32 %f338, %f1403, %f1402, %p329; + selp.f32 %f1404, 0fC1B80000, 0f00000000, %p329; + mov.b32 %r508, %f338; + add.s32 %r509, %r508, -1059760811; + and.b32 %r510, %r509, -8388608; + sub.s32 %r511, %r508, %r510; + mov.b32 %f1405, %r511; + cvt.rn.f32.s32 %f1406, %r510; + fma.rn.f32 %f1408, %f1406, %f1378, %f1404; + add.f32 %f1409, %f1405, 0fBF800000; + fma.rn.f32 %f1412, %f1382, %f1409, %f1381; + fma.rn.f32 %f1414, %f1412, %f1409, %f1384; + fma.rn.f32 %f1416, %f1414, %f1409, %f1386; + fma.rn.f32 %f1418, %f1416, %f1409, %f1388; + fma.rn.f32 %f1420, %f1418, %f1409, %f1390; + fma.rn.f32 %f1422, %f1420, %f1409, %f1392; + fma.rn.f32 %f1424, %f1422, %f1409, %f1394; + fma.rn.f32 %f1426, %f1424, %f1409, %f1396; + mul.f32 %f1427, %f1409, %f1426; + fma.rn.f32 %f1428, %f1427, %f1409, %f1409; + fma.rn.f32 %f1802, %f1408, %f1400, %f1428; + setp.lt.u32 %p330, %r508, 2139095040; + @%p330 bra $L__BB8_241; + + mov.f32 %f1430, 0f7F800000; + fma.rn.f32 %f1802, %f338, %f1430, %f1430; + +$L__BB8_241: + setp.gt.f32 %p331, %f332, 0f00000000; + @%p331 bra $L__BB8_243; + bra.uni $L__BB8_242; + +$L__BB8_243: + mul.f32 %f1431, %f332, 0f4B000000; + setp.lt.f32 %p332, %f332, 0f00800000; + selp.f32 %f343, %f1431, %f332, %p332; + selp.f32 %f1432, 0fC1B80000, 0f00000000, %p332; + mov.b32 %r512, %f343; + add.s32 %r513, %r512, -1059760811; + and.b32 %r514, %r513, -8388608; + sub.s32 %r515, %r512, %r514; + mov.b32 %f1433, %r515; + cvt.rn.f32.s32 %f1434, %r514; + fma.rn.f32 %f1436, %f1434, %f1378, %f1432; + add.f32 %f1437, %f1433, 0fBF800000; + fma.rn.f32 %f1440, %f1382, %f1437, %f1381; + fma.rn.f32 %f1442, %f1440, %f1437, %f1384; + fma.rn.f32 %f1444, %f1442, %f1437, %f1386; + fma.rn.f32 %f1446, %f1444, %f1437, %f1388; + fma.rn.f32 %f1448, %f1446, %f1437, %f1390; + fma.rn.f32 %f1450, %f1448, %f1437, %f1392; + fma.rn.f32 %f1452, %f1450, %f1437, %f1394; + fma.rn.f32 %f1454, %f1452, %f1437, %f1396; + mul.f32 %f1455, %f1437, %f1454; + fma.rn.f32 %f1456, %f1455, %f1437, %f1437; + fma.rn.f32 %f1803, %f1436, %f1400, %f1456; + setp.lt.u32 %p333, %r512, 2139095040; + @%p333 bra $L__BB8_245; + + mov.f32 %f1458, 0f7F800000; + fma.rn.f32 %f1803, %f343, %f1458, %f1458; + +$L__BB8_245: + setp.eq.f32 %p334, %f343, 0f00000000; + selp.f32 %f1459, 0fFF800000, %f1803, %p334; + mul.f32 %f1460, %f332, %f1459; + mul.f32 %f1461, %f332, %f337; + sub.f32 %f1462, %f1461, %f1792; + sub.f32 %f1463, %f1462, %f1460; + add.f32 %f347, %f332, %f1463; + mul.f32 %f1464, %f332, 0f40C90FD8; + setp.lt.f32 %p335, %f1464, 0f00800000; + mul.f32 %f1465, %f1464, 0f4B000000; + selp.f32 %f348, %f1465, %f1464, %p335; + selp.f32 %f1466, 0fC1B80000, 0f00000000, %p335; + mov.b32 %r516, %f348; + add.s32 %r517, %r516, -1059760811; + and.b32 %r518, %r517, -8388608; + sub.s32 %r519, %r516, %r518; + mov.b32 %f1467, %r519; + cvt.rn.f32.s32 %f1468, %r518; + fma.rn.f32 %f1470, %f1468, %f1378, %f1466; + add.f32 %f1471, %f1467, 0fBF800000; + fma.rn.f32 %f1474, %f1382, %f1471, %f1381; + fma.rn.f32 %f1476, %f1474, %f1471, %f1384; + fma.rn.f32 %f1478, %f1476, %f1471, %f1386; + fma.rn.f32 %f1480, %f1478, %f1471, %f1388; + fma.rn.f32 %f1482, %f1480, %f1471, %f1390; + fma.rn.f32 %f1484, %f1482, %f1471, %f1392; + fma.rn.f32 %f1486, %f1484, %f1471, %f1394; + fma.rn.f32 %f1488, %f1486, %f1471, %f1396; + mul.f32 %f1489, %f1471, %f1488; + fma.rn.f32 %f1490, %f1489, %f1471, %f1471; + fma.rn.f32 %f1804, %f1470, %f1400, %f1490; + setp.lt.u32 %p336, %r516, 2139095040; + @%p336 bra $L__BB8_247; + + mov.f32 %f1492, 0f7F800000; + fma.rn.f32 %f1804, %f348, %f1492, %f1492; + +$L__BB8_247: + mul.f32 %f1493, %f1804, 0f3F000000; + setp.eq.f32 %p337, %f348, 0f00000000; + selp.f32 %f1494, 0fFF800000, %f1493, %p337; + sub.f32 %f1805, %f347, %f1494; + bra.uni $L__BB8_248; + +$L__BB8_242: + neg.f32 %f1805, %f1792; + +$L__BB8_248: + mul.f32 %f1495, %f1792, %f337; + sub.f32 %f1496, %f1805, %f1495; + add.f32 %f1497, %f1792, %f1496; + add.f32 %f1498, %f1495, %f1497; + sub.f32 %f1499, %f1498, %f1792; + setp.eq.f32 %p338, %f338, 0f00000000; + mul.f32 %f1500, %f1802, 0f3F000000; + selp.f32 %f1501, 0fFF800000, %f1500, %p338; + add.f32 %f1502, %f1501, %f1499; + add.f32 %f1503, %f1502, %f1502; + sub.f32 %f1787, %f1787, %f1503; + add.s32 %r643, %r643, 1; + setp.lt.s32 %p339, %r643, %r151; + @%p339 bra $L__BB8_227; + + add.s32 %r642, %r642, 1; + setp.lt.s32 %p340, %r642, %r151; + @%p340 bra $L__BB8_226; + +$L__BB8_250: + shl.b32 %r520, %r628, 1; + not.b32 %r521, %r520; + mad.lo.s32 %r522, %r151, %r151, %r521; + cvt.rn.f32.s32 %f1504, %r522; + sqrt.rn.f32 %f1505, %f1504; + sqrt.rn.f32 %f1506, %f1787; + sub.f32 %f362, %f1506, %f1505; + cvt.f64.f32 %fd93, %f362; + { + .reg .b32 %temp; + mov.b64 {%temp, %r103}, %fd93; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r104}, %fd207; + } + and.b32 %r105, %r104, 2146435072; + setp.eq.s32 %p341, %r105, 1062207488; + abs.f64 %fd94, %fd93; + { // callseq 181, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd94; + .param .b64 param1; + st.param.f64 [param1+0], %fd207; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd395, [retval0+0]; + } // callseq 181 + setp.lt.s32 %p342, %r103, 0; + and.pred %p16, %p342, %p341; + not.pred %p343, %p16; + @%p343 bra $L__BB8_252; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r523}, %fd395; + } + xor.b32 %r524, %r523, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r525, %temp}, %fd395; + } + mov.b64 %fd395, {%r525, %r524}; + +$L__BB8_252: + setp.eq.f32 %p344, %f362, 0f00000000; + @%p344 bra $L__BB8_256; + bra.uni $L__BB8_253; + +$L__BB8_256: + selp.b32 %r526, %r103, 0, %p341; + mov.u32 %r527, 0; + or.b32 %r528, %r526, 2146435072; + setp.lt.s32 %p348, %r104, 0; + selp.b32 %r529, %r528, %r526, %p348; + mov.b64 %fd395, {%r527, %r529}; + bra.uni $L__BB8_257; + +$L__BB8_253: + setp.gt.s32 %p345, %r103, -1; + @%p345 bra $L__BB8_257; + + cvt.rzi.f64.f64 %fd262, %fd207; + setp.eq.f64 %p346, %fd262, 0d4000000000000000; + @%p346 bra $L__BB8_257; + + mov.f64 %fd395, 0dFFF8000000000000; + +$L__BB8_257: + add.f64 %fd100, %fd93, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r530}, %fd100; + } + and.b32 %r531, %r530, 2146435072; + setp.ne.s32 %p349, %r531, 2146435072; + mov.f64 %fd396, %fd395; + @%p349 bra $L__BB8_263; + + setp.gtu.f64 %p350, %fd94, 0d7FF0000000000000; + mov.f64 %fd396, %fd100; + @%p350 bra $L__BB8_263; + + { + .reg .b32 %temp; + mov.b64 {%r532, %temp}, %fd207; + } + and.b32 %r106, %r104, 2147483647; + setp.eq.s32 %p351, %r106, 2146435072; + setp.eq.s32 %p352, %r532, 0; + and.pred %p353, %p351, %p352; + @%p353 bra $L__BB8_262; + bra.uni $L__BB8_260; + +$L__BB8_262: + setp.gt.f64 %p360, %fd94, 0d3FF0000000000000; + selp.b32 %r539, 2146435072, 0, %p360; + mov.u32 %r540, 0; + xor.b32 %r541, %r539, 2146435072; + setp.lt.s32 %p361, %r104, 0; + selp.b32 %r542, %r541, %r539, %p361; + setp.eq.f32 %p362, %f362, 0fBF800000; + selp.b32 %r543, 1072693248, %r542, %p362; + mov.b64 %fd396, {%r540, %r543}; + bra.uni $L__BB8_263; + +$L__BB8_260: + { + .reg .b32 %temp; + mov.b64 {%r533, %temp}, %fd93; + } + and.b32 %r534, %r103, 2147483647; + setp.ne.s32 %p354, %r534, 2146435072; + setp.ne.s32 %p355, %r533, 0; + or.pred %p356, %p354, %p355; + mov.f64 %fd396, %fd395; + @%p356 bra $L__BB8_263; + + setp.gt.s32 %p357, %r104, -1; + selp.b32 %r535, 2146435072, 0, %p357; + mov.u32 %r536, 0; + setp.ne.s32 %p358, %r106, 1071644672; + and.pred %p359, %p358, %p16; + or.b32 %r537, %r535, -2147483648; + selp.b32 %r538, %r537, %r535, %p359; + mov.b64 %fd396, {%r536, %r538}; + +$L__BB8_263: + setp.eq.f32 %p363, %f362, 0f3F800000; + selp.f64 %fd104, 0d3FF0000000000000, %fd396, %p363; + mov.f64 %fd265, 0d3FF0000000000000; + mul.f32 %f1507, %f362, 0f3F4E353F; + cvt.f64.f32 %fd266, %f1507; + fma.rn.f64 %fd105, %fd104, 0dBFEFB71760000000, %fd266; + mov.f64 %fd267, 0d4338000000000000; + mov.f64 %fd268, 0d3FF71547652B82FE; + fma.rn.f64 %fd269, %fd105, %fd268, %fd267; + { + .reg .b32 %temp; + mov.b64 {%r107, %temp}, %fd269; + } + mov.f64 %fd270, 0dC338000000000000; + add.rn.f64 %fd271, %fd269, %fd270; + mov.f64 %fd272, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd273, %fd271, %fd272, %fd105; + mov.f64 %fd274, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd275, %fd271, %fd274, %fd273; + mov.f64 %fd276, 0d3E928AF3FCA213EA; + mov.f64 %fd277, 0d3E5ADE1569CE2BDF; + fma.rn.f64 %fd278, %fd277, %fd275, %fd276; + mov.f64 %fd279, 0d3EC71DEE62401315; + fma.rn.f64 %fd280, %fd278, %fd275, %fd279; + mov.f64 %fd281, 0d3EFA01997C89EB71; + fma.rn.f64 %fd282, %fd280, %fd275, %fd281; + mov.f64 %fd283, 0d3F2A01A014761F65; + fma.rn.f64 %fd284, %fd282, %fd275, %fd283; + mov.f64 %fd285, 0d3F56C16C1852B7AF; + fma.rn.f64 %fd286, %fd284, %fd275, %fd285; + mov.f64 %fd287, 0d3F81111111122322; + fma.rn.f64 %fd288, %fd286, %fd275, %fd287; + mov.f64 %fd289, 0d3FA55555555502A1; + fma.rn.f64 %fd290, %fd288, %fd275, %fd289; + mov.f64 %fd291, 0d3FC5555555555511; + fma.rn.f64 %fd292, %fd290, %fd275, %fd291; + mov.f64 %fd293, 0d3FE000000000000B; + fma.rn.f64 %fd294, %fd292, %fd275, %fd293; + fma.rn.f64 %fd295, %fd294, %fd275, %fd265; + fma.rn.f64 %fd296, %fd295, %fd275, %fd265; + { + .reg .b32 %temp; + mov.b64 {%r108, %temp}, %fd296; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r109}, %fd296; + } + shl.b32 %r544, %r107, 20; + add.s32 %r545, %r109, %r544; + mov.b64 %fd397, {%r108, %r545}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r546}, %fd105; + } + mov.b32 %f1508, %r546; + abs.f32 %f363, %f1508; + setp.lt.f32 %p364, %f363, 0f4086232B; + @%p364 bra $L__BB8_266; + + setp.lt.f64 %p365, %fd105, 0d0000000000000000; + add.f64 %fd297, %fd105, 0d7FF0000000000000; + selp.f64 %fd397, 0d0000000000000000, %fd297, %p365; + setp.geu.f32 %p366, %f363, 0f40874800; + @%p366 bra $L__BB8_266; + + mov.f64 %fd368, 0d4338000000000000; + mov.f64 %fd367, 0d3FF71547652B82FE; + fma.rn.f64 %fd364, %fd105, %fd367, %fd368; + { + .reg .b32 %temp; + mov.b64 {%r617, %temp}, %fd364; + } + shr.u32 %r547, %r617, 31; + add.s32 %r548, %r617, %r547; + shr.s32 %r549, %r548, 1; + shl.b32 %r550, %r549, 20; + add.s32 %r551, %r109, %r550; + mov.b64 %fd298, {%r108, %r551}; + sub.s32 %r552, %r617, %r549; + shl.b32 %r553, %r552, 20; + add.s32 %r554, %r553, 1072693248; + mov.u32 %r555, 0; + mov.b64 %fd299, {%r555, %r554}; + mul.f64 %fd397, %fd298, %fd299; + +$L__BB8_266: + mov.f64 %fd370, 0d4338000000000000; + mov.f64 %fd369, 0d3FF71547652B82FE; + mov.f64 %fd365, 0d3FF0000000000000; + mul.f32 %f1509, %f362, 0fBF9F5F70; + cvt.f64.f32 %fd300, %f1509; + fma.rn.f64 %fd110, %fd104, 0dBFE5A43FE0000000, %fd300; + fma.rn.f64 %fd303, %fd110, %fd369, %fd370; + { + .reg .b32 %temp; + mov.b64 {%r110, %temp}, %fd303; + } + add.rn.f64 %fd305, %fd303, %fd270; + fma.rn.f64 %fd307, %fd305, %fd272, %fd110; + fma.rn.f64 %fd309, %fd305, %fd274, %fd307; + fma.rn.f64 %fd312, %fd277, %fd309, %fd276; + fma.rn.f64 %fd314, %fd312, %fd309, %fd279; + fma.rn.f64 %fd316, %fd314, %fd309, %fd281; + fma.rn.f64 %fd318, %fd316, %fd309, %fd283; + fma.rn.f64 %fd320, %fd318, %fd309, %fd285; + fma.rn.f64 %fd322, %fd320, %fd309, %fd287; + fma.rn.f64 %fd324, %fd322, %fd309, %fd289; + fma.rn.f64 %fd326, %fd324, %fd309, %fd291; + fma.rn.f64 %fd328, %fd326, %fd309, %fd293; + fma.rn.f64 %fd330, %fd328, %fd309, %fd365; + fma.rn.f64 %fd331, %fd330, %fd309, %fd365; + { + .reg .b32 %temp; + mov.b64 {%r111, %temp}, %fd331; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r112}, %fd331; + } + shl.b32 %r556, %r110, 20; + add.s32 %r557, %r112, %r556; + mov.b64 %fd398, {%r111, %r557}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r558}, %fd110; + } + mov.b32 %f1510, %r558; + abs.f32 %f364, %f1510; + setp.lt.f32 %p367, %f364, 0f4086232B; + @%p367 bra $L__BB8_269; + + setp.lt.f64 %p368, %fd110, 0d0000000000000000; + add.f64 %fd332, %fd110, 0d7FF0000000000000; + selp.f64 %fd398, 0d0000000000000000, %fd332, %p368; + setp.geu.f32 %p369, %f364, 0f40874800; + @%p369 bra $L__BB8_269; + + shr.u32 %r559, %r110, 31; + add.s32 %r560, %r110, %r559; + shr.s32 %r561, %r560, 1; + shl.b32 %r562, %r561, 20; + add.s32 %r563, %r112, %r562; + mov.b64 %fd333, {%r111, %r563}; + sub.s32 %r564, %r110, %r561; + shl.b32 %r565, %r564, 20; + add.s32 %r566, %r565, 1072693248; + mov.u32 %r567, 0; + mov.b64 %fd334, {%r567, %r566}; + mul.f64 %fd398, %fd333, %fd334; + +$L__BB8_269: + mov.f64 %fd366, 0d3FF0000000000000; + mul.f64 %fd335, %fd398, 0d3FE0000000000000; + setp.gt.f32 %p370, %f362, 0f00000000; + selp.f64 %fd336, 0d3FF0000000000000, 0d0000000000000000, %p370; + mul.f64 %fd337, %fd335, %fd336; + setp.lt.f32 %p371, %f362, 0f00000000; + selp.f64 %fd338, 0d3FF0000000000000, 0d0000000000000000, %p371; + mul.f64 %fd339, %fd397, 0d3FE0000000000000; + sub.f64 %fd341, %fd366, %fd339; + fma.rn.f64 %fd342, %fd341, %fd338, %fd337; + cvt.rn.f32.f64 %f365, %fd342; + setp.geu.f32 %p372, %f1814, %f365; + @%p372 bra $L__BB8_279; + + mov.b32 %r568, %f50; + and.b32 %r569, %r568, -2147483648; + or.b32 %r570, %r569, 1056964608; + mov.b32 %f1511, %r570; + add.rz.f32 %f1512, %f50, %f1511; + cvt.rzi.f32.f32 %f366, %f1512; + neg.f32 %f367, %f366; + setp.leu.f32 %p373, %f1791, %f367; + @%p373 bra $L__BB8_279; + + cvt.rn.f32.s32 %f1607, %r241; + add.f32 %f1513, %f366, %f1607; + setp.geu.f32 %p374, %f1790, %f1513; + setp.leu.f32 %p375, %f1789, %f367; + or.pred %p376, %p375, %p374; + setp.geu.f32 %p377, %f1788, %f1513; + or.pred %p378, %p377, %p376; + @%p378 bra $L__BB8_279; + + and.b32 %r113, %r628, 3; + setp.lt.u32 %p379, %r44, 3; + mov.u32 %r647, 0; + @%p379 bra $L__BB8_275; + + sub.s32 %r646, %r628, %r113; + +$L__BB8_274: + mul.wide.s32 %rd98, %r647, 4; + add.s64 %rd99, %rd2, %rd98; + ld.local.v4.f32 {%f1514, %f1515, %f1516, %f1517}, [%rd99]; + add.s64 %rd100, %rd4, %rd98; + add.s64 %rd101, %rd3, %rd98; + ld.local.v4.f32 {%f1522, %f1523, %f1524, %f1525}, [%rd101]; + add.s64 %rd102, %rd5, %rd98; + st.local.v4.f32 [%rd100], {%f1514, %f1515, %f1516, %f1517}; + st.local.v4.f32 [%rd102], {%f1522, %f1523, %f1524, %f1525}; + add.s32 %r647, %r647, 4; + add.s32 %r646, %r646, -4; + setp.ne.s32 %p380, %r646, 0; + @%p380 bra $L__BB8_274; + +$L__BB8_275: + setp.eq.s32 %p381, %r113, 0; + mov.u32 %r648, %r628; + mov.f32 %f1813, %f1781; + mov.f32 %f1814, %f365; + @%p381 bra $L__BB8_279; + + mul.wide.s32 %rd103, %r647, 4; + add.s64 %rd15, %rd2, %rd103; + ld.local.f32 %f1530, [%rd15]; + add.s64 %rd16, %rd4, %rd103; + st.local.f32 [%rd16], %f1530; + add.s64 %rd17, %rd3, %rd103; + ld.local.f32 %f1531, [%rd17]; + add.s64 %rd18, %rd5, %rd103; + st.local.f32 [%rd18], %f1531; + setp.eq.s32 %p382, %r113, 1; + mov.u32 %r648, %r628; + mov.f32 %f1813, %f1781; + mov.f32 %f1814, %f365; + @%p382 bra $L__BB8_279; + + ld.local.f32 %f1532, [%rd15+4]; + st.local.f32 [%rd16+4], %f1532; + ld.local.f32 %f1533, [%rd17+4]; + st.local.f32 [%rd18+4], %f1533; + setp.eq.s32 %p383, %r113, 2; + mov.u32 %r648, %r628; + mov.f32 %f1813, %f1781; + mov.f32 %f1814, %f365; + @%p383 bra $L__BB8_279; + + ld.local.f32 %f1534, [%rd15+8]; + st.local.f32 [%rd16+8], %f1534; + ld.local.f32 %f1535, [%rd17+8]; + st.local.f32 [%rd18+8], %f1535; + mov.u32 %r648, %r628; + mov.f32 %f1813, %f1781; + mov.f32 %f1814, %f365; + +$L__BB8_279: + add.s32 %r626, %r628, 1; + setp.lt.s32 %p384, %r628, %r153; + @%p384 bra $L__BB8_53; + +$L__BB8_280: + ld.param.f32 %f1605, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_6]; + ld.local.f32 %f1818, [%rd4]; + setp.eq.f32 %p385, %f1818, 0f00000000; + cvt.f64.f32 %fd343, %f1605; + mul.f64 %fd344, %fd343, 0d3F847AE147AE147B; + cvt.f64.f32 %fd345, %f1814; + setp.geu.f64 %p386, %fd344, %fd345; + or.pred %p387, %p386, %p385; + setp.lt.s32 %p388, %r648, 1; + or.pred %p389, %p387, %p388; + @%p389 bra $L__BB8_288; + + add.s32 %r574, %r648, -1; + and.b32 %r653, %r648, 3; + setp.lt.u32 %p390, %r574, 3; + mov.u32 %r650, 0; + @%p390 bra $L__BB8_285; + + sub.s32 %r651, %r648, %r653; + mov.f32 %f1817, %f1818; + bra.uni $L__BB8_283; + +$L__BB8_284: + ld.local.f32 %f1817, [%rd19+12]; + +$L__BB8_283: + mul.wide.s32 %rd104, %r650, 4; + add.s64 %rd105, %rd2, %rd104; + add.s64 %rd106, %rd5, %rd104; + ld.local.v4.f32 {%f1536, %f1537, %f1538, %f1539}, [%rd106]; + add.s64 %rd107, %rd3, %rd104; + add.s32 %r576, %r650, 1; + mul.wide.s32 %rd108, %r576, 4; + add.s64 %rd19, %rd4, %rd108; + ld.local.f32 %f1544, [%rd19+8]; + ld.local.f32 %f1545, [%rd19+4]; + ld.local.f32 %f1546, [%rd19]; + st.local.v4.f32 [%rd105], {%f1817, %f1546, %f1545, %f1544}; + st.local.v4.f32 [%rd107], {%f1536, %f1537, %f1538, %f1539}; + add.s32 %r650, %r650, 4; + add.s32 %r651, %r651, -4; + setp.eq.s32 %p391, %r651, 0; + @%p391 bra $L__BB8_285; + bra.uni $L__BB8_284; + +$L__BB8_285: + setp.eq.s32 %p392, %r653, 0; + @%p392 bra $L__BB8_288; + + mul.wide.s32 %rd109, %r650, 4; + add.s64 %rd139, %rd3, %rd109; + add.s64 %rd138, %rd5, %rd109; + add.s64 %rd137, %rd2, %rd109; + add.s64 %rd136, %rd4, %rd109; + +$L__BB8_287: + .pragma "nounroll"; + ld.local.f32 %f1547, [%rd136]; + st.local.f32 [%rd137], %f1547; + ld.local.f32 %f1548, [%rd138]; + st.local.f32 [%rd139], %f1548; + add.s64 %rd139, %rd139, 4; + add.s64 %rd138, %rd138, 4; + add.s64 %rd137, %rd137, 4; + add.s64 %rd136, %rd136, 4; + add.s32 %r653, %r653, -1; + setp.ne.s32 %p393, %r653, 0; + @%p393 bra $L__BB8_287; + +$L__BB8_288: + setp.lt.s32 %p404, %r153, 1; + ld.param.f32 %f1606, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_6]; + setp.gt.f32 %p394, %f1814, %f1606; + selp.f32 %f375, 0f3F800000, 0f00000000, %p394; + @%p404 bra $L__BB8_302; + + mul.lo.s32 %r132, %r158, %r153; + and.b32 %r661, %r153, 3; + add.s32 %r134, %r153, -1; + setp.lt.u32 %p396, %r134, 3; + mov.u32 %r654, 0; + @%p396 bra $L__BB8_293; + + ld.param.u64 %rd130, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_7]; + sub.s32 %r655, %r153, %r661; + cvta.to.global.u64 %rd32, %rd130; + bra.uni $L__BB8_291; + +$L__BB8_292: + ld.local.f32 %f1818, [%rd33+12]; + +$L__BB8_291: + add.s32 %r583, %r654, %r132; + mul.wide.s32 %rd110, %r583, 4; + add.s64 %rd111, %rd32, %rd110; + mul.f32 %f1549, %f1818, %f375; + st.global.f32 [%rd111], %f1549; + add.s32 %r584, %r654, 1; + mul.wide.s32 %rd112, %r584, 4; + add.s64 %rd33, %rd4, %rd112; + ld.local.f32 %f1550, [%rd33]; + mul.f32 %f1551, %f1550, %f375; + st.global.f32 [%rd111+4], %f1551; + ld.local.f32 %f1552, [%rd33+4]; + mul.f32 %f1553, %f1552, %f375; + st.global.f32 [%rd111+8], %f1553; + ld.local.f32 %f1554, [%rd33+8]; + mul.f32 %f1555, %f1554, %f375; + st.global.f32 [%rd111+12], %f1555; + add.s32 %r654, %r654, 4; + add.s32 %r655, %r655, -4; + setp.eq.s32 %p397, %r655, 0; + @%p397 bra $L__BB8_293; + bra.uni $L__BB8_292; + +$L__BB8_293: + setp.eq.s32 %p398, %r661, 0; + @%p398 bra $L__BB8_296; + + ld.param.u64 %rd131, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_7]; + add.s32 %r585, %r654, %r132; + cvta.to.global.u64 %rd113, %rd131; + mul.wide.s32 %rd114, %r585, 4; + add.s64 %rd141, %rd113, %rd114; + mul.wide.s32 %rd115, %r654, 4; + add.s64 %rd140, %rd4, %rd115; + mov.u32 %r657, %r661; + +$L__BB8_295: + .pragma "nounroll"; + ld.local.f32 %f1556, [%rd140]; + mul.f32 %f1557, %f1556, %f375; + st.global.f32 [%rd141], %f1557; + add.s64 %rd141, %rd141, 4; + add.s64 %rd140, %rd140, 4; + add.s32 %r657, %r657, -1; + setp.ne.s32 %p399, %r657, 0; + @%p399 bra $L__BB8_295; + +$L__BB8_296: + mov.u32 %r660, 0; + @%p396 bra $L__BB8_299; + + ld.param.u64 %rd132, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_8]; + sub.s32 %r659, %r153, %r661; + cvta.to.global.u64 %rd40, %rd132; + +$L__BB8_298: + mul.wide.s32 %rd116, %r660, 4; + add.s64 %rd117, %rd5, %rd116; + ld.local.v4.f32 {%f1558, %f1559, %f1560, %f1561}, [%rd117]; + mul.f32 %f1566, %f1558, %f375; + add.s32 %r588, %r660, %r132; + mul.wide.s32 %rd118, %r588, 4; + add.s64 %rd119, %rd40, %rd118; + st.global.f32 [%rd119], %f1566; + mul.f32 %f1567, %f1559, %f375; + st.global.f32 [%rd119+4], %f1567; + mul.f32 %f1568, %f1560, %f375; + st.global.f32 [%rd119+8], %f1568; + mul.f32 %f1569, %f1561, %f375; + st.global.f32 [%rd119+12], %f1569; + add.s32 %r660, %r660, 4; + add.s32 %r659, %r659, -4; + setp.ne.s32 %p401, %r659, 0; + @%p401 bra $L__BB8_298; + +$L__BB8_299: + @%p398 bra $L__BB8_302; + + ld.param.u64 %rd133, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_8]; + add.s32 %r589, %r660, %r132; + cvta.to.global.u64 %rd120, %rd133; + mul.wide.s32 %rd121, %r589, 4; + add.s64 %rd143, %rd120, %rd121; + mul.wide.s32 %rd122, %r660, 4; + add.s64 %rd142, %rd5, %rd122; + +$L__BB8_301: + .pragma "nounroll"; + ld.local.f32 %f1570, [%rd142]; + mul.f32 %f1571, %f1570, %f375; + st.global.f32 [%rd143], %f1571; + add.s64 %rd143, %rd143, 4; + add.s64 %rd142, %rd142, 4; + add.s32 %r661, %r661, -1; + setp.ne.s32 %p403, %r661, 0; + @%p403 bra $L__BB8_301; + +$L__BB8_302: + ld.param.u64 %rd129, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_10]; + ld.param.u64 %rd128, [_Z15kernel_gaussMFAPKffiiiffPfS1_S1_S1_i_param_9]; + mul.f32 %f1572, %f1813, %f375; + cvta.to.global.u64 %rd123, %rd128; + mul.wide.s32 %rd124, %r158, 4; + add.s64 %rd125, %rd123, %rd124; + st.global.f32 [%rd125], %f1572; + cvta.to.global.u64 %rd126, %rd129; + add.s64 %rd127, %rd126, %rd124; + st.global.f32 [%rd127], %f1814; + +$L__BB8_303: ret; -} +} // .globl _Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1_ .visible .entry _Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1_( .param .u32 _Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_0, @@ -33284,3967 +54881,3574 @@ BB8_238: .param .u64 _Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_11 ) { - .local .align 4 .b8 __local_depot9[1596]; + .local .align 4 .b8 __local_depot9[1552]; .reg .b64 %SP; .reg .b64 %SPL; .reg .pred %p<249>; - .reg .f32 %f<1479>; - .reg .b32 %r<929>; - .reg .b64 %rd<521>; + .reg .f32 %f<1230>; + .reg .b32 %r<614>; + .reg .f64 %fd<49>; + .reg .b64 %rd<314>; mov.u64 %SPL, __local_depot9; - ld.param.u32 %r289, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_0]; - ld.param.u32 %r290, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_1]; - ld.param.f32 %f195, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_2]; - ld.param.f32 %f196, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_3]; - ld.param.u64 %rd60, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_4]; - ld.param.u64 %rd61, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_5]; - ld.param.u64 %rd62, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_6]; - ld.param.u64 %rd64, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_8]; - ld.param.u32 %r291, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_9]; - ld.param.u64 %rd65, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_10]; - ld.param.u64 %rd66, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_11]; - cvta.to.global.u64 %rd1, %rd66; - cvta.to.global.u64 %rd2, %rd65; - add.u64 %rd3, %SPL, 0; - add.u64 %rd4, %SPL, 484; - add.u64 %rd5, %SPL, 968; - mov.u32 %r824, 0; - st.local.u32 [%rd4], %r824; - st.local.u32 [%rd4+4], %r824; - st.local.u32 [%rd4+8], %r824; - st.local.u32 [%rd4+12], %r824; - st.local.u32 [%rd4+16], %r824; - st.local.u32 [%rd4+20], %r824; - st.local.u32 [%rd4+24], %r824; - st.local.u32 [%rd4+28], %r824; - st.local.u32 [%rd4+32], %r824; - st.local.u32 [%rd4+36], %r824; - st.local.u32 [%rd4+40], %r824; - st.local.u32 [%rd4+44], %r824; - st.local.u32 [%rd4+48], %r824; - st.local.u32 [%rd4+52], %r824; - st.local.u32 [%rd4+56], %r824; - st.local.u32 [%rd4+60], %r824; - st.local.u32 [%rd4+64], %r824; - st.local.u32 [%rd4+68], %r824; - st.local.u32 [%rd4+72], %r824; - st.local.u32 [%rd4+76], %r824; - st.local.u32 [%rd4+80], %r824; - st.local.u32 [%rd4+84], %r824; - st.local.u32 [%rd4+88], %r824; - st.local.u32 [%rd4+92], %r824; - st.local.u32 [%rd4+96], %r824; - st.local.u32 [%rd4+100], %r824; - st.local.u32 [%rd4+104], %r824; - st.local.u32 [%rd4+108], %r824; - st.local.u32 [%rd4+112], %r824; - st.local.u32 [%rd4+116], %r824; - st.local.u32 [%rd4+120], %r824; - st.local.u32 [%rd4+124], %r824; - st.local.u32 [%rd4+128], %r824; - st.local.u32 [%rd4+132], %r824; - st.local.u32 [%rd4+136], %r824; - st.local.u32 [%rd4+140], %r824; - st.local.u32 [%rd4+144], %r824; - st.local.u32 [%rd4+148], %r824; - st.local.u32 [%rd4+152], %r824; - st.local.u32 [%rd4+156], %r824; - st.local.u32 [%rd4+160], %r824; - st.local.u32 [%rd4+164], %r824; - st.local.u32 [%rd4+168], %r824; - st.local.u32 [%rd4+172], %r824; - st.local.u32 [%rd4+176], %r824; - st.local.u32 [%rd4+180], %r824; - st.local.u32 [%rd4+184], %r824; - st.local.u32 [%rd4+188], %r824; - st.local.u32 [%rd4+192], %r824; - st.local.u32 [%rd4+196], %r824; - st.local.u32 [%rd4+200], %r824; - st.local.u32 [%rd4+204], %r824; - st.local.u32 [%rd4+208], %r824; - st.local.u32 [%rd4+212], %r824; - st.local.u32 [%rd4+216], %r824; - st.local.u32 [%rd4+220], %r824; - st.local.u32 [%rd4+224], %r824; - st.local.u32 [%rd4+228], %r824; - st.local.u32 [%rd4+232], %r824; - st.local.u32 [%rd4+236], %r824; - st.local.u32 [%rd4+240], %r824; - st.local.u32 [%rd4+244], %r824; - st.local.u32 [%rd4+248], %r824; - st.local.u32 [%rd4+252], %r824; - st.local.u32 [%rd4+256], %r824; - st.local.u32 [%rd4+260], %r824; - st.local.u32 [%rd4+264], %r824; - st.local.u32 [%rd4+268], %r824; - st.local.u32 [%rd4+272], %r824; - st.local.u32 [%rd4+276], %r824; - st.local.u32 [%rd4+280], %r824; - st.local.u32 [%rd4+284], %r824; - st.local.u32 [%rd4+288], %r824; - st.local.u32 [%rd4+292], %r824; - st.local.u32 [%rd4+296], %r824; - st.local.u32 [%rd4+300], %r824; - st.local.u32 [%rd4+304], %r824; - st.local.u32 [%rd4+308], %r824; - st.local.u32 [%rd4+312], %r824; - st.local.u32 [%rd4+316], %r824; - st.local.u32 [%rd4+320], %r824; - st.local.u32 [%rd4+324], %r824; - st.local.u32 [%rd4+328], %r824; - st.local.u32 [%rd4+332], %r824; - st.local.u32 [%rd4+336], %r824; - st.local.u32 [%rd4+340], %r824; - st.local.u32 [%rd4+344], %r824; - st.local.u32 [%rd4+348], %r824; - st.local.u32 [%rd4+352], %r824; - st.local.u32 [%rd4+356], %r824; - st.local.u32 [%rd4+360], %r824; - st.local.u32 [%rd4+364], %r824; - st.local.u32 [%rd4+368], %r824; - st.local.u32 [%rd4+372], %r824; - st.local.u32 [%rd4+376], %r824; - st.local.u32 [%rd4+380], %r824; - st.local.u32 [%rd4+384], %r824; - st.local.u32 [%rd4+388], %r824; - st.local.u32 [%rd4+392], %r824; - st.local.u32 [%rd4+396], %r824; - st.local.u32 [%rd4+400], %r824; - st.local.u32 [%rd4+404], %r824; - st.local.u32 [%rd4+408], %r824; - st.local.u32 [%rd4+412], %r824; - st.local.u32 [%rd4+416], %r824; - st.local.u32 [%rd4+420], %r824; - st.local.u32 [%rd4+424], %r824; - st.local.u32 [%rd4+428], %r824; - st.local.u32 [%rd4+432], %r824; - st.local.u32 [%rd4+436], %r824; - st.local.u32 [%rd4+440], %r824; - st.local.u32 [%rd4+444], %r824; - st.local.u32 [%rd4+448], %r824; - st.local.u32 [%rd4+452], %r824; - st.local.u32 [%rd4+456], %r824; - st.local.u32 [%rd4+460], %r824; - st.local.u32 [%rd4+464], %r824; - st.local.u32 [%rd4+468], %r824; - st.local.u32 [%rd4+472], %r824; - st.local.u32 [%rd4+476], %r824; - st.local.u32 [%rd4+480], %r824; - st.local.u32 [%rd3], %r824; - st.local.u32 [%rd3+4], %r824; - st.local.u32 [%rd3+8], %r824; - st.local.u32 [%rd3+12], %r824; - st.local.u32 [%rd3+16], %r824; - st.local.u32 [%rd3+20], %r824; - st.local.u32 [%rd3+24], %r824; - st.local.u32 [%rd3+28], %r824; - st.local.u32 [%rd3+32], %r824; - st.local.u32 [%rd3+36], %r824; - st.local.u32 [%rd3+40], %r824; - st.local.u32 [%rd3+44], %r824; - st.local.u32 [%rd3+48], %r824; - st.local.u32 [%rd3+52], %r824; - st.local.u32 [%rd3+56], %r824; - st.local.u32 [%rd3+60], %r824; - st.local.u32 [%rd3+64], %r824; - st.local.u32 [%rd3+68], %r824; - st.local.u32 [%rd3+72], %r824; - st.local.u32 [%rd3+76], %r824; - st.local.u32 [%rd3+80], %r824; - st.local.u32 [%rd3+84], %r824; - st.local.u32 [%rd3+88], %r824; - st.local.u32 [%rd3+92], %r824; - st.local.u32 [%rd3+96], %r824; - st.local.u32 [%rd3+100], %r824; - st.local.u32 [%rd3+104], %r824; - st.local.u32 [%rd3+108], %r824; - st.local.u32 [%rd3+112], %r824; - st.local.u32 [%rd3+116], %r824; - st.local.u32 [%rd3+120], %r824; - st.local.u32 [%rd3+124], %r824; - st.local.u32 [%rd3+128], %r824; - st.local.u32 [%rd3+132], %r824; - st.local.u32 [%rd3+136], %r824; - st.local.u32 [%rd3+140], %r824; - st.local.u32 [%rd3+144], %r824; - st.local.u32 [%rd3+148], %r824; - st.local.u32 [%rd3+152], %r824; - st.local.u32 [%rd3+156], %r824; - st.local.u32 [%rd3+160], %r824; - st.local.u32 [%rd3+164], %r824; - st.local.u32 [%rd3+168], %r824; - st.local.u32 [%rd3+172], %r824; - st.local.u32 [%rd3+176], %r824; - st.local.u32 [%rd3+180], %r824; - st.local.u32 [%rd3+184], %r824; - st.local.u32 [%rd3+188], %r824; - st.local.u32 [%rd3+192], %r824; - st.local.u32 [%rd3+196], %r824; - st.local.u32 [%rd3+200], %r824; - st.local.u32 [%rd3+204], %r824; - st.local.u32 [%rd3+208], %r824; - st.local.u32 [%rd3+212], %r824; - st.local.u32 [%rd3+216], %r824; - st.local.u32 [%rd3+220], %r824; - st.local.u32 [%rd3+224], %r824; - st.local.u32 [%rd3+228], %r824; - st.local.u32 [%rd3+232], %r824; - st.local.u32 [%rd3+236], %r824; - st.local.u32 [%rd3+240], %r824; - st.local.u32 [%rd3+244], %r824; - st.local.u32 [%rd3+248], %r824; - st.local.u32 [%rd3+252], %r824; - st.local.u32 [%rd3+256], %r824; - st.local.u32 [%rd3+260], %r824; - st.local.u32 [%rd3+264], %r824; - st.local.u32 [%rd3+268], %r824; - st.local.u32 [%rd3+272], %r824; - st.local.u32 [%rd3+276], %r824; - st.local.u32 [%rd3+280], %r824; - st.local.u32 [%rd3+284], %r824; - st.local.u32 [%rd3+288], %r824; - st.local.u32 [%rd3+292], %r824; - st.local.u32 [%rd3+296], %r824; - st.local.u32 [%rd3+300], %r824; - st.local.u32 [%rd3+304], %r824; - st.local.u32 [%rd3+308], %r824; - st.local.u32 [%rd3+312], %r824; - st.local.u32 [%rd3+316], %r824; - st.local.u32 [%rd3+320], %r824; - st.local.u32 [%rd3+324], %r824; - st.local.u32 [%rd3+328], %r824; - st.local.u32 [%rd3+332], %r824; - st.local.u32 [%rd3+336], %r824; - st.local.u32 [%rd3+340], %r824; - st.local.u32 [%rd3+344], %r824; - st.local.u32 [%rd3+348], %r824; - st.local.u32 [%rd3+352], %r824; - st.local.u32 [%rd3+356], %r824; - st.local.u32 [%rd3+360], %r824; - st.local.u32 [%rd3+364], %r824; - st.local.u32 [%rd3+368], %r824; - st.local.u32 [%rd3+372], %r824; - st.local.u32 [%rd3+376], %r824; - st.local.u32 [%rd3+380], %r824; - st.local.u32 [%rd3+384], %r824; - st.local.u32 [%rd3+388], %r824; - st.local.u32 [%rd3+392], %r824; - st.local.u32 [%rd3+396], %r824; - st.local.u32 [%rd3+400], %r824; - st.local.u32 [%rd3+404], %r824; - st.local.u32 [%rd3+408], %r824; - st.local.u32 [%rd3+412], %r824; - st.local.u32 [%rd3+416], %r824; - st.local.u32 [%rd3+420], %r824; - st.local.u32 [%rd3+424], %r824; - st.local.u32 [%rd3+428], %r824; - st.local.u32 [%rd3+432], %r824; - st.local.u32 [%rd3+436], %r824; - st.local.u32 [%rd3+440], %r824; - st.local.u32 [%rd3+444], %r824; - st.local.u32 [%rd3+448], %r824; - st.local.u32 [%rd3+452], %r824; - st.local.u32 [%rd3+456], %r824; - st.local.u32 [%rd3+460], %r824; - st.local.u32 [%rd3+464], %r824; - st.local.u32 [%rd3+468], %r824; - st.local.u32 [%rd3+472], %r824; - st.local.u32 [%rd3+476], %r824; - st.local.u32 [%rd3+480], %r824; - add.u64 %rd6, %SPL, 1452; - add.u64 %rd7, %SPL, 1552; - mov.u32 %r293, %ntid.x; - mov.u32 %r294, %ctaid.x; - mov.u32 %r295, %tid.x; - mad.lo.s32 %r296, %r293, %r294, %r295; - mul.lo.s32 %r1, %r296, %r290; - cvt.s64.s32 %rd8, %r1; - setp.ge.s32 %p5, %r296, %r291; - @%p5 bra BB9_311; - - setp.lt.s32 %p6, %r289, 1; - @%p6 bra BB9_92; - - mov.f32 %f197, 0f3F000000; - div.rn.f32 %f198, %f197, %f196; - div.rn.f32 %f1, %f198, %f196; - div.rn.f32 %f199, %f195, 0fC0206C98; - div.rn.f32 %f2, %f199, %f196; - mov.u32 %r298, 0; - cvta.to.global.u64 %rd72, %rd62; - mov.u32 %r819, %r298; - -BB9_3: - add.s32 %r304, %r296, %r295; - mul.wide.s32 %rd73, %r304, 4; - add.s64 %rd74, %rd72, %rd73; - ld.global.f32 %f3, [%rd74]; - mov.u32 %r820, %r298; - -BB9_4: - setp.lt.s32 %p7, %r290, 1; - mov.f32 %f1422, %f3; - @%p7 bra BB9_19; - - sqrt.rn.f32 %f5, %f1; - cvt.rn.f32.s32 %f6, %r820; - cvta.to.global.u64 %rd9, %rd61; - cvta.to.global.u64 %rd10, %rd60; - mov.u32 %r821, 0; - mov.f32 %f1422, %f3; - -BB9_6: - cvt.rn.f32.s32 %f1375, %r819; - add.s32 %r306, %r821, %r1; - mul.wide.s32 %rd75, %r306, 4; - add.s64 %rd76, %rd10, %rd75; - add.s64 %rd77, %rd9, %rd75; - ld.global.f32 %f8, [%rd77]; - ld.global.f32 %f9, [%rd76]; - sub.f32 %f10, %f1375, %f9; - add.f32 %f200, %f10, 0f3F800000; - mul.f32 %f11, %f200, %f5; - abs.f32 %f12, %f11; - setp.ltu.f32 %p8, %f12, 0f3F800000; - @%p8 bra BB9_8; - bra.uni BB9_7; - -BB9_8: - mul.f32 %f219, %f11, %f11; - mov.f32 %f220, 0f3BA0C9F8; - mov.f32 %f221, 0fBA1268FB; - fma.rn.f32 %f222, %f221, %f219, %f220; - mov.f32 %f223, 0fBCDABFD4; - fma.rn.f32 %f224, %f222, %f219, %f223; - mov.f32 %f225, 0f3DE70331; - fma.rn.f32 %f226, %f224, %f219, %f225; - mov.f32 %f227, 0fBEC09330; - fma.rn.f32 %f228, %f226, %f219, %f227; - mov.f32 %f229, 0f3F906EBA; - fma.rn.f32 %f230, %f228, %f219, %f229; - mul.f32 %f1418, %f11, %f230; - bra.uni BB9_9; - -BB9_7: - mov.f32 %f201, 0f3A03BB71; - mov.f32 %f202, 0fB7B730FB; - fma.rn.f32 %f203, %f202, %f12, %f201; - mov.f32 %f204, 0fBBACA3B3; - fma.rn.f32 %f205, %f203, %f12, %f204; - mov.f32 %f206, 0f3D0A7445; - fma.rn.f32 %f207, %f205, %f12, %f206; - mov.f32 %f208, 0fBE1B3B75; - fma.rn.f32 %f209, %f207, %f12, %f208; - mov.f32 %f210, 0fBF6B385A; - fma.rn.f32 %f211, %f209, %f12, %f210; - mov.f32 %f212, 0fBFD0316E; - fma.rn.f32 %f213, %f211, %f12, %f212; - mov.f32 %f214, 0fBA031CCE; - fma.rn.f32 %f215, %f213, %f12, %f214; - ex2.approx.ftz.f32 %f216, %f215; - mov.f32 %f217, 0f3F800000; - sub.f32 %f218, %f217, %f216; - mov.b32 %r307, %f218; - setp.ltu.f32 %p9, %f12, 0f407AD445; - selp.b32 %r308, %r307, 1065353216, %p9; - mov.b32 %r309, %f11; - and.b32 %r310, %r309, -2147483648; - or.b32 %r311, %r308, %r310; - mov.b32 %f1418, %r311; - -BB9_9: - mul.f32 %f16, %f10, %f5; - abs.f32 %f17, %f16; - setp.ltu.f32 %p10, %f17, 0f3F800000; - @%p10 bra BB9_11; - bra.uni BB9_10; - -BB9_11: - mul.f32 %f249, %f16, %f16; - mov.f32 %f250, 0f3BA0C9F8; - mov.f32 %f251, 0fBA1268FB; - fma.rn.f32 %f252, %f251, %f249, %f250; - mov.f32 %f253, 0fBCDABFD4; - fma.rn.f32 %f254, %f252, %f249, %f253; - mov.f32 %f255, 0f3DE70331; - fma.rn.f32 %f256, %f254, %f249, %f255; - mov.f32 %f257, 0fBEC09330; - fma.rn.f32 %f258, %f256, %f249, %f257; - mov.f32 %f259, 0f3F906EBA; - fma.rn.f32 %f260, %f258, %f249, %f259; - mul.f32 %f1419, %f16, %f260; - bra.uni BB9_12; - -BB9_10: - mov.f32 %f231, 0f3A03BB71; - mov.f32 %f232, 0fB7B730FB; - fma.rn.f32 %f233, %f232, %f17, %f231; - mov.f32 %f234, 0fBBACA3B3; - fma.rn.f32 %f235, %f233, %f17, %f234; - mov.f32 %f236, 0f3D0A7445; - fma.rn.f32 %f237, %f235, %f17, %f236; - mov.f32 %f238, 0fBE1B3B75; - fma.rn.f32 %f239, %f237, %f17, %f238; - mov.f32 %f240, 0fBF6B385A; - fma.rn.f32 %f241, %f239, %f17, %f240; - mov.f32 %f242, 0fBFD0316E; - fma.rn.f32 %f243, %f241, %f17, %f242; - mov.f32 %f244, 0fBA031CCE; - fma.rn.f32 %f245, %f243, %f17, %f244; - ex2.approx.ftz.f32 %f246, %f245; - mov.f32 %f247, 0f3F800000; - sub.f32 %f248, %f247, %f246; - mov.b32 %r312, %f248; - setp.ltu.f32 %p11, %f17, 0f407AD445; - selp.b32 %r313, %r312, 1065353216, %p11; - mov.b32 %r314, %f16; - and.b32 %r315, %r314, -2147483648; - or.b32 %r316, %r313, %r315; - mov.b32 %f1419, %r316; - -BB9_12: - setp.neu.f32 %p12, %f9, 0f00000000; - selp.f32 %f21, 0f3F800000, 0f00000000, %p12; - sub.f32 %f22, %f1418, %f1419; - sub.f32 %f23, %f6, %f8; - add.f32 %f261, %f23, 0f3F800000; - mul.f32 %f24, %f261, %f5; - abs.f32 %f25, %f24; - setp.ltu.f32 %p13, %f25, 0f3F800000; - @%p13 bra BB9_14; - bra.uni BB9_13; - -BB9_14: - mul.f32 %f280, %f24, %f24; - mov.f32 %f281, 0f3BA0C9F8; - mov.f32 %f282, 0fBA1268FB; - fma.rn.f32 %f283, %f282, %f280, %f281; - mov.f32 %f284, 0fBCDABFD4; - fma.rn.f32 %f285, %f283, %f280, %f284; - mov.f32 %f286, 0f3DE70331; - fma.rn.f32 %f287, %f285, %f280, %f286; - mov.f32 %f288, 0fBEC09330; - fma.rn.f32 %f289, %f287, %f280, %f288; - mov.f32 %f290, 0f3F906EBA; - fma.rn.f32 %f291, %f289, %f280, %f290; - mul.f32 %f1420, %f24, %f291; - bra.uni BB9_15; - -BB9_13: - mov.f32 %f262, 0f3A03BB71; - mov.f32 %f263, 0fB7B730FB; - fma.rn.f32 %f264, %f263, %f25, %f262; - mov.f32 %f265, 0fBBACA3B3; - fma.rn.f32 %f266, %f264, %f25, %f265; - mov.f32 %f267, 0f3D0A7445; - fma.rn.f32 %f268, %f266, %f25, %f267; - mov.f32 %f269, 0fBE1B3B75; - fma.rn.f32 %f270, %f268, %f25, %f269; - mov.f32 %f271, 0fBF6B385A; - fma.rn.f32 %f272, %f270, %f25, %f271; - mov.f32 %f273, 0fBFD0316E; - fma.rn.f32 %f274, %f272, %f25, %f273; - mov.f32 %f275, 0fBA031CCE; - fma.rn.f32 %f276, %f274, %f25, %f275; - ex2.approx.ftz.f32 %f277, %f276; - mov.f32 %f278, 0f3F800000; - sub.f32 %f279, %f278, %f277; - mov.b32 %r317, %f279; - setp.ltu.f32 %p14, %f25, 0f407AD445; - selp.b32 %r318, %r317, 1065353216, %p14; - mov.b32 %r319, %f24; - and.b32 %r320, %r319, -2147483648; - or.b32 %r321, %r318, %r320; - mov.b32 %f1420, %r321; - -BB9_15: - mul.f32 %f29, %f23, %f5; - abs.f32 %f30, %f29; - setp.ltu.f32 %p15, %f30, 0f3F800000; - @%p15 bra BB9_17; - bra.uni BB9_16; - -BB9_17: - mul.f32 %f310, %f29, %f29; - mov.f32 %f311, 0f3BA0C9F8; - mov.f32 %f312, 0fBA1268FB; - fma.rn.f32 %f313, %f312, %f310, %f311; - mov.f32 %f314, 0fBCDABFD4; + ld.param.u32 %r213, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_0]; + ld.param.u32 %r214, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_1]; + ld.param.f32 %f181, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_2]; + ld.param.f32 %f182, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_3]; + ld.param.u64 %rd57, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_4]; + ld.param.u64 %rd58, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_5]; + ld.param.u64 %rd56, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_6]; + ld.param.u64 %rd59, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_7]; + ld.param.u64 %rd60, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_8]; + ld.param.u32 %r215, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_9]; + ld.param.u64 %rd61, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_10]; + ld.param.u64 %rd62, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_11]; + cvta.to.global.u64 %rd1, %rd62; + cvta.to.global.u64 %rd2, %rd61; + cvta.to.global.u64 %rd3, %rd60; + add.u64 %rd4, %SPL, 0; + add.u64 %rd5, %SPL, 484; + add.u64 %rd6, %SPL, 968; + mov.u32 %r545, 0; + st.local.u32 [%rd5], %r545; + st.local.u32 [%rd4], %r545; + st.local.u32 [%rd5+4], %r545; + st.local.u32 [%rd4+4], %r545; + st.local.u32 [%rd5+8], %r545; + st.local.u32 [%rd4+8], %r545; + st.local.u32 [%rd5+12], %r545; + st.local.u32 [%rd4+12], %r545; + st.local.u32 [%rd5+16], %r545; + st.local.u32 [%rd4+16], %r545; + st.local.u32 [%rd5+20], %r545; + st.local.u32 [%rd4+20], %r545; + st.local.u32 [%rd5+24], %r545; + st.local.u32 [%rd4+24], %r545; + st.local.u32 [%rd5+28], %r545; + st.local.u32 [%rd4+28], %r545; + st.local.u32 [%rd5+32], %r545; + st.local.u32 [%rd4+32], %r545; + st.local.u32 [%rd5+36], %r545; + st.local.u32 [%rd4+36], %r545; + st.local.u32 [%rd5+40], %r545; + st.local.u32 [%rd4+40], %r545; + st.local.u32 [%rd5+44], %r545; + st.local.u32 [%rd4+44], %r545; + st.local.u32 [%rd5+48], %r545; + st.local.u32 [%rd4+48], %r545; + st.local.u32 [%rd5+52], %r545; + st.local.u32 [%rd4+52], %r545; + st.local.u32 [%rd5+56], %r545; + st.local.u32 [%rd4+56], %r545; + st.local.u32 [%rd5+60], %r545; + st.local.u32 [%rd4+60], %r545; + st.local.u32 [%rd5+64], %r545; + st.local.u32 [%rd4+64], %r545; + st.local.u32 [%rd5+68], %r545; + st.local.u32 [%rd4+68], %r545; + st.local.u32 [%rd5+72], %r545; + st.local.u32 [%rd4+72], %r545; + st.local.u32 [%rd5+76], %r545; + st.local.u32 [%rd4+76], %r545; + st.local.u32 [%rd5+80], %r545; + st.local.u32 [%rd4+80], %r545; + st.local.u32 [%rd5+84], %r545; + st.local.u32 [%rd4+84], %r545; + st.local.u32 [%rd5+88], %r545; + st.local.u32 [%rd4+88], %r545; + st.local.u32 [%rd5+92], %r545; + st.local.u32 [%rd4+92], %r545; + st.local.u32 [%rd5+96], %r545; + st.local.u32 [%rd4+96], %r545; + st.local.u32 [%rd5+100], %r545; + st.local.u32 [%rd4+100], %r545; + st.local.u32 [%rd5+104], %r545; + st.local.u32 [%rd4+104], %r545; + st.local.u32 [%rd5+108], %r545; + st.local.u32 [%rd4+108], %r545; + st.local.u32 [%rd5+112], %r545; + st.local.u32 [%rd4+112], %r545; + st.local.u32 [%rd5+116], %r545; + st.local.u32 [%rd4+116], %r545; + st.local.u32 [%rd5+120], %r545; + st.local.u32 [%rd4+120], %r545; + st.local.u32 [%rd5+124], %r545; + st.local.u32 [%rd4+124], %r545; + st.local.u32 [%rd5+128], %r545; + st.local.u32 [%rd4+128], %r545; + st.local.u32 [%rd5+132], %r545; + st.local.u32 [%rd4+132], %r545; + st.local.u32 [%rd5+136], %r545; + st.local.u32 [%rd4+136], %r545; + st.local.u32 [%rd5+140], %r545; + st.local.u32 [%rd4+140], %r545; + st.local.u32 [%rd5+144], %r545; + st.local.u32 [%rd4+144], %r545; + st.local.u32 [%rd5+148], %r545; + st.local.u32 [%rd4+148], %r545; + st.local.u32 [%rd5+152], %r545; + st.local.u32 [%rd4+152], %r545; + st.local.u32 [%rd5+156], %r545; + st.local.u32 [%rd4+156], %r545; + st.local.u32 [%rd5+160], %r545; + st.local.u32 [%rd4+160], %r545; + st.local.u32 [%rd5+164], %r545; + st.local.u32 [%rd4+164], %r545; + st.local.u32 [%rd5+168], %r545; + st.local.u32 [%rd4+168], %r545; + st.local.u32 [%rd5+172], %r545; + st.local.u32 [%rd4+172], %r545; + st.local.u32 [%rd5+176], %r545; + st.local.u32 [%rd4+176], %r545; + st.local.u32 [%rd5+180], %r545; + st.local.u32 [%rd4+180], %r545; + st.local.u32 [%rd5+184], %r545; + st.local.u32 [%rd4+184], %r545; + st.local.u32 [%rd5+188], %r545; + st.local.u32 [%rd4+188], %r545; + st.local.u32 [%rd5+192], %r545; + st.local.u32 [%rd4+192], %r545; + st.local.u32 [%rd5+196], %r545; + st.local.u32 [%rd4+196], %r545; + st.local.u32 [%rd5+200], %r545; + st.local.u32 [%rd4+200], %r545; + st.local.u32 [%rd5+204], %r545; + st.local.u32 [%rd4+204], %r545; + st.local.u32 [%rd5+208], %r545; + st.local.u32 [%rd4+208], %r545; + st.local.u32 [%rd5+212], %r545; + st.local.u32 [%rd4+212], %r545; + st.local.u32 [%rd5+216], %r545; + st.local.u32 [%rd4+216], %r545; + st.local.u32 [%rd5+220], %r545; + st.local.u32 [%rd4+220], %r545; + st.local.u32 [%rd5+224], %r545; + st.local.u32 [%rd4+224], %r545; + st.local.u32 [%rd5+228], %r545; + st.local.u32 [%rd4+228], %r545; + st.local.u32 [%rd5+232], %r545; + st.local.u32 [%rd4+232], %r545; + st.local.u32 [%rd5+236], %r545; + st.local.u32 [%rd4+236], %r545; + st.local.u32 [%rd5+240], %r545; + st.local.u32 [%rd4+240], %r545; + st.local.u32 [%rd5+244], %r545; + st.local.u32 [%rd4+244], %r545; + st.local.u32 [%rd5+248], %r545; + st.local.u32 [%rd4+248], %r545; + st.local.u32 [%rd5+252], %r545; + st.local.u32 [%rd4+252], %r545; + st.local.u32 [%rd5+256], %r545; + st.local.u32 [%rd4+256], %r545; + st.local.u32 [%rd5+260], %r545; + st.local.u32 [%rd4+260], %r545; + st.local.u32 [%rd5+264], %r545; + st.local.u32 [%rd4+264], %r545; + st.local.u32 [%rd5+268], %r545; + st.local.u32 [%rd4+268], %r545; + st.local.u32 [%rd5+272], %r545; + st.local.u32 [%rd4+272], %r545; + st.local.u32 [%rd5+276], %r545; + st.local.u32 [%rd4+276], %r545; + st.local.u32 [%rd5+280], %r545; + st.local.u32 [%rd4+280], %r545; + st.local.u32 [%rd5+284], %r545; + st.local.u32 [%rd4+284], %r545; + st.local.u32 [%rd5+288], %r545; + st.local.u32 [%rd4+288], %r545; + st.local.u32 [%rd5+292], %r545; + st.local.u32 [%rd4+292], %r545; + st.local.u32 [%rd5+296], %r545; + st.local.u32 [%rd4+296], %r545; + st.local.u32 [%rd5+300], %r545; + st.local.u32 [%rd4+300], %r545; + st.local.u32 [%rd5+304], %r545; + st.local.u32 [%rd4+304], %r545; + st.local.u32 [%rd5+308], %r545; + st.local.u32 [%rd4+308], %r545; + st.local.u32 [%rd5+312], %r545; + st.local.u32 [%rd4+312], %r545; + st.local.u32 [%rd5+316], %r545; + st.local.u32 [%rd4+316], %r545; + st.local.u32 [%rd5+320], %r545; + st.local.u32 [%rd4+320], %r545; + st.local.u32 [%rd5+324], %r545; + st.local.u32 [%rd4+324], %r545; + st.local.u32 [%rd5+328], %r545; + st.local.u32 [%rd4+328], %r545; + st.local.u32 [%rd5+332], %r545; + st.local.u32 [%rd4+332], %r545; + st.local.u32 [%rd5+336], %r545; + st.local.u32 [%rd4+336], %r545; + st.local.u32 [%rd5+340], %r545; + st.local.u32 [%rd4+340], %r545; + st.local.u32 [%rd5+344], %r545; + st.local.u32 [%rd4+344], %r545; + st.local.u32 [%rd5+348], %r545; + st.local.u32 [%rd4+348], %r545; + st.local.u32 [%rd5+352], %r545; + st.local.u32 [%rd4+352], %r545; + st.local.u32 [%rd5+356], %r545; + st.local.u32 [%rd4+356], %r545; + st.local.u32 [%rd5+360], %r545; + st.local.u32 [%rd4+360], %r545; + st.local.u32 [%rd5+364], %r545; + st.local.u32 [%rd4+364], %r545; + st.local.u32 [%rd5+368], %r545; + st.local.u32 [%rd4+368], %r545; + st.local.u32 [%rd5+372], %r545; + st.local.u32 [%rd4+372], %r545; + st.local.u32 [%rd5+376], %r545; + st.local.u32 [%rd4+376], %r545; + st.local.u32 [%rd5+380], %r545; + st.local.u32 [%rd4+380], %r545; + st.local.u32 [%rd5+384], %r545; + st.local.u32 [%rd4+384], %r545; + st.local.u32 [%rd5+388], %r545; + st.local.u32 [%rd4+388], %r545; + st.local.u32 [%rd5+392], %r545; + st.local.u32 [%rd4+392], %r545; + st.local.u32 [%rd5+396], %r545; + st.local.u32 [%rd4+396], %r545; + st.local.u32 [%rd5+400], %r545; + st.local.u32 [%rd4+400], %r545; + st.local.u32 [%rd5+404], %r545; + st.local.u32 [%rd4+404], %r545; + st.local.u32 [%rd5+408], %r545; + st.local.u32 [%rd4+408], %r545; + st.local.u32 [%rd5+412], %r545; + st.local.u32 [%rd4+412], %r545; + st.local.u32 [%rd5+416], %r545; + st.local.u32 [%rd4+416], %r545; + st.local.u32 [%rd5+420], %r545; + st.local.u32 [%rd4+420], %r545; + st.local.u32 [%rd5+424], %r545; + st.local.u32 [%rd4+424], %r545; + st.local.u32 [%rd5+428], %r545; + st.local.u32 [%rd4+428], %r545; + st.local.u32 [%rd5+432], %r545; + st.local.u32 [%rd4+432], %r545; + st.local.u32 [%rd5+436], %r545; + st.local.u32 [%rd4+436], %r545; + st.local.u32 [%rd5+440], %r545; + st.local.u32 [%rd4+440], %r545; + st.local.u32 [%rd5+444], %r545; + st.local.u32 [%rd4+444], %r545; + st.local.u32 [%rd5+448], %r545; + st.local.u32 [%rd4+448], %r545; + st.local.u32 [%rd5+452], %r545; + st.local.u32 [%rd4+452], %r545; + st.local.u32 [%rd5+456], %r545; + st.local.u32 [%rd4+456], %r545; + st.local.u32 [%rd5+460], %r545; + st.local.u32 [%rd4+460], %r545; + st.local.u32 [%rd5+464], %r545; + st.local.u32 [%rd4+464], %r545; + st.local.u32 [%rd5+468], %r545; + st.local.u32 [%rd4+468], %r545; + st.local.u32 [%rd5+472], %r545; + st.local.u32 [%rd4+472], %r545; + st.local.u32 [%rd5+476], %r545; + st.local.u32 [%rd4+476], %r545; + st.local.u32 [%rd5+480], %r545; + st.local.u32 [%rd4+480], %r545; + cvta.to.global.u64 %rd7, %rd58; + cvta.to.global.u64 %rd8, %rd57; + cvta.to.global.u64 %rd9, %rd59; + add.u64 %rd10, %SPL, 1452; + mov.u32 %r217, %ntid.x; + mov.u32 %r218, %ctaid.x; + mov.u32 %r1, %tid.x; + mad.lo.s32 %r2, %r217, %r218, %r1; + mul.lo.s32 %r3, %r2, %r214; + cvt.s64.s32 %rd11, %r3; + setp.ge.s32 %p7, %r2, %r215; + @%p7 bra $L__BB9_254; + + setp.lt.s32 %p8, %r213, 1; + @%p8 bra $L__BB9_85; + + cvta.to.global.u64 %rd67, %rd56; + add.s32 %r221, %r2, %r1; + mul.wide.s32 %rd68, %r221, 4; + add.s64 %rd69, %rd67, %rd68; + mov.f32 %f183, 0f3F000000; + div.rn.f32 %f184, %f183, %f182; + div.rn.f32 %f1, %f184, %f182; + div.rn.f32 %f185, %f181, 0fC0206C98; + div.rn.f32 %f2, %f185, %f182; + ld.global.f32 %f3, [%rd69]; + mov.u32 %r220, 0; + setp.lt.s32 %p9, %r214, 1; + mov.u32 %r540, %r220; + +$L__BB9_3: + mov.u32 %r541, %r220; + +$L__BB9_4: + mov.f32 %f1181, %f3; + @%p9 bra $L__BB9_15; + + sqrt.rn.f32 %f6, %f1; + cvt.rn.f32.s32 %f7, %r541; + mov.u32 %r542, 0; + mov.f32 %f1181, %f3; + +$L__BB9_6: + cvt.rn.f32.s32 %f1132, %r540; + add.s32 %r224, %r542, %r3; + mul.wide.s32 %rd70, %r224, 4; + add.s64 %rd71, %rd8, %rd70; + add.s64 %rd72, %rd7, %rd70; + ld.global.f32 %f9, [%rd72]; + ld.global.f32 %f10, [%rd71]; + sub.f32 %f11, %f1132, %f10; + add.f32 %f186, %f11, 0f3F000000; + mul.f32 %f12, %f186, %f6; + abs.f32 %f187, %f12; + setp.ltu.f32 %p10, %f187, 0f3F8060FE; + setp.ge.f32 %p11, %f187, 0f3F8060FE; + mul.f32 %f188, %f12, %f12; + selp.f32 %f189, %f187, %f188, %p11; + selp.f32 %f190, 0f3789CA3C, 0f38B1E96A, %p11; + selp.f32 %f191, 0fB9F560B9, 0fBA574D20, %p11; + fma.rn.f32 %f192, %f190, %f189, %f191; + selp.f32 %f193, 0f3BAC840B, 0f3BAAD5EA, %p11; + fma.rn.f32 %f194, %f192, %f189, %f193; + selp.f32 %f195, 0fBD0C8162, 0fBCDC1BE7, %p11; + fma.rn.f32 %f196, %f194, %f189, %f195; + selp.f32 %f197, 0f3E1CF906, 0f3DE718AF, %p11; + fma.rn.f32 %f198, %f196, %f189, %f197; + selp.f32 %f199, 0f3F6A937E, 0fBEC093AC, %p11; + fma.rn.f32 %f200, %f198, %f189, %f199; + selp.f32 %f201, 0f3F20D842, 0f3E0375D3, %p11; + fma.rn.f32 %f202, %f200, %f189, %f201; + neg.f32 %f203, %f187; + selp.f32 %f204, %f203, %f12, %p11; + fma.rn.f32 %f1177, %f202, %f204, %f204; + @%p10 bra $L__BB9_8; + + ex2.approx.ftz.f32 %f205, %f1177; + mov.f32 %f206, 0f3F800000; + sub.f32 %f207, %f206, %f205; + mov.b32 %r225, %f207; + mov.b32 %r226, %f12; + and.b32 %r227, %r226, -2147483648; + or.b32 %r228, %r227, %r225; + mov.b32 %f1177, %r228; + +$L__BB9_8: + add.f32 %f208, %f11, 0fBF000000; + mul.f32 %f16, %f208, %f6; + abs.f32 %f209, %f16; + setp.ltu.f32 %p12, %f209, 0f3F8060FE; + setp.ge.f32 %p13, %f209, 0f3F8060FE; + mul.f32 %f210, %f16, %f16; + selp.f32 %f211, %f209, %f210, %p13; + selp.f32 %f212, 0f3789CA3C, 0f38B1E96A, %p13; + selp.f32 %f213, 0fB9F560B9, 0fBA574D20, %p13; + fma.rn.f32 %f214, %f212, %f211, %f213; + selp.f32 %f215, 0f3BAC840B, 0f3BAAD5EA, %p13; + fma.rn.f32 %f216, %f214, %f211, %f215; + selp.f32 %f217, 0fBD0C8162, 0fBCDC1BE7, %p13; + fma.rn.f32 %f218, %f216, %f211, %f217; + selp.f32 %f219, 0f3E1CF906, 0f3DE718AF, %p13; + fma.rn.f32 %f220, %f218, %f211, %f219; + selp.f32 %f221, 0f3F6A937E, 0fBEC093AC, %p13; + fma.rn.f32 %f222, %f220, %f211, %f221; + selp.f32 %f223, 0f3F20D842, 0f3E0375D3, %p13; + fma.rn.f32 %f224, %f222, %f211, %f223; + neg.f32 %f225, %f209; + selp.f32 %f226, %f225, %f16, %p13; + fma.rn.f32 %f1178, %f224, %f226, %f226; + @%p12 bra $L__BB9_10; + + ex2.approx.ftz.f32 %f227, %f1178; + mov.f32 %f228, 0f3F800000; + sub.f32 %f229, %f228, %f227; + mov.b32 %r229, %f229; + mov.b32 %r230, %f16; + and.b32 %r231, %r230, -2147483648; + or.b32 %r232, %r231, %r229; + mov.b32 %f1178, %r232; + +$L__BB9_10: + sub.f32 %f20, %f1177, %f1178; + sub.f32 %f21, %f7, %f9; + add.f32 %f230, %f21, 0f3F000000; + mul.f32 %f22, %f230, %f6; + abs.f32 %f231, %f22; + setp.ltu.f32 %p14, %f231, 0f3F8060FE; + setp.ge.f32 %p15, %f231, 0f3F8060FE; + mul.f32 %f232, %f22, %f22; + selp.f32 %f233, %f231, %f232, %p15; + selp.f32 %f234, 0f3789CA3C, 0f38B1E96A, %p15; + selp.f32 %f235, 0fB9F560B9, 0fBA574D20, %p15; + fma.rn.f32 %f236, %f234, %f233, %f235; + selp.f32 %f237, 0f3BAC840B, 0f3BAAD5EA, %p15; + fma.rn.f32 %f238, %f236, %f233, %f237; + selp.f32 %f239, 0fBD0C8162, 0fBCDC1BE7, %p15; + fma.rn.f32 %f240, %f238, %f233, %f239; + selp.f32 %f241, 0f3E1CF906, 0f3DE718AF, %p15; + fma.rn.f32 %f242, %f240, %f233, %f241; + selp.f32 %f243, 0f3F6A937E, 0fBEC093AC, %p15; + fma.rn.f32 %f244, %f242, %f233, %f243; + selp.f32 %f245, 0f3F20D842, 0f3E0375D3, %p15; + fma.rn.f32 %f246, %f244, %f233, %f245; + neg.f32 %f247, %f231; + selp.f32 %f248, %f247, %f22, %p15; + fma.rn.f32 %f1179, %f246, %f248, %f248; + @%p14 bra $L__BB9_12; + + ex2.approx.ftz.f32 %f249, %f1179; + mov.f32 %f250, 0f3F800000; + sub.f32 %f251, %f250, %f249; + mov.b32 %r233, %f251; + mov.b32 %r234, %f22; + and.b32 %r235, %r234, -2147483648; + or.b32 %r236, %r235, %r233; + mov.b32 %f1179, %r236; + +$L__BB9_12: + add.f32 %f252, %f21, 0fBF000000; + mul.f32 %f26, %f252, %f6; + abs.f32 %f253, %f26; + setp.ltu.f32 %p16, %f253, 0f3F8060FE; + setp.ge.f32 %p17, %f253, 0f3F8060FE; + mul.f32 %f254, %f26, %f26; + selp.f32 %f255, %f253, %f254, %p17; + selp.f32 %f256, 0f3789CA3C, 0f38B1E96A, %p17; + selp.f32 %f257, 0fB9F560B9, 0fBA574D20, %p17; + fma.rn.f32 %f258, %f256, %f255, %f257; + selp.f32 %f259, 0f3BAC840B, 0f3BAAD5EA, %p17; + fma.rn.f32 %f260, %f258, %f255, %f259; + selp.f32 %f261, 0fBD0C8162, 0fBCDC1BE7, %p17; + fma.rn.f32 %f262, %f260, %f255, %f261; + selp.f32 %f263, 0f3E1CF906, 0f3DE718AF, %p17; + fma.rn.f32 %f264, %f262, %f255, %f263; + selp.f32 %f265, 0f3F6A937E, 0fBEC093AC, %p17; + fma.rn.f32 %f266, %f264, %f255, %f265; + selp.f32 %f267, 0f3F20D842, 0f3E0375D3, %p17; + fma.rn.f32 %f268, %f266, %f255, %f267; + neg.f32 %f269, %f253; + selp.f32 %f270, %f269, %f26, %p17; + fma.rn.f32 %f1180, %f268, %f270, %f270; + @%p16 bra $L__BB9_14; + + ex2.approx.ftz.f32 %f271, %f1180; + mov.f32 %f272, 0f3F800000; + sub.f32 %f273, %f272, %f271; + mov.b32 %r237, %f273; + mov.b32 %r238, %f26; + and.b32 %r239, %r238, -2147483648; + or.b32 %r240, %r239, %r237; + mov.b32 %f1180, %r240; + +$L__BB9_14: + setp.neu.f32 %p18, %f10, 0f00000000; + selp.f32 %f274, 0f3F800000, 0f00000000, %p18; + sub.f32 %f275, %f1179, %f1180; + mul.f32 %f276, %f275, 0f3F000000; + mul.f32 %f277, %f276, %f274; + mul.f32 %f278, %f20, 0f3F000000; + mul.f32 %f279, %f278, %f274; + mul.f32 %f280, %f279, %f181; + fma.rn.f32 %f1181, %f280, %f277, %f1181; + add.s32 %r542, %r542, 1; + setp.lt.s32 %p19, %r542, %r214; + @%p19 bra $L__BB9_6; + +$L__BB9_15: + mov.u32 %r545, 0; + @%p9 bra $L__BB9_74; + + sqrt.rn.f32 %f32, %f1; + mov.u32 %r544, %r545; + +$L__BB9_17: + cvt.rn.f32.s32 %f1133, %r540; + add.s32 %r244, %r544, %r3; + mul.wide.s32 %rd73, %r244, 4; + add.s64 %rd74, %rd8, %rd73; + add.s64 %rd75, %rd7, %rd73; + ld.global.f32 %f36, [%rd75]; + ld.global.f32 %f37, [%rd74]; + sub.f32 %f38, %f1133, %f37; + add.f32 %f286, %f38, 0f3F000000; + mul.f32 %f39, %f286, %f32; + abs.f32 %f287, %f39; + setp.ltu.f32 %p21, %f287, 0f3F8060FE; + setp.ge.f32 %p22, %f287, 0f3F8060FE; + mul.f32 %f288, %f39, %f39; + selp.f32 %f289, %f287, %f288, %p22; + selp.f32 %f290, 0f3789CA3C, 0f38B1E96A, %p22; + selp.f32 %f291, 0fB9F560B9, 0fBA574D20, %p22; + fma.rn.f32 %f292, %f290, %f289, %f291; + selp.f32 %f293, 0f3BAC840B, 0f3BAAD5EA, %p22; + fma.rn.f32 %f294, %f292, %f289, %f293; + selp.f32 %f295, 0fBD0C8162, 0fBCDC1BE7, %p22; + fma.rn.f32 %f296, %f294, %f289, %f295; + selp.f32 %f297, 0f3E1CF906, 0f3DE718AF, %p22; + fma.rn.f32 %f298, %f296, %f289, %f297; + selp.f32 %f299, 0f3F6A937E, 0fBEC093AC, %p22; + fma.rn.f32 %f300, %f298, %f289, %f299; + selp.f32 %f301, 0f3F20D842, 0f3E0375D3, %p22; + fma.rn.f32 %f302, %f300, %f289, %f301; + neg.f32 %f303, %f287; + selp.f32 %f304, %f303, %f39, %p22; + fma.rn.f32 %f1182, %f302, %f304, %f304; + @%p21 bra $L__BB9_19; + + mov.f32 %f1161, 0f3F800000; + ex2.approx.ftz.f32 %f305, %f1182; + sub.f32 %f307, %f1161, %f305; + mov.b32 %r245, %f307; + mov.b32 %r246, %f39; + and.b32 %r247, %r246, -2147483648; + or.b32 %r248, %r247, %r245; + mov.b32 %f1182, %r248; + +$L__BB9_19: + add.f32 %f43, %f38, 0fBF000000; + mul.f32 %f44, %f43, %f32; + abs.f32 %f308, %f44; + setp.ltu.f32 %p23, %f308, 0f3F8060FE; + setp.ge.f32 %p24, %f308, 0f3F8060FE; + mul.f32 %f309, %f44, %f44; + selp.f32 %f310, %f308, %f309, %p24; + selp.f32 %f311, 0f3789CA3C, 0f38B1E96A, %p24; + selp.f32 %f312, 0fB9F560B9, 0fBA574D20, %p24; + fma.rn.f32 %f313, %f311, %f310, %f312; + selp.f32 %f314, 0f3BAC840B, 0f3BAAD5EA, %p24; fma.rn.f32 %f315, %f313, %f310, %f314; - mov.f32 %f316, 0f3DE70331; + selp.f32 %f316, 0fBD0C8162, 0fBCDC1BE7, %p24; fma.rn.f32 %f317, %f315, %f310, %f316; - mov.f32 %f318, 0fBEC09330; + selp.f32 %f318, 0f3E1CF906, 0f3DE718AF, %p24; fma.rn.f32 %f319, %f317, %f310, %f318; - mov.f32 %f320, 0f3F906EBA; + selp.f32 %f320, 0f3F6A937E, 0fBEC093AC, %p24; fma.rn.f32 %f321, %f319, %f310, %f320; - mul.f32 %f1421, %f29, %f321; - bra.uni BB9_18; - -BB9_16: - mov.f32 %f292, 0f3A03BB71; - mov.f32 %f293, 0fB7B730FB; - fma.rn.f32 %f294, %f293, %f30, %f292; - mov.f32 %f295, 0fBBACA3B3; - fma.rn.f32 %f296, %f294, %f30, %f295; - mov.f32 %f297, 0f3D0A7445; - fma.rn.f32 %f298, %f296, %f30, %f297; - mov.f32 %f299, 0fBE1B3B75; - fma.rn.f32 %f300, %f298, %f30, %f299; - mov.f32 %f301, 0fBF6B385A; - fma.rn.f32 %f302, %f300, %f30, %f301; - mov.f32 %f303, 0fBFD0316E; - fma.rn.f32 %f304, %f302, %f30, %f303; - mov.f32 %f305, 0fBA031CCE; - fma.rn.f32 %f306, %f304, %f30, %f305; - ex2.approx.ftz.f32 %f307, %f306; - mov.f32 %f308, 0f3F800000; - sub.f32 %f309, %f308, %f307; - mov.b32 %r322, %f309; - setp.ltu.f32 %p16, %f30, 0f407AD445; - selp.b32 %r323, %r322, 1065353216, %p16; - mov.b32 %r324, %f29; - and.b32 %r325, %r324, -2147483648; - or.b32 %r326, %r323, %r325; - mov.b32 %f1421, %r326; - -BB9_18: - sub.f32 %f322, %f1420, %f1421; - mul.f32 %f323, %f322, 0f3F000000; - mul.f32 %f324, %f21, %f323; - mul.f32 %f325, %f22, 0f3F000000; - mul.f32 %f326, %f21, %f325; - mul.f32 %f327, %f326, %f195; - fma.rn.f32 %f1422, %f327, %f324, %f1422; - add.s32 %r821, %r821, 1; - setp.lt.s32 %p17, %r821, %r290; - @%p17 bra BB9_6; - -BB9_19: - mov.u32 %r824, 0; - @%p7 bra BB9_78; - - sqrt.rn.f32 %f36, %f1; - mov.u32 %r824, 0; - mov.u32 %r823, %r824; - -BB9_21: - cvt.rn.f32.s32 %f1376, %r819; - add.s32 %r330, %r823, %r1; - cvta.to.global.u64 %rd78, %rd60; - mul.wide.s32 %rd79, %r330, 4; - add.s64 %rd80, %rd78, %rd79; - cvta.to.global.u64 %rd81, %rd61; - add.s64 %rd82, %rd81, %rd79; - ld.global.f32 %f39, [%rd82]; - ld.global.f32 %f40, [%rd80]; - sub.f32 %f41, %f1376, %f40; - add.f32 %f333, %f41, 0f3F800000; - mul.f32 %f42, %f333, %f36; - abs.f32 %f43, %f42; - setp.ltu.f32 %p19, %f43, 0f3F800000; - @%p19 bra BB9_23; - bra.uni BB9_22; - -BB9_23: - mul.f32 %f352, %f42, %f42; - mov.f32 %f353, 0f3BA0C9F8; - mov.f32 %f354, 0fBA1268FB; - fma.rn.f32 %f355, %f354, %f352, %f353; - mov.f32 %f356, 0fBCDABFD4; - fma.rn.f32 %f357, %f355, %f352, %f356; - mov.f32 %f358, 0f3DE70331; - fma.rn.f32 %f359, %f357, %f352, %f358; - mov.f32 %f360, 0fBEC09330; - fma.rn.f32 %f361, %f359, %f352, %f360; - mov.f32 %f362, 0f3F906EBA; - fma.rn.f32 %f363, %f361, %f352, %f362; - mul.f32 %f1423, %f42, %f363; - bra.uni BB9_24; - -BB9_22: - mov.f32 %f1377, 0f3F800000; - mov.f32 %f334, 0f3A03BB71; - mov.f32 %f335, 0fB7B730FB; - fma.rn.f32 %f336, %f335, %f43, %f334; - mov.f32 %f337, 0fBBACA3B3; - fma.rn.f32 %f338, %f336, %f43, %f337; - mov.f32 %f339, 0f3D0A7445; - fma.rn.f32 %f340, %f338, %f43, %f339; - mov.f32 %f341, 0fBE1B3B75; - fma.rn.f32 %f342, %f340, %f43, %f341; - mov.f32 %f343, 0fBF6B385A; - fma.rn.f32 %f344, %f342, %f43, %f343; - mov.f32 %f345, 0fBFD0316E; - fma.rn.f32 %f346, %f344, %f43, %f345; - mov.f32 %f347, 0fBA031CCE; - fma.rn.f32 %f348, %f346, %f43, %f347; - ex2.approx.ftz.f32 %f349, %f348; - sub.f32 %f351, %f1377, %f349; - mov.b32 %r331, %f351; - setp.ltu.f32 %p20, %f43, 0f407AD445; - selp.b32 %r332, %r331, 1065353216, %p20; - mov.b32 %r333, %f42; - and.b32 %r334, %r333, -2147483648; - or.b32 %r335, %r332, %r334; - mov.b32 %f1423, %r335; - -BB9_24: - mul.f32 %f47, %f41, %f36; - abs.f32 %f48, %f47; - setp.ltu.f32 %p21, %f48, 0f3F800000; - @%p21 bra BB9_26; - bra.uni BB9_25; - -BB9_26: - mul.f32 %f382, %f47, %f47; - mov.f32 %f383, 0f3BA0C9F8; - mov.f32 %f384, 0fBA1268FB; - fma.rn.f32 %f385, %f384, %f382, %f383; - mov.f32 %f386, 0fBCDABFD4; - fma.rn.f32 %f387, %f385, %f382, %f386; - mov.f32 %f388, 0f3DE70331; - fma.rn.f32 %f389, %f387, %f382, %f388; - mov.f32 %f390, 0fBEC09330; - fma.rn.f32 %f391, %f389, %f382, %f390; - mov.f32 %f392, 0f3F906EBA; - fma.rn.f32 %f393, %f391, %f382, %f392; - mul.f32 %f1424, %f47, %f393; - bra.uni BB9_27; - -BB9_25: - mov.f32 %f1378, 0f3F800000; - mov.f32 %f364, 0f3A03BB71; - mov.f32 %f365, 0fB7B730FB; - fma.rn.f32 %f366, %f365, %f48, %f364; - mov.f32 %f367, 0fBBACA3B3; - fma.rn.f32 %f368, %f366, %f48, %f367; - mov.f32 %f369, 0f3D0A7445; - fma.rn.f32 %f370, %f368, %f48, %f369; - mov.f32 %f371, 0fBE1B3B75; - fma.rn.f32 %f372, %f370, %f48, %f371; - mov.f32 %f373, 0fBF6B385A; - fma.rn.f32 %f374, %f372, %f48, %f373; - mov.f32 %f375, 0fBFD0316E; - fma.rn.f32 %f376, %f374, %f48, %f375; - mov.f32 %f377, 0fBA031CCE; - fma.rn.f32 %f378, %f376, %f48, %f377; - ex2.approx.ftz.f32 %f379, %f378; - sub.f32 %f381, %f1378, %f379; - mov.b32 %r336, %f381; - setp.ltu.f32 %p22, %f48, 0f407AD445; - selp.b32 %r337, %r336, 1065353216, %p22; - mov.b32 %r338, %f47; - and.b32 %r339, %r338, -2147483648; - or.b32 %r340, %r337, %r339; - mov.b32 %f1424, %r340; - -BB9_27: - cvt.rn.f32.s32 %f1379, %r820; - sub.f32 %f52, %f1423, %f1424; - sub.f32 %f53, %f1379, %f39; - add.f32 %f394, %f53, 0f3F800000; - mul.f32 %f54, %f394, %f36; - abs.f32 %f55, %f54; - setp.ltu.f32 %p23, %f55, 0f3F800000; - @%p23 bra BB9_29; - bra.uni BB9_28; - -BB9_29: - mul.f32 %f413, %f54, %f54; - mov.f32 %f414, 0f3BA0C9F8; - mov.f32 %f415, 0fBA1268FB; - fma.rn.f32 %f416, %f415, %f413, %f414; - mov.f32 %f417, 0fBCDABFD4; - fma.rn.f32 %f418, %f416, %f413, %f417; - mov.f32 %f419, 0f3DE70331; - fma.rn.f32 %f420, %f418, %f413, %f419; - mov.f32 %f421, 0fBEC09330; - fma.rn.f32 %f422, %f420, %f413, %f421; - mov.f32 %f423, 0f3F906EBA; - fma.rn.f32 %f424, %f422, %f413, %f423; - mul.f32 %f1425, %f54, %f424; - bra.uni BB9_30; - -BB9_28: - mov.f32 %f1380, 0f3F800000; - mov.f32 %f395, 0f3A03BB71; - mov.f32 %f396, 0fB7B730FB; - fma.rn.f32 %f397, %f396, %f55, %f395; - mov.f32 %f398, 0fBBACA3B3; - fma.rn.f32 %f399, %f397, %f55, %f398; - mov.f32 %f400, 0f3D0A7445; - fma.rn.f32 %f401, %f399, %f55, %f400; - mov.f32 %f402, 0fBE1B3B75; - fma.rn.f32 %f403, %f401, %f55, %f402; - mov.f32 %f404, 0fBF6B385A; - fma.rn.f32 %f405, %f403, %f55, %f404; - mov.f32 %f406, 0fBFD0316E; - fma.rn.f32 %f407, %f405, %f55, %f406; - mov.f32 %f408, 0fBA031CCE; - fma.rn.f32 %f409, %f407, %f55, %f408; - ex2.approx.ftz.f32 %f410, %f409; - sub.f32 %f412, %f1380, %f410; - mov.b32 %r341, %f412; - setp.ltu.f32 %p24, %f55, 0f407AD445; - selp.b32 %r342, %r341, 1065353216, %p24; - mov.b32 %r343, %f54; - and.b32 %r344, %r343, -2147483648; - or.b32 %r345, %r342, %r344; - mov.b32 %f1425, %r345; - -BB9_30: - sub.f32 %f1381, %f1379, %f39; - mul.f32 %f59, %f1381, %f36; - abs.f32 %f60, %f59; - setp.ltu.f32 %p25, %f60, 0f3F800000; - @%p25 bra BB9_32; - bra.uni BB9_31; - -BB9_32: - mul.f32 %f443, %f59, %f59; - mov.f32 %f444, 0f3BA0C9F8; - mov.f32 %f445, 0fBA1268FB; - fma.rn.f32 %f446, %f445, %f443, %f444; - mov.f32 %f447, 0fBCDABFD4; - fma.rn.f32 %f448, %f446, %f443, %f447; - mov.f32 %f449, 0f3DE70331; - fma.rn.f32 %f450, %f448, %f443, %f449; - mov.f32 %f451, 0fBEC09330; - fma.rn.f32 %f452, %f450, %f443, %f451; - mov.f32 %f453, 0f3F906EBA; - fma.rn.f32 %f454, %f452, %f443, %f453; - mul.f32 %f1426, %f59, %f454; - bra.uni BB9_33; - -BB9_31: - mov.f32 %f1382, 0f3F800000; - mov.f32 %f425, 0f3A03BB71; - mov.f32 %f426, 0fB7B730FB; - fma.rn.f32 %f427, %f426, %f60, %f425; - mov.f32 %f428, 0fBBACA3B3; - fma.rn.f32 %f429, %f427, %f60, %f428; - mov.f32 %f430, 0f3D0A7445; - fma.rn.f32 %f431, %f429, %f60, %f430; - mov.f32 %f432, 0fBE1B3B75; - fma.rn.f32 %f433, %f431, %f60, %f432; - mov.f32 %f434, 0fBF6B385A; - fma.rn.f32 %f435, %f433, %f60, %f434; - mov.f32 %f436, 0fBFD0316E; - fma.rn.f32 %f437, %f435, %f60, %f436; - mov.f32 %f438, 0fBA031CCE; - fma.rn.f32 %f439, %f437, %f60, %f438; - ex2.approx.ftz.f32 %f440, %f439; - sub.f32 %f442, %f1382, %f440; - mov.b32 %r346, %f442; - setp.ltu.f32 %p26, %f60, 0f407AD445; - selp.b32 %r347, %r346, 1065353216, %p26; - mov.b32 %r348, %f59; - and.b32 %r349, %r348, -2147483648; - or.b32 %r350, %r347, %r349; - mov.b32 %f1426, %r350; - -BB9_33: - mov.f32 %f1384, 0f40000000; - cvt.rn.f32.s32 %f1383, %r819; - sub.f32 %f457, %f1425, %f1426; - mul.f32 %f458, %f457, 0f3F000000; - setp.neu.f32 %p27, %f40, 0f00000000; - selp.f32 %f64, %f458, 0f00000000, %p27; - add.f32 %f459, %f1383, 0f3F800000; - sub.f32 %f460, %f459, %f40; - div.rn.f32 %f65, %f460, %f196; - abs.f32 %f66, %f65; - setp.lt.f32 %p28, %f66, 0f00800000; - mul.f32 %f461, %f66, 0f4B800000; - selp.f32 %f462, 0fC3170000, 0fC2FE0000, %p28; - selp.f32 %f463, %f461, %f66, %p28; - mov.b32 %r351, %f463; - and.b32 %r352, %r351, 8388607; - or.b32 %r353, %r352, 1065353216; - mov.b32 %f464, %r353; - shr.u32 %r354, %r351, 23; - cvt.rn.f32.u32 %f465, %r354; - add.f32 %f466, %f462, %f465; - setp.gt.f32 %p29, %f464, 0f3FB504F3; - mul.f32 %f467, %f464, 0f3F000000; - add.f32 %f468, %f466, 0f3F800000; - selp.f32 %f469, %f467, %f464, %p29; - selp.f32 %f470, %f468, %f466, %p29; - add.f32 %f471, %f469, 0fBF800000; - add.f32 %f456, %f469, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f455,%f456; - // inline asm - add.f32 %f472, %f471, %f471; - mul.f32 %f473, %f455, %f472; - mul.f32 %f474, %f473, %f473; - mov.f32 %f475, 0f3C4CAF63; - mov.f32 %f476, 0f3B18F0FE; - fma.rn.f32 %f477, %f476, %f474, %f475; - mov.f32 %f478, 0f3DAAAABD; - fma.rn.f32 %f479, %f477, %f474, %f478; - mul.rn.f32 %f480, %f479, %f474; - mul.rn.f32 %f481, %f480, %f473; - sub.f32 %f482, %f471, %f473; - neg.f32 %f483, %f473; - add.f32 %f484, %f482, %f482; - fma.rn.f32 %f485, %f483, %f471, %f484; - mul.rn.f32 %f486, %f455, %f485; - add.f32 %f487, %f481, %f473; - sub.f32 %f488, %f473, %f487; - add.f32 %f489, %f481, %f488; - add.f32 %f490, %f486, %f489; - add.f32 %f491, %f487, %f490; - sub.f32 %f492, %f487, %f491; - add.f32 %f493, %f490, %f492; - mov.f32 %f494, 0f3F317200; - mul.rn.f32 %f495, %f470, %f494; - mov.f32 %f496, 0f35BFBE8E; - mul.rn.f32 %f497, %f470, %f496; - add.f32 %f498, %f495, %f491; - sub.f32 %f499, %f495, %f498; - add.f32 %f500, %f491, %f499; - add.f32 %f501, %f493, %f500; - add.f32 %f502, %f497, %f501; - add.f32 %f503, %f498, %f502; - sub.f32 %f504, %f498, %f503; - add.f32 %f505, %f502, %f504; - mul.rn.f32 %f507, %f1384, %f503; - neg.f32 %f508, %f507; - fma.rn.f32 %f509, %f1384, %f503, %f508; - fma.rn.f32 %f510, %f1384, %f505, %f509; - mov.f32 %f511, 0f00000000; - fma.rn.f32 %f512, %f511, %f503, %f510; - add.rn.f32 %f513, %f507, %f512; - neg.f32 %f514, %f513; - add.rn.f32 %f515, %f507, %f514; - add.rn.f32 %f516, %f515, %f512; - mov.b32 %r355, %f513; - setp.eq.s32 %p30, %r355, 1118925336; - add.s32 %r356, %r355, -1; - mov.b32 %f517, %r356; - add.f32 %f518, %f516, 0f37000000; - selp.f32 %f519, %f517, %f513, %p30; - selp.f32 %f67, %f518, %f516, %p30; - mul.f32 %f520, %f519, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f521, %f520; - mov.f32 %f522, 0fBF317200; - fma.rn.f32 %f523, %f521, %f522, %f519; - mov.f32 %f524, 0fB5BFBE8E; - fma.rn.f32 %f525, %f521, %f524, %f523; - mul.f32 %f526, %f525, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f527, %f526; - add.f32 %f528, %f521, 0f00000000; - ex2.approx.f32 %f529, %f528; - mul.f32 %f530, %f527, %f529; - setp.lt.f32 %p31, %f519, 0fC2D20000; - selp.f32 %f531, 0f00000000, %f530, %p31; - setp.gt.f32 %p32, %f519, 0f42D20000; - selp.f32 %f1427, 0f7F800000, %f531, %p32; - setp.eq.f32 %p33, %f1427, 0f7F800000; - @%p33 bra BB9_35; - - fma.rn.f32 %f1427, %f1427, %f67, %f1427; - -BB9_35: - mov.f32 %f1390, 0f3F800000; - cvt.rzi.f32.f32 %f1389, %f1390; - add.f32 %f1388, %f1389, %f1389; - mov.f32 %f1387, 0f40000000; - sub.f32 %f1386, %f1387, %f1388; - abs.f32 %f1385, %f1386; - setp.lt.f32 %p34, %f65, 0f00000000; - setp.eq.f32 %p35, %f1385, 0f3F800000; - and.pred %p1, %p34, %p35; - mov.b32 %r357, %f1427; - xor.b32 %r358, %r357, -2147483648; - mov.b32 %f532, %r358; - selp.f32 %f1429, %f532, %f1427, %p1; - setp.eq.f32 %p36, %f65, 0f00000000; - @%p36 bra BB9_38; - bra.uni BB9_36; - -BB9_38: - add.f32 %f535, %f65, %f65; - selp.f32 %f1429, %f535, 0f00000000, %p35; - bra.uni BB9_39; - -BB9_36: - setp.geu.f32 %p37, %f65, 0f00000000; - @%p37 bra BB9_39; - - mov.f32 %f1400, 0f40000000; - cvt.rzi.f32.f32 %f534, %f1400; - setp.neu.f32 %p38, %f534, 0f40000000; - selp.f32 %f1429, 0f7FFFFFFF, %f1429, %p38; - -BB9_39: - abs.f32 %f1406, %f65; - add.f32 %f536, %f1406, 0f40000000; - mov.b32 %r359, %f536; - setp.lt.s32 %p40, %r359, 2139095040; - @%p40 bra BB9_44; - - abs.f32 %f1409, %f65; - setp.gtu.f32 %p41, %f1409, 0f7F800000; - @%p41 bra BB9_43; - bra.uni BB9_41; - -BB9_43: - add.f32 %f1429, %f65, 0f40000000; - bra.uni BB9_44; - -BB9_41: - abs.f32 %f1410, %f65; - setp.neu.f32 %p42, %f1410, 0f7F800000; - @%p42 bra BB9_44; - - selp.f32 %f1429, 0fFF800000, 0f7F800000, %p1; - -BB9_44: - cvt.rn.f32.s32 %f1408, %r819; - sub.f32 %f1407, %f1408, %f40; - mov.f32 %f1399, 0f00000000; - mov.f32 %f1398, 0f35BFBE8E; - mov.f32 %f1397, 0f3F317200; - mov.f32 %f1396, 0f3DAAAABD; - mov.f32 %f1395, 0f3C4CAF63; - mov.f32 %f1394, 0f3B18F0FE; - mov.f32 %f1393, 0fB5BFBE8E; - mov.f32 %f1392, 0fBF317200; - mov.f32 %f1391, 0f40000000; - mul.f32 %f539, %f1429, 0fBF000000; - setp.eq.f32 %p43, %f65, 0f3F800000; - selp.f32 %f540, 0fBF000000, %f539, %p43; - mul.f32 %f541, %f540, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f542, %f541; - fma.rn.f32 %f544, %f542, %f1392, %f540; - fma.rn.f32 %f546, %f542, %f1393, %f544; - mul.f32 %f547, %f546, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f548, %f547; - add.f32 %f549, %f542, 0f00000000; - ex2.approx.f32 %f550, %f549; - mul.f32 %f551, %f548, %f550; - setp.lt.f32 %p44, %f540, 0fC2D20000; - selp.f32 %f552, 0f00000000, %f551, %p44; - setp.gt.f32 %p45, %f540, 0f42D20000; - selp.f32 %f78, 0f7F800000, %f552, %p45; - div.rn.f32 %f79, %f1407, %f196; - abs.f32 %f80, %f79; - setp.lt.f32 %p46, %f80, 0f00800000; - mul.f32 %f553, %f80, 0f4B800000; - selp.f32 %f554, 0fC3170000, 0fC2FE0000, %p46; - selp.f32 %f555, %f553, %f80, %p46; - mov.b32 %r360, %f555; - and.b32 %r361, %r360, 8388607; - or.b32 %r362, %r361, 1065353216; - mov.b32 %f556, %r362; - shr.u32 %r363, %r360, 23; - cvt.rn.f32.u32 %f557, %r363; - add.f32 %f558, %f554, %f557; - setp.gt.f32 %p47, %f556, 0f3FB504F3; - mul.f32 %f559, %f556, 0f3F000000; - add.f32 %f560, %f558, 0f3F800000; - selp.f32 %f561, %f559, %f556, %p47; - selp.f32 %f562, %f560, %f558, %p47; - add.f32 %f563, %f561, 0fBF800000; - add.f32 %f538, %f561, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f537,%f538; - // inline asm - add.f32 %f564, %f563, %f563; - mul.f32 %f565, %f537, %f564; - mul.f32 %f566, %f565, %f565; - fma.rn.f32 %f569, %f1394, %f566, %f1395; - fma.rn.f32 %f571, %f569, %f566, %f1396; - mul.rn.f32 %f572, %f571, %f566; - mul.rn.f32 %f573, %f572, %f565; - sub.f32 %f574, %f563, %f565; - neg.f32 %f575, %f565; - add.f32 %f576, %f574, %f574; - fma.rn.f32 %f577, %f575, %f563, %f576; - mul.rn.f32 %f578, %f537, %f577; - add.f32 %f579, %f573, %f565; - sub.f32 %f580, %f565, %f579; - add.f32 %f581, %f573, %f580; - add.f32 %f582, %f578, %f581; - add.f32 %f583, %f579, %f582; - sub.f32 %f584, %f579, %f583; - add.f32 %f585, %f582, %f584; - mul.rn.f32 %f587, %f562, %f1397; - mul.rn.f32 %f589, %f562, %f1398; - add.f32 %f590, %f587, %f583; - sub.f32 %f591, %f587, %f590; - add.f32 %f592, %f583, %f591; - add.f32 %f593, %f585, %f592; - add.f32 %f594, %f589, %f593; - add.f32 %f595, %f590, %f594; - sub.f32 %f596, %f590, %f595; - add.f32 %f597, %f594, %f596; - mul.rn.f32 %f599, %f1391, %f595; - neg.f32 %f600, %f599; - fma.rn.f32 %f601, %f1391, %f595, %f600; - fma.rn.f32 %f602, %f1391, %f597, %f601; - fma.rn.f32 %f604, %f1399, %f595, %f602; - add.rn.f32 %f605, %f599, %f604; - neg.f32 %f606, %f605; - add.rn.f32 %f607, %f599, %f606; - add.rn.f32 %f608, %f607, %f604; - mov.b32 %r364, %f605; - setp.eq.s32 %p48, %r364, 1118925336; - add.s32 %r365, %r364, -1; - mov.b32 %f609, %r365; - add.f32 %f610, %f608, 0f37000000; - selp.f32 %f611, %f609, %f605, %p48; - selp.f32 %f81, %f610, %f608, %p48; - mul.f32 %f612, %f611, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f613, %f612; - fma.rn.f32 %f614, %f613, %f1392, %f611; - fma.rn.f32 %f615, %f613, %f1393, %f614; - mul.f32 %f616, %f615, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f617, %f616; - add.f32 %f618, %f613, 0f00000000; - ex2.approx.f32 %f619, %f618; - mul.f32 %f620, %f617, %f619; - setp.lt.f32 %p49, %f611, 0fC2D20000; - selp.f32 %f621, 0f00000000, %f620, %p49; - setp.gt.f32 %p50, %f611, 0f42D20000; - selp.f32 %f1430, 0f7F800000, %f621, %p50; - setp.eq.f32 %p51, %f1430, 0f7F800000; - @%p51 bra BB9_46; - - fma.rn.f32 %f1430, %f1430, %f81, %f1430; - -BB9_46: - setp.lt.f32 %p52, %f79, 0f00000000; - and.pred %p2, %p52, %p35; - mov.b32 %r366, %f1430; - xor.b32 %r367, %r366, -2147483648; - mov.b32 %f622, %r367; - selp.f32 %f1432, %f622, %f1430, %p2; - setp.eq.f32 %p54, %f79, 0f00000000; - @%p54 bra BB9_49; - bra.uni BB9_47; - -BB9_49: - add.f32 %f625, %f79, %f79; - selp.f32 %f1432, %f625, 0f00000000, %p35; - bra.uni BB9_50; - -BB9_47: - setp.geu.f32 %p55, %f79, 0f00000000; - @%p55 bra BB9_50; - - mov.f32 %f1405, 0f40000000; - cvt.rzi.f32.f32 %f624, %f1405; - setp.neu.f32 %p56, %f624, 0f40000000; - selp.f32 %f1432, 0f7FFFFFFF, %f1432, %p56; - -BB9_50: - abs.f32 %f1351, %f79; - add.f32 %f626, %f1351, 0f40000000; - mov.b32 %r368, %f626; - setp.lt.s32 %p58, %r368, 2139095040; - @%p58 bra BB9_55; - - abs.f32 %f1403, %f79; - setp.gtu.f32 %p59, %f1403, 0f7F800000; - @%p59 bra BB9_54; - bra.uni BB9_52; - -BB9_54: - add.f32 %f1432, %f79, 0f40000000; - bra.uni BB9_55; - -BB9_52: - abs.f32 %f1404, %f79; - setp.neu.f32 %p60, %f1404, 0f7F800000; - @%p60 bra BB9_55; - - selp.f32 %f1432, 0fFF800000, 0f7F800000, %p2; - -BB9_55: - mov.f32 %f1361, 0f00000000; - mov.f32 %f1360, 0f35BFBE8E; - mov.f32 %f1359, 0f3F317200; - mov.f32 %f1358, 0f3DAAAABD; - mov.f32 %f1357, 0f3C4CAF63; - mov.f32 %f1356, 0f3B18F0FE; - mov.f32 %f1355, 0fB5BFBE8E; - mov.f32 %f1354, 0fBF317200; - mov.f32 %f1353, 0f40000000; - cvt.rn.f32.s32 %f1352, %r820; - mul.f32 %f629, %f1432, 0fBF000000; - setp.eq.f32 %p61, %f79, 0f3F800000; - selp.f32 %f630, 0fBF000000, %f629, %p61; - mul.f32 %f631, %f630, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f632, %f631; - fma.rn.f32 %f634, %f632, %f1354, %f630; - fma.rn.f32 %f636, %f632, %f1355, %f634; - mul.f32 %f637, %f636, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f638, %f637; - add.f32 %f639, %f632, 0f00000000; - ex2.approx.f32 %f640, %f639; - mul.f32 %f641, %f638, %f640; - setp.lt.f32 %p62, %f630, 0fC2D20000; - selp.f32 %f642, 0f00000000, %f641, %p62; - setp.gt.f32 %p63, %f630, 0f42D20000; - selp.f32 %f643, 0f7F800000, %f642, %p63; - sub.f32 %f644, %f78, %f643; - mul.f32 %f645, %f2, %f644; - mul.f32 %f92, %f64, %f645; - add.f32 %f646, %f1352, 0f3F800000; - sub.f32 %f647, %f646, %f39; - div.rn.f32 %f93, %f647, %f196; - abs.f32 %f94, %f93; - setp.lt.f32 %p64, %f94, 0f00800000; - mul.f32 %f648, %f94, 0f4B800000; - selp.f32 %f649, 0fC3170000, 0fC2FE0000, %p64; - selp.f32 %f650, %f648, %f94, %p64; - mov.b32 %r369, %f650; - and.b32 %r370, %r369, 8388607; - or.b32 %r371, %r370, 1065353216; - mov.b32 %f651, %r371; - shr.u32 %r372, %r369, 23; - cvt.rn.f32.u32 %f652, %r372; - add.f32 %f653, %f649, %f652; - setp.gt.f32 %p65, %f651, 0f3FB504F3; - mul.f32 %f654, %f651, 0f3F000000; - add.f32 %f655, %f653, 0f3F800000; - selp.f32 %f656, %f654, %f651, %p65; - selp.f32 %f657, %f655, %f653, %p65; - add.f32 %f658, %f656, 0fBF800000; - add.f32 %f628, %f656, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f627,%f628; - // inline asm - add.f32 %f659, %f658, %f658; - mul.f32 %f660, %f627, %f659; - mul.f32 %f661, %f660, %f660; - fma.rn.f32 %f664, %f1356, %f661, %f1357; - fma.rn.f32 %f666, %f664, %f661, %f1358; - mul.rn.f32 %f667, %f666, %f661; - mul.rn.f32 %f668, %f667, %f660; - sub.f32 %f669, %f658, %f660; - neg.f32 %f670, %f660; - add.f32 %f671, %f669, %f669; - fma.rn.f32 %f672, %f670, %f658, %f671; - mul.rn.f32 %f673, %f627, %f672; - add.f32 %f674, %f668, %f660; - sub.f32 %f675, %f660, %f674; - add.f32 %f676, %f668, %f675; - add.f32 %f677, %f673, %f676; - add.f32 %f678, %f674, %f677; - sub.f32 %f679, %f674, %f678; + selp.f32 %f322, 0f3F20D842, 0f3E0375D3, %p24; + fma.rn.f32 %f323, %f321, %f310, %f322; + neg.f32 %f324, %f308; + selp.f32 %f325, %f324, %f44, %p24; + fma.rn.f32 %f1183, %f323, %f325, %f325; + @%p23 bra $L__BB9_21; + + mov.f32 %f1160, 0f3F800000; + ex2.approx.ftz.f32 %f326, %f1183; + sub.f32 %f328, %f1160, %f326; + mov.b32 %r249, %f328; + mov.b32 %r250, %f44; + and.b32 %r251, %r250, -2147483648; + or.b32 %r252, %r251, %r249; + mov.b32 %f1183, %r252; + +$L__BB9_21: + cvt.rn.f32.s32 %f1134, %r541; + sub.f32 %f48, %f1182, %f1183; + sub.f32 %f50, %f1134, %f36; + add.f32 %f329, %f50, 0f3F000000; + mul.f32 %f51, %f329, %f32; + abs.f32 %f330, %f51; + setp.ltu.f32 %p26, %f330, 0f3F8060FE; + setp.ge.f32 %p27, %f330, 0f3F8060FE; + mul.f32 %f331, %f51, %f51; + selp.f32 %f332, %f330, %f331, %p27; + selp.f32 %f333, 0f3789CA3C, 0f38B1E96A, %p27; + selp.f32 %f334, 0fB9F560B9, 0fBA574D20, %p27; + fma.rn.f32 %f335, %f333, %f332, %f334; + selp.f32 %f336, 0f3BAC840B, 0f3BAAD5EA, %p27; + fma.rn.f32 %f337, %f335, %f332, %f336; + selp.f32 %f338, 0fBD0C8162, 0fBCDC1BE7, %p27; + fma.rn.f32 %f339, %f337, %f332, %f338; + selp.f32 %f340, 0f3E1CF906, 0f3DE718AF, %p27; + fma.rn.f32 %f341, %f339, %f332, %f340; + selp.f32 %f342, 0f3F6A937E, 0fBEC093AC, %p27; + fma.rn.f32 %f343, %f341, %f332, %f342; + selp.f32 %f344, 0f3F20D842, 0f3E0375D3, %p27; + fma.rn.f32 %f345, %f343, %f332, %f344; + neg.f32 %f346, %f330; + selp.f32 %f347, %f346, %f51, %p27; + fma.rn.f32 %f1184, %f345, %f347, %f347; + @%p26 bra $L__BB9_23; + + mov.f32 %f1159, 0f3F800000; + ex2.approx.ftz.f32 %f348, %f1184; + sub.f32 %f350, %f1159, %f348; + mov.b32 %r253, %f350; + mov.b32 %r254, %f51; + and.b32 %r255, %r254, -2147483648; + or.b32 %r256, %r255, %r253; + mov.b32 %f1184, %r256; + +$L__BB9_23: + cvt.rn.f32.s32 %f1136, %r541; + sub.f32 %f1135, %f1134, %f36; + add.f32 %f55, %f1135, 0fBF000000; + mul.f32 %f56, %f55, %f32; + abs.f32 %f351, %f56; + setp.ltu.f32 %p28, %f351, 0f3F8060FE; + setp.ge.f32 %p29, %f351, 0f3F8060FE; + mul.f32 %f352, %f56, %f56; + selp.f32 %f353, %f351, %f352, %p29; + selp.f32 %f354, 0f3789CA3C, 0f38B1E96A, %p29; + selp.f32 %f355, 0fB9F560B9, 0fBA574D20, %p29; + fma.rn.f32 %f356, %f354, %f353, %f355; + selp.f32 %f357, 0f3BAC840B, 0f3BAAD5EA, %p29; + fma.rn.f32 %f358, %f356, %f353, %f357; + selp.f32 %f359, 0fBD0C8162, 0fBCDC1BE7, %p29; + fma.rn.f32 %f360, %f358, %f353, %f359; + selp.f32 %f361, 0f3E1CF906, 0f3DE718AF, %p29; + fma.rn.f32 %f362, %f360, %f353, %f361; + selp.f32 %f363, 0f3F6A937E, 0fBEC093AC, %p29; + fma.rn.f32 %f364, %f362, %f353, %f363; + selp.f32 %f365, 0f3F20D842, 0f3E0375D3, %p29; + fma.rn.f32 %f366, %f364, %f353, %f365; + neg.f32 %f367, %f351; + selp.f32 %f368, %f367, %f56, %p29; + fma.rn.f32 %f1185, %f366, %f368, %f368; + @%p28 bra $L__BB9_25; + + mov.f32 %f1158, 0f3F800000; + ex2.approx.ftz.f32 %f369, %f1185; + sub.f32 %f371, %f1158, %f369; + mov.b32 %r257, %f371; + mov.b32 %r258, %f56; + and.b32 %r259, %r258, -2147483648; + or.b32 %r260, %r259, %r257; + mov.b32 %f1185, %r260; + +$L__BB9_25: + setp.neu.f32 %p248, %f37, 0f00000000; + selp.f32 %f1140, 0f3F800000, 0f00000000, %p248; + mov.f32 %f1139, 0f40000000; + cvt.rn.f32.s32 %f1138, %r540; + add.f32 %f1137, %f1138, 0f3F000000; + sub.f32 %f373, %f1184, %f1185; + mul.f32 %f374, %f373, 0f3F000000; + mul.f32 %f60, %f374, %f1140; + sub.f32 %f375, %f1137, %f37; + div.rn.f32 %f61, %f375, %f182; + abs.f32 %f62, %f61; + setp.lt.f32 %p30, %f62, 0f00800000; + mul.f32 %f376, %f62, 0f4B800000; + selp.f32 %f377, %f376, %f62, %p30; + selp.f32 %f378, 0fC3170000, 0fC2FE0000, %p30; + mov.b32 %r261, %f377; + and.b32 %r262, %r261, 8388607; + or.b32 %r263, %r262, 1065353216; + mov.b32 %f379, %r263; + shr.u32 %r264, %r261, 23; + cvt.rn.f32.u32 %f380, %r264; + add.f32 %f381, %f378, %f380; + setp.gt.f32 %p31, %f379, 0f3FB504F3; + mul.f32 %f382, %f379, 0f3F000000; + add.f32 %f383, %f381, 0f3F800000; + selp.f32 %f384, %f383, %f381, %p31; + selp.f32 %f385, %f382, %f379, %p31; + add.f32 %f386, %f385, 0fBF800000; + add.f32 %f387, %f385, 0f3F800000; + rcp.approx.ftz.f32 %f388, %f387; + add.f32 %f389, %f386, %f386; + mul.f32 %f391, %f389, %f388; + mul.f32 %f392, %f391, %f391; + mov.f32 %f393, 0f3C4CAF63; + mov.f32 %f394, 0f3B18F0FE; + fma.rn.f32 %f395, %f394, %f392, %f393; + mov.f32 %f396, 0f3DAAAABD; + fma.rn.f32 %f397, %f395, %f392, %f396; + mul.rn.f32 %f398, %f397, %f392; + mul.rn.f32 %f399, %f398, %f391; + sub.f32 %f400, %f386, %f391; + add.f32 %f401, %f400, %f400; + neg.f32 %f402, %f391; + fma.rn.f32 %f403, %f402, %f386, %f401; + mul.rn.f32 %f404, %f388, %f403; + add.f32 %f405, %f399, %f391; + sub.f32 %f406, %f391, %f405; + add.f32 %f407, %f399, %f406; + add.f32 %f408, %f404, %f407; + add.f32 %f409, %f405, %f408; + sub.f32 %f410, %f405, %f409; + add.f32 %f411, %f408, %f410; + mov.f32 %f412, 0f3F317200; + mul.rn.f32 %f413, %f384, %f412; + mov.f32 %f414, 0f35BFBE8E; + mul.rn.f32 %f415, %f384, %f414; + add.f32 %f416, %f413, %f409; + sub.f32 %f417, %f413, %f416; + add.f32 %f418, %f409, %f417; + add.f32 %f419, %f411, %f418; + add.f32 %f420, %f415, %f419; + add.f32 %f421, %f416, %f420; + sub.f32 %f422, %f416, %f421; + add.f32 %f423, %f420, %f422; + mul.rn.f32 %f424, %f1139, %f421; + neg.f32 %f425, %f424; + fma.rn.f32 %f426, %f1139, %f421, %f425; + fma.rn.f32 %f427, %f1139, %f423, %f426; + mov.f32 %f428, 0f00000000; + fma.rn.f32 %f429, %f428, %f421, %f427; + add.rn.f32 %f430, %f424, %f429; + neg.f32 %f431, %f430; + add.rn.f32 %f432, %f424, %f431; + add.rn.f32 %f433, %f432, %f429; + mov.b32 %r265, %f430; + setp.eq.s32 %p32, %r265, 1118925336; + add.s32 %r266, %r265, -1; + mov.b32 %f434, %r266; + add.f32 %f435, %f433, 0f37000000; + selp.f32 %f63, %f435, %f433, %p32; + selp.f32 %f436, %f434, %f430, %p32; + mov.f32 %f437, 0f3FB8AA3B; + mul.rn.f32 %f438, %f436, %f437; + cvt.rzi.f32.f32 %f439, %f438; + abs.f32 %f440, %f439; + setp.gt.f32 %p33, %f440, 0f42FC0000; + mov.b32 %r267, %f439; + and.b32 %r268, %r267, -2147483648; + or.b32 %r269, %r268, 1123811328; + mov.b32 %f441, %r269; + selp.f32 %f442, %f441, %f439, %p33; + mov.f32 %f443, 0fBF317218; + fma.rn.f32 %f444, %f442, %f443, %f436; + mov.f32 %f445, 0f3102E308; + fma.rn.f32 %f446, %f442, %f445, %f444; + mul.f32 %f447, %f446, 0f3FB8AA3B; + add.f32 %f448, %f442, 0f4B40007F; + mov.b32 %r270, %f448; + shl.b32 %r271, %r270, 23; + mov.b32 %f449, %r271; + ex2.approx.ftz.f32 %f450, %f447; + mul.f32 %f64, %f450, %f449; + setp.eq.f32 %p34, %f64, 0f7F800000; + mov.f32 %f1186, 0f7F800000; + @%p34 bra $L__BB9_27; + + fma.rn.f32 %f1186, %f64, %f63, %f64; + +$L__BB9_27: + mov.f32 %f1146, 0f3F800000; + cvt.rzi.f32.f32 %f1145, %f1146; + add.f32 %f1144, %f1145, %f1145; + mov.f32 %f1143, 0f40000000; + sub.f32 %f1142, %f1143, %f1144; + abs.f32 %f1141, %f1142; + setp.lt.f32 %p35, %f61, 0f00000000; + setp.eq.f32 %p36, %f1141, 0f3F800000; + and.pred %p1, %p35, %p36; + setp.eq.f32 %p37, %f61, 0f00000000; + @%p37 bra $L__BB9_31; + bra.uni $L__BB9_28; + +$L__BB9_31: + add.f32 %f455, %f61, %f61; + selp.f32 %f1188, %f455, 0f00000000, %p36; + bra.uni $L__BB9_32; + +$L__BB9_28: + mov.b32 %r272, %f1186; + xor.b32 %r273, %r272, -2147483648; + mov.b32 %f451, %r273; + selp.f32 %f1188, %f451, %f1186, %p1; + setp.geu.f32 %p38, %f61, 0f00000000; + @%p38 bra $L__BB9_32; + + mov.f32 %f1157, 0f40000000; + cvt.rzi.f32.f32 %f453, %f1157; + setp.eq.f32 %p39, %f453, 0f40000000; + @%p39 bra $L__BB9_32; + + mov.f32 %f1188, 0f7FFFFFFF; + +$L__BB9_32: + abs.f32 %f1167, %f61; + add.f32 %f456, %f1167, 0f40000000; + mov.b32 %r274, %f456; + setp.lt.s32 %p41, %r274, 2139095040; + @%p41 bra $L__BB9_37; + + abs.f32 %f1168, %f61; + setp.gtu.f32 %p42, %f1168, 0f7F800000; + @%p42 bra $L__BB9_36; + bra.uni $L__BB9_34; + +$L__BB9_36: + add.f32 %f1188, %f61, 0f40000000; + bra.uni $L__BB9_37; + +$L__BB9_34: + abs.f32 %f1169, %f61; + setp.neu.f32 %p43, %f1169, 0f7F800000; + @%p43 bra $L__BB9_37; + + selp.f32 %f1188, 0fFF800000, 0f7F800000, %p1; + +$L__BB9_37: + mov.f32 %f1156, 0f3102E308; + mov.f32 %f1155, 0fBF317218; + mov.f32 %f1154, 0f00000000; + mov.f32 %f1153, 0f35BFBE8E; + mov.f32 %f1152, 0f3F317200; + mov.f32 %f1151, 0f3DAAAABD; + mov.f32 %f1150, 0f3C4CAF63; + mov.f32 %f1149, 0f3B18F0FE; + mov.f32 %f1148, 0f3FB8AA3B; + mov.f32 %f1147, 0f40000000; + mul.f32 %f458, %f1188, 0fBF000000; + setp.eq.f32 %p44, %f61, 0f3F800000; + selp.f32 %f459, 0fBF000000, %f458, %p44; + mov.f32 %f461, 0f3BBB989D; + fma.rn.f32 %f462, %f459, %f461, %f183; + mov.f32 %f464, 0f437C0000; + cvt.sat.f32.f32 %f465, %f462; + mov.f32 %f466, 0f4B400001; + fma.rm.f32 %f467, %f465, %f464, %f466; + add.f32 %f468, %f467, 0fCB40007F; + neg.f32 %f469, %f468; + fma.rn.f32 %f470, %f459, %f1148, %f469; + mov.f32 %f471, 0f32A57060; + fma.rn.f32 %f472, %f459, %f471, %f470; + mov.b32 %r275, %f467; + shl.b32 %r276, %r275, 23; + mov.b32 %f473, %r276; + ex2.approx.ftz.f32 %f474, %f472; + mul.f32 %f73, %f474, %f473; + div.rn.f32 %f74, %f43, %f182; + abs.f32 %f75, %f74; + setp.lt.f32 %p45, %f75, 0f00800000; + mul.f32 %f475, %f75, 0f4B800000; + selp.f32 %f476, %f475, %f75, %p45; + selp.f32 %f477, 0fC3170000, 0fC2FE0000, %p45; + mov.b32 %r277, %f476; + and.b32 %r278, %r277, 8388607; + or.b32 %r279, %r278, 1065353216; + mov.b32 %f478, %r279; + shr.u32 %r280, %r277, 23; + cvt.rn.f32.u32 %f479, %r280; + add.f32 %f480, %f477, %f479; + setp.gt.f32 %p46, %f478, 0f3FB504F3; + mul.f32 %f481, %f478, 0f3F000000; + add.f32 %f482, %f480, 0f3F800000; + selp.f32 %f483, %f482, %f480, %p46; + selp.f32 %f484, %f481, %f478, %p46; + add.f32 %f485, %f484, 0fBF800000; + add.f32 %f486, %f484, 0f3F800000; + rcp.approx.ftz.f32 %f487, %f486; + add.f32 %f488, %f485, %f485; + mul.f32 %f490, %f488, %f487; + mul.f32 %f491, %f490, %f490; + fma.rn.f32 %f494, %f1149, %f491, %f1150; + fma.rn.f32 %f496, %f494, %f491, %f1151; + mul.rn.f32 %f497, %f496, %f491; + mul.rn.f32 %f498, %f497, %f490; + sub.f32 %f499, %f485, %f490; + add.f32 %f500, %f499, %f499; + neg.f32 %f501, %f490; + fma.rn.f32 %f502, %f501, %f485, %f500; + mul.rn.f32 %f503, %f487, %f502; + add.f32 %f504, %f498, %f490; + sub.f32 %f505, %f490, %f504; + add.f32 %f506, %f498, %f505; + add.f32 %f507, %f503, %f506; + add.f32 %f508, %f504, %f507; + sub.f32 %f509, %f504, %f508; + add.f32 %f510, %f507, %f509; + mul.rn.f32 %f512, %f483, %f1152; + mul.rn.f32 %f514, %f483, %f1153; + add.f32 %f515, %f512, %f508; + sub.f32 %f516, %f512, %f515; + add.f32 %f517, %f508, %f516; + add.f32 %f518, %f510, %f517; + add.f32 %f519, %f514, %f518; + add.f32 %f520, %f515, %f519; + sub.f32 %f521, %f515, %f520; + add.f32 %f522, %f519, %f521; + mul.rn.f32 %f523, %f1147, %f520; + neg.f32 %f524, %f523; + fma.rn.f32 %f525, %f1147, %f520, %f524; + fma.rn.f32 %f526, %f1147, %f522, %f525; + fma.rn.f32 %f528, %f1154, %f520, %f526; + add.rn.f32 %f529, %f523, %f528; + neg.f32 %f530, %f529; + add.rn.f32 %f531, %f523, %f530; + add.rn.f32 %f532, %f531, %f528; + mov.b32 %r281, %f529; + setp.eq.s32 %p47, %r281, 1118925336; + add.s32 %r282, %r281, -1; + mov.b32 %f533, %r282; + add.f32 %f534, %f532, 0f37000000; + selp.f32 %f76, %f534, %f532, %p47; + selp.f32 %f535, %f533, %f529, %p47; + mul.rn.f32 %f536, %f535, %f1148; + cvt.rzi.f32.f32 %f537, %f536; + abs.f32 %f538, %f537; + setp.gt.f32 %p48, %f538, 0f42FC0000; + mov.b32 %r283, %f537; + and.b32 %r284, %r283, -2147483648; + or.b32 %r285, %r284, 1123811328; + mov.b32 %f539, %r285; + selp.f32 %f540, %f539, %f537, %p48; + fma.rn.f32 %f542, %f540, %f1155, %f535; + fma.rn.f32 %f544, %f540, %f1156, %f542; + mul.f32 %f545, %f544, 0f3FB8AA3B; + add.f32 %f546, %f540, 0f4B40007F; + mov.b32 %r286, %f546; + shl.b32 %r287, %r286, 23; + mov.b32 %f547, %r287; + ex2.approx.ftz.f32 %f548, %f545; + mul.f32 %f77, %f548, %f547; + setp.eq.f32 %p49, %f77, 0f7F800000; + mov.f32 %f1189, 0f7F800000; + @%p49 bra $L__BB9_39; + + fma.rn.f32 %f1189, %f77, %f76, %f77; + +$L__BB9_39: + setp.lt.f32 %p50, %f74, 0f00000000; + and.pred %p2, %p50, %p36; + setp.eq.f32 %p52, %f74, 0f00000000; + @%p52 bra $L__BB9_43; + bra.uni $L__BB9_40; + +$L__BB9_43: + add.f32 %f553, %f74, %f74; + selp.f32 %f1191, %f553, 0f00000000, %p36; + bra.uni $L__BB9_44; + +$L__BB9_40: + mov.b32 %r288, %f1189; + xor.b32 %r289, %r288, -2147483648; + mov.b32 %f549, %r289; + selp.f32 %f1191, %f549, %f1189, %p2; + setp.geu.f32 %p53, %f74, 0f00000000; + @%p53 bra $L__BB9_44; + + mov.f32 %f1166, 0f40000000; + cvt.rzi.f32.f32 %f551, %f1166; + setp.eq.f32 %p54, %f551, 0f40000000; + @%p54 bra $L__BB9_44; + + mov.f32 %f1191, 0f7FFFFFFF; + +$L__BB9_44: + abs.f32 %f1092, %f74; + add.f32 %f554, %f1092, 0f40000000; + mov.b32 %r290, %f554; + setp.lt.s32 %p56, %r290, 2139095040; + @%p56 bra $L__BB9_49; + + abs.f32 %f1164, %f74; + setp.gtu.f32 %p57, %f1164, 0f7F800000; + @%p57 bra $L__BB9_48; + bra.uni $L__BB9_46; + +$L__BB9_48: + add.f32 %f1191, %f74, 0f40000000; + bra.uni $L__BB9_49; + +$L__BB9_46: + abs.f32 %f1165, %f74; + setp.neu.f32 %p58, %f1165, 0f7F800000; + @%p58 bra $L__BB9_49; + + selp.f32 %f1191, 0fFF800000, 0f7F800000, %p2; + +$L__BB9_49: + cvt.rn.f32.s32 %f1108, %r541; + add.f32 %f1107, %f1108, 0f3F000000; + mov.f32 %f1106, 0f32A57060; + mov.f32 %f1105, 0f4B400001; + mov.f32 %f1104, 0f437C0000; + mov.f32 %f1103, 0f3BBB989D; + mov.f32 %f1102, 0f3102E308; + mov.f32 %f1101, 0fBF317218; + mov.f32 %f1100, 0f00000000; + mov.f32 %f1099, 0f35BFBE8E; + mov.f32 %f1098, 0f3F317200; + mov.f32 %f1097, 0f3DAAAABD; + mov.f32 %f1096, 0f3C4CAF63; + mov.f32 %f1095, 0f3B18F0FE; + mov.f32 %f1094, 0f3FB8AA3B; + mov.f32 %f1093, 0f40000000; + mul.f32 %f556, %f1191, 0fBF000000; + setp.eq.f32 %p59, %f74, 0f3F800000; + selp.f32 %f557, 0fBF000000, %f556, %p59; + fma.rn.f32 %f560, %f557, %f1103, %f183; + cvt.sat.f32.f32 %f563, %f560; + fma.rm.f32 %f565, %f563, %f1104, %f1105; + add.f32 %f566, %f565, 0fCB40007F; + neg.f32 %f567, %f566; + fma.rn.f32 %f568, %f557, %f1094, %f567; + fma.rn.f32 %f570, %f557, %f1106, %f568; + mov.b32 %r291, %f565; + shl.b32 %r292, %r291, 23; + mov.b32 %f571, %r292; + ex2.approx.ftz.f32 %f572, %f570; + mul.f32 %f573, %f572, %f571; + sub.f32 %f574, %f73, %f573; + mul.f32 %f575, %f2, %f574; + mul.f32 %f86, %f60, %f575; + sub.f32 %f576, %f1107, %f36; + div.rn.f32 %f87, %f576, %f182; + abs.f32 %f88, %f87; + setp.lt.f32 %p60, %f88, 0f00800000; + mul.f32 %f577, %f88, 0f4B800000; + selp.f32 %f578, %f577, %f88, %p60; + selp.f32 %f579, 0fC3170000, 0fC2FE0000, %p60; + mov.b32 %r293, %f578; + and.b32 %r294, %r293, 8388607; + or.b32 %r295, %r294, 1065353216; + mov.b32 %f580, %r295; + shr.u32 %r296, %r293, 23; + cvt.rn.f32.u32 %f581, %r296; + add.f32 %f582, %f579, %f581; + setp.gt.f32 %p61, %f580, 0f3FB504F3; + mul.f32 %f583, %f580, 0f3F000000; + add.f32 %f584, %f582, 0f3F800000; + selp.f32 %f585, %f584, %f582, %p61; + selp.f32 %f586, %f583, %f580, %p61; + add.f32 %f587, %f586, 0fBF800000; + add.f32 %f588, %f586, 0f3F800000; + rcp.approx.ftz.f32 %f589, %f588; + add.f32 %f590, %f587, %f587; + mul.f32 %f592, %f590, %f589; + mul.f32 %f593, %f592, %f592; + fma.rn.f32 %f596, %f1095, %f593, %f1096; + fma.rn.f32 %f598, %f596, %f593, %f1097; + mul.rn.f32 %f599, %f598, %f593; + mul.rn.f32 %f600, %f599, %f592; + sub.f32 %f601, %f587, %f592; + add.f32 %f602, %f601, %f601; + neg.f32 %f603, %f592; + fma.rn.f32 %f604, %f603, %f587, %f602; + mul.rn.f32 %f605, %f589, %f604; + add.f32 %f606, %f600, %f592; + sub.f32 %f607, %f592, %f606; + add.f32 %f608, %f600, %f607; + add.f32 %f609, %f605, %f608; + add.f32 %f610, %f606, %f609; + sub.f32 %f611, %f606, %f610; + add.f32 %f612, %f609, %f611; + mul.rn.f32 %f614, %f585, %f1098; + mul.rn.f32 %f616, %f585, %f1099; + add.f32 %f617, %f614, %f610; + sub.f32 %f618, %f614, %f617; + add.f32 %f619, %f610, %f618; + add.f32 %f620, %f612, %f619; + add.f32 %f621, %f616, %f620; + add.f32 %f622, %f617, %f621; + sub.f32 %f623, %f617, %f622; + add.f32 %f624, %f621, %f623; + mul.rn.f32 %f625, %f1093, %f622; + neg.f32 %f626, %f625; + fma.rn.f32 %f627, %f1093, %f622, %f626; + fma.rn.f32 %f628, %f1093, %f624, %f627; + fma.rn.f32 %f630, %f1100, %f622, %f628; + add.rn.f32 %f631, %f625, %f630; + neg.f32 %f632, %f631; + add.rn.f32 %f633, %f625, %f632; + add.rn.f32 %f634, %f633, %f630; + mov.b32 %r297, %f631; + setp.eq.s32 %p62, %r297, 1118925336; + add.s32 %r298, %r297, -1; + mov.b32 %f635, %r298; + add.f32 %f636, %f634, 0f37000000; + selp.f32 %f89, %f636, %f634, %p62; + selp.f32 %f637, %f635, %f631, %p62; + mul.rn.f32 %f638, %f637, %f1094; + cvt.rzi.f32.f32 %f639, %f638; + abs.f32 %f640, %f639; + setp.gt.f32 %p63, %f640, 0f42FC0000; + mov.b32 %r299, %f639; + and.b32 %r300, %r299, -2147483648; + or.b32 %r301, %r300, 1123811328; + mov.b32 %f641, %r301; + selp.f32 %f642, %f641, %f639, %p63; + fma.rn.f32 %f644, %f642, %f1101, %f637; + fma.rn.f32 %f646, %f642, %f1102, %f644; + mul.f32 %f647, %f646, 0f3FB8AA3B; + add.f32 %f648, %f642, 0f4B40007F; + mov.b32 %r302, %f648; + shl.b32 %r303, %r302, 23; + mov.b32 %f649, %r303; + ex2.approx.ftz.f32 %f650, %f647; + mul.f32 %f90, %f650, %f649; + setp.eq.f32 %p64, %f90, 0f7F800000; + mov.f32 %f1192, 0f7F800000; + @%p64 bra $L__BB9_51; + + fma.rn.f32 %f1192, %f90, %f89, %f90; + +$L__BB9_51: + setp.lt.f32 %p65, %f87, 0f00000000; + and.pred %p3, %p65, %p36; + setp.eq.f32 %p67, %f87, 0f00000000; + @%p67 bra $L__BB9_55; + bra.uni $L__BB9_52; + +$L__BB9_55: + add.f32 %f655, %f87, %f87; + selp.f32 %f1194, %f655, 0f00000000, %p36; + bra.uni $L__BB9_56; + +$L__BB9_52: + mov.b32 %r304, %f1192; + xor.b32 %r305, %r304, -2147483648; + mov.b32 %f651, %r305; + selp.f32 %f1194, %f651, %f1192, %p3; + setp.geu.f32 %p68, %f87, 0f00000000; + @%p68 bra $L__BB9_56; + + mov.f32 %f1163, 0f40000000; + cvt.rzi.f32.f32 %f653, %f1163; + setp.eq.f32 %p69, %f653, 0f40000000; + @%p69 bra $L__BB9_56; + + mov.f32 %f1194, 0f7FFFFFFF; + +$L__BB9_56: + abs.f32 %f1170, %f87; + add.f32 %f656, %f1170, 0f40000000; + mov.b32 %r306, %f656; + setp.lt.s32 %p71, %r306, 2139095040; + @%p71 bra $L__BB9_61; + + abs.f32 %f1171, %f87; + setp.gtu.f32 %p72, %f1171, 0f7F800000; + @%p72 bra $L__BB9_60; + bra.uni $L__BB9_58; + +$L__BB9_60: + add.f32 %f1194, %f87, 0f40000000; + bra.uni $L__BB9_61; + +$L__BB9_58: + abs.f32 %f1172, %f87; + setp.neu.f32 %p73, %f1172, 0f7F800000; + @%p73 bra $L__BB9_61; + + selp.f32 %f1194, 0fFF800000, 0f7F800000, %p3; + +$L__BB9_61: + cvt.rn.f32.s32 %f1125, %r541; + sub.f32 %f1124, %f1125, %f36; + add.f32 %f1123, %f1124, 0fBF000000; + mov.f32 %f1122, 0f32A57060; + mov.f32 %f1121, 0f4B400001; + mov.f32 %f1120, 0f437C0000; + mov.f32 %f1119, 0f3BBB989D; + mov.f32 %f1118, 0f3102E308; + mov.f32 %f1117, 0fBF317218; + mov.f32 %f1116, 0f00000000; + mov.f32 %f1115, 0f35BFBE8E; + mov.f32 %f1114, 0f3F317200; + mov.f32 %f1113, 0f3DAAAABD; + mov.f32 %f1112, 0f3C4CAF63; + mov.f32 %f1111, 0f3B18F0FE; + mov.f32 %f1110, 0f3FB8AA3B; + mov.f32 %f1109, 0f40000000; + mul.f32 %f658, %f1194, 0fBF000000; + setp.eq.f32 %p74, %f87, 0f3F800000; + selp.f32 %f659, 0fBF000000, %f658, %p74; + fma.rn.f32 %f662, %f659, %f1119, %f183; + cvt.sat.f32.f32 %f665, %f662; + fma.rm.f32 %f667, %f665, %f1120, %f1121; + add.f32 %f668, %f667, 0fCB40007F; + neg.f32 %f669, %f668; + fma.rn.f32 %f670, %f659, %f1110, %f669; + fma.rn.f32 %f672, %f659, %f1122, %f670; + mov.b32 %r307, %f667; + shl.b32 %r308, %r307, 23; + mov.b32 %f673, %r308; + ex2.approx.ftz.f32 %f674, %f672; + mul.f32 %f99, %f674, %f673; + div.rn.f32 %f100, %f1123, %f182; + abs.f32 %f101, %f100; + setp.lt.f32 %p75, %f101, 0f00800000; + mul.f32 %f675, %f101, 0f4B800000; + selp.f32 %f676, %f675, %f101, %p75; + selp.f32 %f677, 0fC3170000, 0fC2FE0000, %p75; + mov.b32 %r309, %f676; + and.b32 %r310, %r309, 8388607; + or.b32 %r311, %r310, 1065353216; + mov.b32 %f678, %r311; + shr.u32 %r312, %r309, 23; + cvt.rn.f32.u32 %f679, %r312; add.f32 %f680, %f677, %f679; - mul.rn.f32 %f682, %f657, %f1359; - mul.rn.f32 %f684, %f657, %f1360; - add.f32 %f685, %f682, %f678; - sub.f32 %f686, %f682, %f685; - add.f32 %f687, %f678, %f686; - add.f32 %f688, %f680, %f687; - add.f32 %f689, %f684, %f688; - add.f32 %f690, %f685, %f689; - sub.f32 %f691, %f685, %f690; - add.f32 %f692, %f689, %f691; - mul.rn.f32 %f694, %f1353, %f690; - neg.f32 %f695, %f694; - fma.rn.f32 %f696, %f1353, %f690, %f695; - fma.rn.f32 %f697, %f1353, %f692, %f696; - fma.rn.f32 %f699, %f1361, %f690, %f697; - add.rn.f32 %f700, %f694, %f699; - neg.f32 %f701, %f700; - add.rn.f32 %f702, %f694, %f701; - add.rn.f32 %f703, %f702, %f699; - mov.b32 %r373, %f700; - setp.eq.s32 %p66, %r373, 1118925336; - add.s32 %r374, %r373, -1; - mov.b32 %f704, %r374; - add.f32 %f705, %f703, 0f37000000; - selp.f32 %f706, %f704, %f700, %p66; - selp.f32 %f95, %f705, %f703, %p66; - mul.f32 %f707, %f706, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f708, %f707; - fma.rn.f32 %f709, %f708, %f1354, %f706; - fma.rn.f32 %f710, %f708, %f1355, %f709; - mul.f32 %f711, %f710, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f712, %f711; - add.f32 %f713, %f708, 0f00000000; - ex2.approx.f32 %f714, %f713; - mul.f32 %f715, %f712, %f714; - setp.lt.f32 %p67, %f706, 0fC2D20000; - selp.f32 %f716, 0f00000000, %f715, %p67; - setp.gt.f32 %p68, %f706, 0f42D20000; - selp.f32 %f1433, 0f7F800000, %f716, %p68; - setp.eq.f32 %p69, %f1433, 0f7F800000; - @%p69 bra BB9_57; - - fma.rn.f32 %f1433, %f1433, %f95, %f1433; - -BB9_57: - setp.lt.f32 %p70, %f93, 0f00000000; - and.pred %p3, %p70, %p35; - mov.b32 %r375, %f1433; - xor.b32 %r376, %r375, -2147483648; - mov.b32 %f717, %r376; - selp.f32 %f1435, %f717, %f1433, %p3; - setp.eq.f32 %p72, %f93, 0f00000000; - @%p72 bra BB9_60; - bra.uni BB9_58; - -BB9_60: - add.f32 %f720, %f93, %f93; - selp.f32 %f1435, %f720, 0f00000000, %p35; - bra.uni BB9_61; - -BB9_58: - setp.geu.f32 %p73, %f93, 0f00000000; - @%p73 bra BB9_61; - - mov.f32 %f1402, 0f40000000; - cvt.rzi.f32.f32 %f719, %f1402; - setp.neu.f32 %p74, %f719, 0f40000000; - selp.f32 %f1435, 0f7FFFFFFF, %f1435, %p74; - -BB9_61: - abs.f32 %f1411, %f93; - add.f32 %f721, %f1411, 0f40000000; - mov.b32 %r377, %f721; - setp.lt.s32 %p76, %r377, 2139095040; - @%p76 bra BB9_66; - - abs.f32 %f1412, %f93; - setp.gtu.f32 %p77, %f1412, 0f7F800000; - @%p77 bra BB9_65; - bra.uni BB9_63; - -BB9_65: - add.f32 %f1435, %f93, 0f40000000; - bra.uni BB9_66; - -BB9_63: - abs.f32 %f1413, %f93; - setp.neu.f32 %p78, %f1413, 0f7F800000; - @%p78 bra BB9_66; - - selp.f32 %f1435, 0fFF800000, 0f7F800000, %p3; - -BB9_66: - mov.f32 %f1372, 0f00000000; - mov.f32 %f1371, 0f35BFBE8E; - mov.f32 %f1370, 0f3F317200; - mov.f32 %f1369, 0f3DAAAABD; - mov.f32 %f1368, 0f3C4CAF63; - mov.f32 %f1367, 0f3B18F0FE; - mov.f32 %f1366, 0fB5BFBE8E; - mov.f32 %f1365, 0fBF317200; - mov.f32 %f1364, 0f40000000; - cvt.rn.f32.s32 %f1363, %r820; - sub.f32 %f1362, %f1363, %f39; - mul.f32 %f724, %f1435, 0fBF000000; - setp.eq.f32 %p79, %f93, 0f3F800000; - selp.f32 %f725, 0fBF000000, %f724, %p79; - mul.f32 %f726, %f725, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f727, %f726; - fma.rn.f32 %f729, %f727, %f1365, %f725; - fma.rn.f32 %f731, %f727, %f1366, %f729; - mul.f32 %f732, %f731, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f733, %f732; - add.f32 %f734, %f727, 0f00000000; - ex2.approx.f32 %f735, %f734; - mul.f32 %f736, %f733, %f735; - setp.lt.f32 %p80, %f725, 0fC2D20000; - selp.f32 %f737, 0f00000000, %f736, %p80; - setp.gt.f32 %p81, %f725, 0f42D20000; - selp.f32 %f106, 0f7F800000, %f737, %p81; - div.rn.f32 %f107, %f1362, %f196; - abs.f32 %f108, %f107; - setp.lt.f32 %p82, %f108, 0f00800000; - mul.f32 %f738, %f108, 0f4B800000; - selp.f32 %f739, 0fC3170000, 0fC2FE0000, %p82; - selp.f32 %f740, %f738, %f108, %p82; - mov.b32 %r378, %f740; - and.b32 %r379, %r378, 8388607; - or.b32 %r380, %r379, 1065353216; - mov.b32 %f741, %r380; - shr.u32 %r381, %r378, 23; - cvt.rn.f32.u32 %f742, %r381; - add.f32 %f743, %f739, %f742; - setp.gt.f32 %p83, %f741, 0f3FB504F3; - mul.f32 %f744, %f741, 0f3F000000; - add.f32 %f745, %f743, 0f3F800000; - selp.f32 %f746, %f744, %f741, %p83; - selp.f32 %f747, %f745, %f743, %p83; - add.f32 %f748, %f746, 0fBF800000; - add.f32 %f723, %f746, 0f3F800000; - // inline asm - rcp.approx.ftz.f32 %f722,%f723; - // inline asm - add.f32 %f749, %f748, %f748; - mul.f32 %f750, %f722, %f749; - mul.f32 %f751, %f750, %f750; - fma.rn.f32 %f754, %f1367, %f751, %f1368; - fma.rn.f32 %f756, %f754, %f751, %f1369; - mul.rn.f32 %f757, %f756, %f751; - mul.rn.f32 %f758, %f757, %f750; - sub.f32 %f759, %f748, %f750; - neg.f32 %f760, %f750; - add.f32 %f761, %f759, %f759; - fma.rn.f32 %f762, %f760, %f748, %f761; - mul.rn.f32 %f763, %f722, %f762; - add.f32 %f764, %f758, %f750; - sub.f32 %f765, %f750, %f764; - add.f32 %f766, %f758, %f765; - add.f32 %f767, %f763, %f766; - add.f32 %f768, %f764, %f767; - sub.f32 %f769, %f764, %f768; - add.f32 %f770, %f767, %f769; - mul.rn.f32 %f772, %f747, %f1370; - mul.rn.f32 %f774, %f747, %f1371; - add.f32 %f775, %f772, %f768; - sub.f32 %f776, %f772, %f775; - add.f32 %f777, %f768, %f776; - add.f32 %f778, %f770, %f777; - add.f32 %f779, %f774, %f778; - add.f32 %f780, %f775, %f779; - sub.f32 %f781, %f775, %f780; - add.f32 %f782, %f779, %f781; - mul.rn.f32 %f784, %f1364, %f780; - neg.f32 %f785, %f784; - fma.rn.f32 %f786, %f1364, %f780, %f785; - fma.rn.f32 %f787, %f1364, %f782, %f786; - fma.rn.f32 %f789, %f1372, %f780, %f787; - add.rn.f32 %f790, %f784, %f789; - neg.f32 %f791, %f790; - add.rn.f32 %f792, %f784, %f791; - add.rn.f32 %f793, %f792, %f789; - mov.b32 %r382, %f790; - setp.eq.s32 %p84, %r382, 1118925336; - add.s32 %r383, %r382, -1; - mov.b32 %f794, %r383; - add.f32 %f795, %f793, 0f37000000; - selp.f32 %f796, %f794, %f790, %p84; - selp.f32 %f109, %f795, %f793, %p84; - mul.f32 %f797, %f796, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f798, %f797; - fma.rn.f32 %f799, %f798, %f1365, %f796; - fma.rn.f32 %f800, %f798, %f1366, %f799; - mul.f32 %f801, %f800, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f802, %f801; - add.f32 %f803, %f798, 0f00000000; - ex2.approx.f32 %f804, %f803; - mul.f32 %f805, %f802, %f804; - setp.lt.f32 %p85, %f796, 0fC2D20000; - selp.f32 %f806, 0f00000000, %f805, %p85; - setp.gt.f32 %p86, %f796, 0f42D20000; - selp.f32 %f1436, 0f7F800000, %f806, %p86; - setp.eq.f32 %p87, %f1436, 0f7F800000; - @%p87 bra BB9_68; - - fma.rn.f32 %f1436, %f1436, %f109, %f1436; - -BB9_68: - setp.lt.f32 %p88, %f107, 0f00000000; - and.pred %p4, %p88, %p35; - mov.b32 %r384, %f1436; - xor.b32 %r385, %r384, -2147483648; - mov.b32 %f807, %r385; - selp.f32 %f1438, %f807, %f1436, %p4; - setp.eq.f32 %p90, %f107, 0f00000000; - @%p90 bra BB9_71; - bra.uni BB9_69; - -BB9_71: - add.f32 %f810, %f107, %f107; - selp.f32 %f1438, %f810, 0f00000000, %p35; - bra.uni BB9_72; - -BB9_69: - setp.geu.f32 %p91, %f107, 0f00000000; - @%p91 bra BB9_72; - - mov.f32 %f1401, 0f40000000; - cvt.rzi.f32.f32 %f809, %f1401; - setp.neu.f32 %p92, %f809, 0f40000000; - selp.f32 %f1438, 0f7FFFFFFF, %f1438, %p92; - -BB9_72: - abs.f32 %f1414, %f107; - add.f32 %f811, %f1414, 0f40000000; - mov.b32 %r386, %f811; - setp.lt.s32 %p94, %r386, 2139095040; - @%p94 bra BB9_77; - - abs.f32 %f1415, %f107; - setp.gtu.f32 %p95, %f1415, 0f7F800000; - @%p95 bra BB9_76; - bra.uni BB9_74; - -BB9_76: - add.f32 %f1438, %f107, 0f40000000; - bra.uni BB9_77; - -BB9_74: - abs.f32 %f1416, %f107; - setp.neu.f32 %p96, %f1416, 0f7F800000; - @%p96 bra BB9_77; - - selp.f32 %f1438, 0fFF800000, 0f7F800000, %p4; - -BB9_77: - setp.neu.f32 %p248, %f40, 0f00000000; - mov.f32 %f1374, 0fB5BFBE8E; - mov.f32 %f1373, 0fBF317200; - mul.f32 %f812, %f1438, 0fBF000000; - setp.eq.f32 %p97, %f107, 0f3F800000; - selp.f32 %f813, 0fBF000000, %f812, %p97; - mul.f32 %f814, %f813, 0f3FB8AA3B; - cvt.rzi.f32.f32 %f815, %f814; - fma.rn.f32 %f817, %f815, %f1373, %f813; - fma.rn.f32 %f819, %f815, %f1374, %f817; - mul.f32 %f820, %f819, 0f3FB8AA3B; - ex2.approx.ftz.f32 %f821, %f820; - add.f32 %f822, %f815, 0f00000000; - ex2.approx.f32 %f823, %f822; - mul.f32 %f824, %f821, %f823; - setp.lt.f32 %p98, %f813, 0fC2D20000; - selp.f32 %f825, 0f00000000, %f824, %p98; - setp.gt.f32 %p99, %f813, 0f42D20000; - selp.f32 %f826, 0f7F800000, %f825, %p99; - sub.f32 %f827, %f106, %f826; - mul.f32 %f828, %f2, %f827; - mul.f32 %f829, %f52, 0f3F000000; - selp.f32 %f830, %f829, 0f00000000, %p248; - mul.f32 %f831, %f830, %f828; - selp.f32 %f832, %f92, 0f00000000, %p248; - selp.f32 %f833, %f831, 0f00000000, %p248; - shl.b32 %r387, %r823, 1; - mul.wide.s32 %rd83, %r387, 4; - add.s64 %rd84, %rd3, %rd83; - st.local.f32 [%rd84], %f832; - st.local.f32 [%rd84+4], %f833; - selp.u32 %r388, 1, 0, %p248; - add.s32 %r824, %r388, %r824; - add.s32 %r823, %r823, 1; - setp.lt.s32 %p101, %r823, %r290; - @%p101 bra BB9_21; - -BB9_78: - setp.lt.s32 %p102, %r824, 1; - @%p102 bra BB9_80; - - shl.b32 %r389, %r824, 1; - mul.wide.s32 %rd85, %r389, 4; - add.s64 %rd86, %rd3, %rd85; - mov.u32 %r390, 1065353216; - st.local.u32 [%rd86], %r390; - -BB9_80: - shl.b32 %r391, %r824, 1; - add.s32 %r11, %r391, 1; - setp.lt.s32 %p103, %r824, 0; - @%p103 bra BB9_90; - - mov.u32 %r393, 1; - max.s32 %r12, %r11, %r393; - and.b32 %r13, %r12, 3; - mov.u32 %r392, 0; - mov.u32 %r825, %r392; - -BB9_82: - mul.wide.s32 %rd87, %r825, 4; - add.s64 %rd88, %rd3, %rd87; - ld.local.f32 %f120, [%rd88]; - mul.lo.s32 %r15, %r825, %r11; - setp.eq.s32 %p104, %r13, 1; - mov.u32 %r827, %r392; - @%p104 bra BB9_86; - - setp.eq.s32 %p105, %r13, 2; - mov.u32 %r826, 0; - @%p105 bra BB9_85; - - ld.local.f32 %f834, [%rd3]; - mul.f32 %f835, %f120, %f834; - div.rn.f32 %f836, %f835, %f1422; - mul.wide.s32 %rd89, %r15, 4; - add.s64 %rd90, %rd4, %rd89; - ld.local.f32 %f837, [%rd90]; - add.f32 %f838, %f836, %f837; - st.local.f32 [%rd90], %f838; - mov.u32 %r826, %r393; - -BB9_85: - mul.wide.u32 %rd91, %r826, 4; - add.s64 %rd92, %rd3, %rd91; - ld.local.f32 %f839, [%rd92]; - mul.f32 %f840, %f120, %f839; - div.rn.f32 %f841, %f840, %f1422; - add.s32 %r397, %r826, %r15; - mul.wide.s32 %rd93, %r397, 4; - add.s64 %rd94, %rd4, %rd93; - ld.local.f32 %f842, [%rd94]; - add.f32 %f843, %f841, %f842; - st.local.f32 [%rd94], %f843; - add.s32 %r827, %r826, 1; - -BB9_86: - mul.wide.s32 %rd95, %r827, 4; - add.s64 %rd96, %rd3, %rd95; - ld.local.f32 %f844, [%rd96]; - mul.f32 %f845, %f120, %f844; - div.rn.f32 %f846, %f845, %f1422; - add.s32 %r398, %r827, %r15; - mul.wide.s32 %rd97, %r398, 4; - add.s64 %rd98, %rd4, %rd97; - ld.local.f32 %f847, [%rd98]; - add.f32 %f848, %f846, %f847; - st.local.f32 [%rd98], %f848; - setp.lt.u32 %p106, %r12, 4; - @%p106 bra BB9_89; - - add.s32 %r828, %r827, 1; - -BB9_88: - mul.wide.s32 %rd99, %r828, 4; - add.s64 %rd100, %rd3, %rd99; - ld.local.f32 %f849, [%rd100]; - mul.f32 %f850, %f120, %f849; - div.rn.f32 %f851, %f850, %f1422; - add.s32 %r399, %r828, %r15; - mul.wide.s32 %rd101, %r399, 4; - add.s64 %rd102, %rd4, %rd101; - ld.local.f32 %f852, [%rd102]; - add.f32 %f853, %f851, %f852; - ld.local.f32 %f854, [%rd100+4]; - ld.local.f32 %f855, [%rd102+4]; - ld.local.f32 %f856, [%rd100+8]; - ld.local.f32 %f857, [%rd102+8]; - ld.local.f32 %f858, [%rd100+12]; - ld.local.f32 %f859, [%rd102+12]; - st.local.f32 [%rd102], %f853; - mul.f32 %f860, %f120, %f854; - div.rn.f32 %f861, %f860, %f1422; - add.f32 %f862, %f861, %f855; - st.local.f32 [%rd102+4], %f862; - mul.f32 %f863, %f120, %f856; - div.rn.f32 %f864, %f863, %f1422; - add.f32 %f865, %f864, %f857; - st.local.f32 [%rd102+8], %f865; - mul.f32 %f866, %f120, %f858; - div.rn.f32 %f867, %f866, %f1422; - add.f32 %f868, %f867, %f859; - st.local.f32 [%rd102+12], %f868; - add.s32 %r828, %r828, 4; - setp.lt.s32 %p107, %r828, %r11; - @%p107 bra BB9_88; - -BB9_89: - add.s32 %r825, %r825, 1; - setp.lt.s32 %p108, %r825, %r11; - @%p108 bra BB9_82; - -BB9_90: - add.s32 %r820, %r820, 1; - setp.lt.s32 %p109, %r820, %r289; - @%p109 bra BB9_4; - - add.s32 %r819, %r819, 1; - setp.lt.s32 %p110, %r819, %r289; - @%p110 bra BB9_3; - -BB9_92: - mov.u32 %r818, 0; - mov.u64 %rd103, 0; - st.local.u32 [%rd3+4], %rd103; - st.local.u32 [%rd3], %rd103; - st.local.u32 [%rd3+12], %rd103; - st.local.u32 [%rd3+8], %rd103; - st.local.u32 [%rd3+20], %rd103; - st.local.u32 [%rd3+16], %rd103; - st.local.u32 [%rd3+28], %rd103; - st.local.u32 [%rd3+24], %rd103; - st.local.u32 [%rd3+36], %rd103; - st.local.u32 [%rd3+32], %rd103; - st.local.u32 [%rd3+44], %rd103; - st.local.u32 [%rd3+40], %rd103; - st.local.u32 [%rd3+52], %rd103; - st.local.u32 [%rd3+48], %rd103; - st.local.u32 [%rd3+60], %rd103; - st.local.u32 [%rd3+56], %rd103; - st.local.u32 [%rd3+68], %rd103; - st.local.u32 [%rd3+64], %rd103; - st.local.u32 [%rd3+76], %rd103; - st.local.u32 [%rd3+72], %rd103; - st.local.u32 [%rd3+84], %rd103; - st.local.u32 [%rd3+80], %rd103; - st.local.u32 [%rd3+92], %rd103; - st.local.u32 [%rd3+88], %rd103; - st.local.u32 [%rd3+100], %rd103; - st.local.u32 [%rd3+96], %rd103; - st.local.u32 [%rd3+108], %rd103; - st.local.u32 [%rd3+104], %rd103; - st.local.u32 [%rd3+116], %rd103; - st.local.u32 [%rd3+112], %rd103; - st.local.u32 [%rd3+124], %rd103; - st.local.u32 [%rd3+120], %rd103; - st.local.u32 [%rd3+132], %rd103; - st.local.u32 [%rd3+128], %rd103; - st.local.u32 [%rd3+140], %rd103; - st.local.u32 [%rd3+136], %rd103; - st.local.u32 [%rd3+148], %rd103; - st.local.u32 [%rd3+144], %rd103; - st.local.u32 [%rd3+156], %rd103; - st.local.u32 [%rd3+152], %rd103; - st.local.u32 [%rd3+164], %rd103; - st.local.u32 [%rd3+160], %rd103; - st.local.u32 [%rd3+172], %rd103; - st.local.u32 [%rd3+168], %rd103; - st.local.u32 [%rd3+180], %rd103; - st.local.u32 [%rd3+176], %rd103; - st.local.u32 [%rd3+188], %rd103; - st.local.u32 [%rd3+184], %rd103; - st.local.u32 [%rd3+196], %rd103; - st.local.u32 [%rd3+192], %rd103; - st.local.u32 [%rd3+204], %rd103; - st.local.u32 [%rd3+200], %rd103; - st.local.u32 [%rd3+212], %rd103; - st.local.u32 [%rd3+208], %rd103; - st.local.u32 [%rd3+220], %rd103; - st.local.u32 [%rd3+216], %rd103; - st.local.u32 [%rd3+228], %rd103; - st.local.u32 [%rd3+224], %rd103; - st.local.u32 [%rd3+236], %rd103; - st.local.u32 [%rd3+232], %rd103; - st.local.u32 [%rd3+244], %rd103; - st.local.u32 [%rd3+240], %rd103; - st.local.u32 [%rd3+252], %rd103; - st.local.u32 [%rd3+248], %rd103; - st.local.u32 [%rd3+260], %rd103; - st.local.u32 [%rd3+256], %rd103; - st.local.u32 [%rd3+268], %rd103; - st.local.u32 [%rd3+264], %rd103; - st.local.u32 [%rd3+276], %rd103; - st.local.u32 [%rd3+272], %rd103; - st.local.u32 [%rd3+284], %rd103; - st.local.u32 [%rd3+280], %rd103; - st.local.u32 [%rd3+292], %rd103; - st.local.u32 [%rd3+288], %rd103; - st.local.u32 [%rd3+300], %rd103; - st.local.u32 [%rd3+296], %rd103; - st.local.u32 [%rd3+308], %rd103; - st.local.u32 [%rd3+304], %rd103; - st.local.u32 [%rd3+316], %rd103; - st.local.u32 [%rd3+312], %rd103; - st.local.u32 [%rd3+324], %rd103; - st.local.u32 [%rd3+320], %rd103; - st.local.u32 [%rd3+332], %rd103; - st.local.u32 [%rd3+328], %rd103; - st.local.u32 [%rd3+340], %rd103; - st.local.u32 [%rd3+336], %rd103; - st.local.u32 [%rd3+348], %rd103; - st.local.u32 [%rd3+344], %rd103; - st.local.u32 [%rd3+356], %rd103; - st.local.u32 [%rd3+352], %rd103; - st.local.u32 [%rd3+364], %rd103; - st.local.u32 [%rd3+360], %rd103; - st.local.u32 [%rd3+372], %rd103; - st.local.u32 [%rd3+368], %rd103; - st.local.u32 [%rd3+380], %rd103; - st.local.u32 [%rd3+376], %rd103; - st.local.u32 [%rd3+388], %rd103; - st.local.u32 [%rd3+384], %rd103; - st.local.u32 [%rd3+396], %rd103; - st.local.u32 [%rd3+392], %rd103; - st.local.u32 [%rd3+404], %rd103; - st.local.u32 [%rd3+400], %rd103; - st.local.u32 [%rd3+412], %rd103; - st.local.u32 [%rd3+408], %rd103; - st.local.u32 [%rd3+420], %rd103; - st.local.u32 [%rd3+416], %rd103; - st.local.u32 [%rd3+428], %rd103; - st.local.u32 [%rd3+424], %rd103; - st.local.u32 [%rd3+436], %rd103; - st.local.u32 [%rd3+432], %rd103; - st.local.u32 [%rd3+444], %rd103; - st.local.u32 [%rd3+440], %rd103; - st.local.u32 [%rd3+452], %rd103; - st.local.u32 [%rd3+448], %rd103; - st.local.u32 [%rd3+460], %rd103; - st.local.u32 [%rd3+456], %rd103; - st.local.u32 [%rd3+468], %rd103; - st.local.u32 [%rd3+464], %rd103; - st.local.u32 [%rd3+476], %rd103; - st.local.u32 [%rd3+472], %rd103; - st.local.u32 [%rd3+480], %r818; - shl.b32 %r26, %r824, 1; - setp.lt.s32 %p111, %r824, 1; - @%p111 bra BB9_291; - - setp.lt.s32 %p112, %r824, 0; - @%p112 bra BB9_103; - - add.s32 %r402, %r26, 1; - mov.u32 %r403, 1; - max.s32 %r27, %r402, %r403; - and.b32 %r28, %r27, 3; - mov.u32 %r401, 0; - mov.u32 %r830, %r401; - -BB9_95: - setp.eq.s32 %p113, %r28, 1; - mov.u32 %r832, %r401; - @%p113 bra BB9_99; - - setp.eq.s32 %p114, %r28, 2; - mov.u32 %r831, 0; - @%p114 bra BB9_98; - - mul.wide.s32 %rd104, %r830, 4; - add.s64 %rd105, %rd4, %rd104; - ld.local.f32 %f869, [%rd105]; - add.s64 %rd106, %rd5, %rd104; - st.local.f32 [%rd106], %f869; - mov.u32 %r831, %r403; - -BB9_98: - neg.s32 %r408, %r831; - and.b32 %r409, %r402, %r408; - add.s32 %r410, %r409, %r830; - mul.wide.s32 %rd107, %r410, 4; - add.s64 %rd108, %rd4, %rd107; - ld.local.f32 %f870, [%rd108]; - add.s64 %rd109, %rd5, %rd107; - st.local.f32 [%rd109], %f870; - add.s32 %r832, %r831, 1; - -BB9_99: - mad.lo.s32 %r412, %r832, %r402, %r830; - mul.wide.s32 %rd110, %r412, 4; - add.s64 %rd111, %rd4, %rd110; - ld.local.f32 %f871, [%rd111]; - add.s64 %rd112, %rd5, %rd110; - st.local.f32 [%rd112], %f871; - setp.lt.u32 %p115, %r27, 4; - @%p115 bra BB9_102; - - add.s32 %r833, %r832, 1; - -BB9_101: - mad.lo.s32 %r414, %r833, %r402, %r830; - mul.wide.s32 %rd113, %r414, 4; - add.s64 %rd114, %rd4, %rd113; - ld.local.f32 %f872, [%rd114]; - add.s64 %rd115, %rd5, %rd113; - st.local.f32 [%rd115], %f872; - add.s32 %r415, %r414, %r402; - mul.wide.s32 %rd116, %r415, 4; - add.s64 %rd117, %rd4, %rd116; - ld.local.f32 %f873, [%rd117]; - add.s64 %rd118, %rd5, %rd116; - st.local.f32 [%rd118], %f873; - add.s32 %r416, %r833, 2; - mad.lo.s32 %r417, %r416, %r402, %r830; - mul.wide.s32 %rd119, %r417, 4; - add.s64 %rd120, %rd4, %rd119; - ld.local.f32 %f874, [%rd120]; - add.s64 %rd121, %rd5, %rd119; - st.local.f32 [%rd121], %f874; - add.s32 %r418, %r833, 3; - mad.lo.s32 %r419, %r418, %r402, %r830; - mul.wide.s32 %rd122, %r419, 4; - add.s64 %rd123, %rd4, %rd122; - ld.local.f32 %f875, [%rd123]; - add.s64 %rd124, %rd5, %rd122; - st.local.f32 [%rd124], %f875; - add.s32 %r833, %r833, 4; - setp.lt.s32 %p116, %r833, %r402; - @%p116 bra BB9_101; - -BB9_102: - add.s32 %r830, %r830, 1; - setp.lt.s32 %p117, %r830, %r402; - @%p117 bra BB9_95; - -BB9_103: - and.b32 %r37, %r824, 3; - mov.u32 %r421, 0; - mov.u32 %r834, %r421; - -BB9_104: - shl.b32 %r423, %r834, 1; - add.s32 %r424, %r423, 1; - add.s32 %r425, %r26, 1; - mad.lo.s32 %r39, %r424, %r425, 1; - mul.lo.s32 %r40, %r423, %r425; - setp.eq.s32 %p118, %r37, 0; - mov.u32 %r838, %r421; - @%p118 bra BB9_115; - - setp.eq.s32 %p119, %r37, 1; - mov.u32 %r836, 0; - @%p119 bra BB9_112; - - setp.eq.s32 %p120, %r37, 2; - mov.u32 %r835, 0; - @%p120 bra BB9_109; - - setp.eq.s32 %p121, %r834, 0; - mov.u32 %r835, 1; - @%p121 bra BB9_109; - - mul.wide.s32 %rd125, %r39, 4; - add.s64 %rd126, %rd5, %rd125; - mov.u32 %r430, 0; - st.local.u32 [%rd126], %r430; - mul.wide.s32 %rd127, %r40, 4; - add.s64 %rd128, %rd5, %rd127; - st.local.u32 [%rd128], %r430; - -BB9_109: - setp.eq.s32 %p122, %r834, %r835; - @%p122 bra BB9_111; - - shl.b32 %r431, %r835, 1; - add.s32 %r432, %r39, %r431; - mul.wide.s32 %rd129, %r432, 4; - add.s64 %rd130, %rd5, %rd129; - mov.u32 %r433, 0; - st.local.u32 [%rd130], %r433; - add.s32 %r434, %r431, %r40; - mul.wide.s32 %rd131, %r434, 4; - add.s64 %rd132, %rd5, %rd131; - st.local.u32 [%rd132], %r433; - -BB9_111: - add.s32 %r836, %r835, 1; - -BB9_112: - setp.eq.s32 %p123, %r834, %r836; - @%p123 bra BB9_114; - - shl.b32 %r435, %r836, 1; - add.s32 %r436, %r39, %r435; - mul.wide.s32 %rd133, %r436, 4; - add.s64 %rd134, %rd5, %rd133; - mov.u32 %r437, 0; - st.local.u32 [%rd134], %r437; - add.s32 %r438, %r435, %r40; - mul.wide.s32 %rd135, %r438, 4; - add.s64 %rd136, %rd5, %rd135; - st.local.u32 [%rd136], %r437; - -BB9_114: - add.s32 %r838, %r836, 1; - -BB9_115: - setp.lt.u32 %p124, %r824, 4; - @%p124 bra BB9_125; - -BB9_116: - shl.b32 %r439, %r838, 1; - add.s32 %r440, %r39, %r439; - mul.wide.s32 %rd137, %r440, 4; - add.s64 %rd11, %rd5, %rd137; - add.s32 %r441, %r439, %r40; - mul.wide.s32 %rd138, %r441, 4; - add.s64 %rd12, %rd5, %rd138; - setp.eq.s32 %p125, %r834, %r838; - @%p125 bra BB9_118; - - mov.u32 %r442, 0; - st.local.u32 [%rd11], %r442; - st.local.u32 [%rd12], %r442; - -BB9_118: - add.s32 %r443, %r838, 1; - setp.eq.s32 %p126, %r834, %r443; - @%p126 bra BB9_120; - - mov.u32 %r444, 0; - st.local.u32 [%rd11+8], %r444; - st.local.u32 [%rd12+8], %r444; - -BB9_120: - add.s32 %r445, %r838, 2; - setp.eq.s32 %p127, %r834, %r445; - @%p127 bra BB9_122; - - mov.u32 %r446, 0; - st.local.u32 [%rd11+16], %r446; - st.local.u32 [%rd12+16], %r446; - -BB9_122: - add.s32 %r447, %r838, 3; - setp.eq.s32 %p128, %r834, %r447; - @%p128 bra BB9_124; - - mov.u32 %r448, 0; - st.local.u32 [%rd11+24], %r448; - st.local.u32 [%rd12+24], %r448; - -BB9_124: - add.s32 %r838, %r838, 4; - setp.lt.s32 %p129, %r838, %r824; - @%p129 bra BB9_116; - -BB9_125: - add.s32 %r834, %r834, 1; - setp.lt.s32 %p130, %r834, %r824; - @%p130 bra BB9_104; - - mov.u32 %r839, 0; - @%p112 bra BB9_157; - -BB9_127: - mul.lo.s32 %r50, %r839, %r425; - mul.wide.s32 %rd139, %r50, 4; - add.s64 %rd13, %rd5, %rd139; - setp.lt.s32 %p132, %r839, 0; - @%p132 bra BB9_141; - - mov.u32 %r840, 0; - -BB9_129: - setp.lt.s32 %p133, %r840, 1; - @%p133 bra BB9_140; - - add.s32 %r53, %r840, -1; - setp.lt.s32 %p134, %r53, 0; - mov.f32 %f1443, 0f00000000; - @%p134 bra BB9_139; - - and.b32 %r54, %r840, 3; - setp.eq.s32 %p135, %r54, 0; - mov.u32 %r844, 0; - mov.f32 %f1443, 0f00000000; - @%p135 bra BB9_137; - - setp.eq.s32 %p136, %r54, 1; - mov.u32 %r842, 0; - mov.f32 %f1440, 0f00000000; - @%p136 bra BB9_136; - - setp.eq.s32 %p137, %r54, 2; - mov.u32 %r841, 0; - mov.f32 %f1439, 0f00000000; - @%p137 bra BB9_135; - - mul.wide.s32 %rd140, %r840, 4; - add.s64 %rd141, %rd5, %rd140; - ld.local.f32 %f880, [%rd13]; - ld.local.f32 %f881, [%rd141]; - fma.rn.f32 %f1439, %f881, %f880, 0f00000000; - mov.u32 %r841, 1; - -BB9_135: - neg.s32 %r458, %r841; - and.b32 %r459, %r425, %r458; - add.s32 %r460, %r459, %r840; - mul.wide.s32 %rd142, %r460, 4; - add.s64 %rd143, %rd5, %rd142; - add.s32 %r461, %r841, %r50; - mul.wide.s32 %rd144, %r461, 4; - add.s64 %rd145, %rd5, %rd144; - ld.local.f32 %f882, [%rd145]; - ld.local.f32 %f883, [%rd143]; - fma.rn.f32 %f1440, %f883, %f882, %f1439; - add.s32 %r842, %r841, 1; - -BB9_136: - mad.lo.s32 %r463, %r842, %r425, %r840; - mul.wide.s32 %rd146, %r463, 4; - add.s64 %rd147, %rd5, %rd146; - add.s32 %r464, %r842, %r50; - mul.wide.s32 %rd148, %r464, 4; - add.s64 %rd149, %rd5, %rd148; - ld.local.f32 %f884, [%rd149]; - ld.local.f32 %f885, [%rd147]; - fma.rn.f32 %f1443, %f885, %f884, %f1440; - add.s32 %r844, %r842, 1; - -BB9_137: - setp.lt.u32 %p138, %r840, 4; - @%p138 bra BB9_139; - -BB9_138: - mad.lo.s32 %r466, %r844, %r425, %r840; - mul.wide.s32 %rd150, %r466, 4; - add.s64 %rd151, %rd5, %rd150; - add.s32 %r467, %r844, %r50; - mul.wide.s32 %rd152, %r467, 4; - add.s64 %rd153, %rd5, %rd152; - ld.local.f32 %f886, [%rd153]; - ld.local.f32 %f887, [%rd151]; - fma.rn.f32 %f888, %f887, %f886, %f1443; - add.s32 %r468, %r466, %r425; - mul.wide.s32 %rd154, %r468, 4; - add.s64 %rd155, %rd5, %rd154; - ld.local.f32 %f889, [%rd153+4]; - ld.local.f32 %f890, [%rd155]; - fma.rn.f32 %f891, %f890, %f889, %f888; - add.s32 %r469, %r844, 2; - mad.lo.s32 %r470, %r469, %r425, %r840; - mul.wide.s32 %rd156, %r470, 4; - add.s64 %rd157, %rd5, %rd156; - ld.local.f32 %f892, [%rd153+8]; - ld.local.f32 %f893, [%rd157]; - fma.rn.f32 %f894, %f893, %f892, %f891; - add.s32 %r471, %r844, 3; - mad.lo.s32 %r472, %r471, %r425, %r840; - mul.wide.s32 %rd158, %r472, 4; - add.s64 %rd159, %rd5, %rd158; - ld.local.f32 %f895, [%rd153+12]; - ld.local.f32 %f896, [%rd159]; - fma.rn.f32 %f1443, %f896, %f895, %f894; - add.s32 %r844, %r844, 4; - setp.lt.s32 %p139, %r471, %r53; - @%p139 bra BB9_138; - -BB9_139: - add.s32 %r473, %r840, %r50; - mul.wide.s32 %rd160, %r473, 4; - add.s64 %rd161, %rd5, %rd160; - ld.local.f32 %f897, [%rd161]; - sub.f32 %f898, %f897, %f1443; - st.local.f32 [%rd161], %f898; - -BB9_140: - add.s32 %r62, %r840, 1; - setp.lt.s32 %p140, %r840, %r839; - mov.u32 %r840, %r62; - @%p140 bra BB9_129; - -BB9_141: - add.s32 %r63, %r839, 1; - setp.ge.s32 %p141, %r63, %r425; - @%p141 bra BB9_156; - - add.s32 %r64, %r839, -1; - add.s32 %r475, %r50, %r839; - mul.wide.s32 %rd162, %r475, 4; - add.s64 %rd14, %rd5, %rd162; - and.b32 %r65, %r839, 3; - mov.u32 %r845, %r63; - -BB9_143: - add.s32 %r476, %r845, %r50; - mul.wide.s32 %rd163, %r476, 4; - add.s64 %rd15, %rd5, %rd163; - setp.gt.s32 %p142, %r839, 0; - @%p142 bra BB9_145; - bra.uni BB9_144; - -BB9_145: - setp.lt.s32 %p143, %r64, 0; - mov.f32 %f1448, 0f00000000; - @%p143 bra BB9_154; - - setp.eq.s32 %p144, %r65, 0; - mov.u32 %r849, 0; - mov.f32 %f1448, 0f00000000; - @%p144 bra BB9_152; - - setp.eq.s32 %p145, %r65, 1; - mov.u32 %r847, 0; - mov.f32 %f1445, 0f00000000; - @%p145 bra BB9_151; - - setp.eq.s32 %p146, %r65, 2; - mov.u32 %r846, 0; - mov.f32 %f1444, 0f00000000; - @%p146 bra BB9_150; - - mul.wide.s32 %rd164, %r845, 4; - add.s64 %rd165, %rd5, %rd164; - ld.local.f32 %f907, [%rd13]; - ld.local.f32 %f908, [%rd165]; - fma.rn.f32 %f1444, %f908, %f907, 0f00000000; - mov.u32 %r846, 1; - -BB9_150: - neg.s32 %r482, %r846; - and.b32 %r483, %r425, %r482; - add.s32 %r484, %r483, %r845; - mul.wide.s32 %rd166, %r484, 4; - add.s64 %rd167, %rd5, %rd166; - add.s32 %r485, %r846, %r50; - mul.wide.s32 %rd168, %r485, 4; - add.s64 %rd169, %rd5, %rd168; - ld.local.f32 %f909, [%rd169]; - ld.local.f32 %f910, [%rd167]; - fma.rn.f32 %f1445, %f910, %f909, %f1444; - add.s32 %r847, %r846, 1; - -BB9_151: - mad.lo.s32 %r487, %r847, %r425, %r845; - mul.wide.s32 %rd170, %r487, 4; - add.s64 %rd171, %rd5, %rd170; - add.s32 %r488, %r847, %r50; - mul.wide.s32 %rd172, %r488, 4; - add.s64 %rd173, %rd5, %rd172; - ld.local.f32 %f911, [%rd173]; - ld.local.f32 %f912, [%rd171]; - fma.rn.f32 %f1448, %f912, %f911, %f1445; - add.s32 %r849, %r847, 1; - -BB9_152: - setp.lt.u32 %p147, %r839, 4; - @%p147 bra BB9_154; - -BB9_153: - mad.lo.s32 %r490, %r849, %r425, %r845; - mul.wide.s32 %rd174, %r490, 4; - add.s64 %rd175, %rd5, %rd174; - add.s32 %r491, %r849, %r50; - mul.wide.s32 %rd176, %r491, 4; - add.s64 %rd177, %rd5, %rd176; - ld.local.f32 %f913, [%rd177]; - ld.local.f32 %f914, [%rd175]; - fma.rn.f32 %f915, %f914, %f913, %f1448; - add.s32 %r492, %r490, %r425; - mul.wide.s32 %rd178, %r492, 4; - add.s64 %rd179, %rd5, %rd178; - ld.local.f32 %f916, [%rd177+4]; - ld.local.f32 %f917, [%rd179]; - fma.rn.f32 %f918, %f917, %f916, %f915; - add.s32 %r493, %r849, 2; - mad.lo.s32 %r494, %r493, %r425, %r845; - mul.wide.s32 %rd180, %r494, 4; - add.s64 %rd181, %rd5, %rd180; - ld.local.f32 %f919, [%rd177+8]; - ld.local.f32 %f920, [%rd181]; - fma.rn.f32 %f921, %f920, %f919, %f918; - add.s32 %r495, %r849, 3; - mad.lo.s32 %r496, %r495, %r425, %r845; - mul.wide.s32 %rd182, %r496, 4; - add.s64 %rd183, %rd5, %rd182; - ld.local.f32 %f922, [%rd177+12]; - ld.local.f32 %f923, [%rd183]; - fma.rn.f32 %f1448, %f923, %f922, %f921; - add.s32 %r849, %r849, 4; - setp.lt.s32 %p148, %r495, %r64; - @%p148 bra BB9_153; - -BB9_154: - ld.local.f32 %f924, [%rd14]; - rcp.rn.f32 %f925, %f924; - ld.local.f32 %f926, [%rd15]; - sub.f32 %f927, %f926, %f1448; - mul.f32 %f928, %f925, %f927; - st.local.f32 [%rd15], %f928; - bra.uni BB9_155; - -BB9_144: - ld.local.f32 %f899, [%rd14]; - rcp.rn.f32 %f900, %f899; - ld.local.f32 %f901, [%rd15]; - mul.f32 %f902, %f900, %f901; - st.local.f32 [%rd15], %f902; - -BB9_155: - add.s32 %r845, %r845, 1; - setp.lt.s32 %p149, %r845, %r425; - @%p149 bra BB9_143; - -BB9_156: - setp.lt.s32 %p150, %r63, %r425; - mov.u32 %r839, %r63; - @%p150 bra BB9_127; - -BB9_157: - mul.wide.s32 %rd184, %r26, 4; - add.s64 %rd17, %rd6, %rd184; - @%p112 bra BB9_182; - - mad.lo.s32 %r501, %r425, %r26, %r26; - mul.wide.s32 %rd185, %r501, 4; - add.s64 %rd186, %rd5, %rd185; - ld.local.f32 %f139, [%rd186]; - add.s32 %r75, %r26, -1; - mov.u32 %r850, 0; - -BB9_159: - setp.eq.s32 %p152, %r850, 0; - selp.f32 %f929, 0f3F800000, 0f00000000, %p152; - st.local.f32 [%rd6], %f929; - mov.u32 %r851, 1; - -BB9_160: - add.s32 %r503, %r851, -1; - setp.lt.s32 %p153, %r503, 0; - mov.f32 %f1453, 0f00000000; - @%p153 bra BB9_169; - - and.b32 %r78, %r851, 3; - setp.eq.s32 %p154, %r78, 0; - mov.u32 %r855, 0; - mov.f32 %f1453, 0f00000000; - @%p154 bra BB9_167; - - setp.eq.s32 %p155, %r78, 1; - mov.u32 %r853, 0; - mov.f32 %f1450, 0f00000000; - @%p155 bra BB9_166; - - setp.eq.s32 %p156, %r78, 2; - mov.u32 %r852, 0; - mov.f32 %f1449, 0f00000000; - @%p156 bra BB9_165; - - mul.wide.s32 %rd187, %r851, 4; - add.s64 %rd188, %rd5, %rd187; - ld.local.f32 %f934, [%rd6]; - ld.local.f32 %f935, [%rd188]; - fma.rn.f32 %f1449, %f935, %f934, 0f00000000; - mov.u32 %r852, 1; - -BB9_165: - neg.s32 %r509, %r852; - and.b32 %r510, %r425, %r509; - add.s32 %r511, %r510, %r851; - mul.wide.s32 %rd189, %r511, 4; - add.s64 %rd190, %rd5, %rd189; - mul.wide.u32 %rd191, %r852, 4; - add.s64 %rd192, %rd6, %rd191; - ld.local.f32 %f936, [%rd192]; - ld.local.f32 %f937, [%rd190]; - fma.rn.f32 %f1450, %f937, %f936, %f1449; - add.s32 %r853, %r852, 1; - -BB9_166: - mad.lo.s32 %r513, %r853, %r425, %r851; - mul.wide.s32 %rd193, %r513, 4; - add.s64 %rd194, %rd5, %rd193; - mul.wide.s32 %rd195, %r853, 4; - add.s64 %rd196, %rd6, %rd195; + setp.gt.f32 %p76, %f678, 0f3FB504F3; + mul.f32 %f681, %f678, 0f3F000000; + add.f32 %f682, %f680, 0f3F800000; + selp.f32 %f683, %f682, %f680, %p76; + selp.f32 %f684, %f681, %f678, %p76; + add.f32 %f685, %f684, 0fBF800000; + add.f32 %f686, %f684, 0f3F800000; + rcp.approx.ftz.f32 %f687, %f686; + add.f32 %f688, %f685, %f685; + mul.f32 %f690, %f688, %f687; + mul.f32 %f691, %f690, %f690; + fma.rn.f32 %f694, %f1111, %f691, %f1112; + fma.rn.f32 %f696, %f694, %f691, %f1113; + mul.rn.f32 %f697, %f696, %f691; + mul.rn.f32 %f698, %f697, %f690; + sub.f32 %f699, %f685, %f690; + add.f32 %f700, %f699, %f699; + neg.f32 %f701, %f690; + fma.rn.f32 %f702, %f701, %f685, %f700; + mul.rn.f32 %f703, %f687, %f702; + add.f32 %f704, %f698, %f690; + sub.f32 %f705, %f690, %f704; + add.f32 %f706, %f698, %f705; + add.f32 %f707, %f703, %f706; + add.f32 %f708, %f704, %f707; + sub.f32 %f709, %f704, %f708; + add.f32 %f710, %f707, %f709; + mul.rn.f32 %f712, %f683, %f1114; + mul.rn.f32 %f714, %f683, %f1115; + add.f32 %f715, %f712, %f708; + sub.f32 %f716, %f712, %f715; + add.f32 %f717, %f708, %f716; + add.f32 %f718, %f710, %f717; + add.f32 %f719, %f714, %f718; + add.f32 %f720, %f715, %f719; + sub.f32 %f721, %f715, %f720; + add.f32 %f722, %f719, %f721; + mul.rn.f32 %f723, %f1109, %f720; + neg.f32 %f724, %f723; + fma.rn.f32 %f725, %f1109, %f720, %f724; + fma.rn.f32 %f726, %f1109, %f722, %f725; + fma.rn.f32 %f728, %f1116, %f720, %f726; + add.rn.f32 %f729, %f723, %f728; + neg.f32 %f730, %f729; + add.rn.f32 %f731, %f723, %f730; + add.rn.f32 %f732, %f731, %f728; + mov.b32 %r313, %f729; + setp.eq.s32 %p77, %r313, 1118925336; + add.s32 %r314, %r313, -1; + mov.b32 %f733, %r314; + add.f32 %f734, %f732, 0f37000000; + selp.f32 %f102, %f734, %f732, %p77; + selp.f32 %f735, %f733, %f729, %p77; + mul.rn.f32 %f736, %f735, %f1110; + cvt.rzi.f32.f32 %f737, %f736; + abs.f32 %f738, %f737; + setp.gt.f32 %p78, %f738, 0f42FC0000; + mov.b32 %r315, %f737; + and.b32 %r316, %r315, -2147483648; + or.b32 %r317, %r316, 1123811328; + mov.b32 %f739, %r317; + selp.f32 %f740, %f739, %f737, %p78; + fma.rn.f32 %f742, %f740, %f1117, %f735; + fma.rn.f32 %f744, %f740, %f1118, %f742; + mul.f32 %f745, %f744, 0f3FB8AA3B; + add.f32 %f746, %f740, 0f4B40007F; + mov.b32 %r318, %f746; + shl.b32 %r319, %r318, 23; + mov.b32 %f747, %r319; + ex2.approx.ftz.f32 %f748, %f745; + mul.f32 %f103, %f748, %f747; + setp.eq.f32 %p79, %f103, 0f7F800000; + mov.f32 %f1195, 0f7F800000; + @%p79 bra $L__BB9_63; + + fma.rn.f32 %f1195, %f103, %f102, %f103; + +$L__BB9_63: + setp.lt.f32 %p80, %f100, 0f00000000; + and.pred %p4, %p80, %p36; + setp.eq.f32 %p82, %f100, 0f00000000; + @%p82 bra $L__BB9_67; + bra.uni $L__BB9_64; + +$L__BB9_67: + add.f32 %f753, %f100, %f100; + selp.f32 %f1197, %f753, 0f00000000, %p36; + bra.uni $L__BB9_68; + +$L__BB9_64: + mov.b32 %r320, %f1195; + xor.b32 %r321, %r320, -2147483648; + mov.b32 %f749, %r321; + selp.f32 %f1197, %f749, %f1195, %p4; + setp.geu.f32 %p83, %f100, 0f00000000; + @%p83 bra $L__BB9_68; + + mov.f32 %f1162, 0f40000000; + cvt.rzi.f32.f32 %f751, %f1162; + setp.eq.f32 %p84, %f751, 0f40000000; + @%p84 bra $L__BB9_68; + + mov.f32 %f1197, 0f7FFFFFFF; + +$L__BB9_68: + abs.f32 %f1173, %f100; + add.f32 %f754, %f1173, 0f40000000; + mov.b32 %r322, %f754; + setp.lt.s32 %p86, %r322, 2139095040; + @%p86 bra $L__BB9_73; + + abs.f32 %f1174, %f100; + setp.gtu.f32 %p87, %f1174, 0f7F800000; + @%p87 bra $L__BB9_72; + bra.uni $L__BB9_70; + +$L__BB9_72: + add.f32 %f1197, %f100, 0f40000000; + bra.uni $L__BB9_73; + +$L__BB9_70: + abs.f32 %f1175, %f100; + setp.neu.f32 %p88, %f1175, 0f7F800000; + @%p88 bra $L__BB9_73; + + selp.f32 %f1197, 0fFF800000, 0f7F800000, %p4; + +$L__BB9_73: + setp.neu.f32 %p247, %f37, 0f00000000; + setp.neu.f32 %p246, %f37, 0f00000000; + selp.f32 %f1131, 0f3F800000, 0f00000000, %p246; + mov.f32 %f1130, 0f32A57060; + mov.f32 %f1129, 0f4B400001; + mov.f32 %f1128, 0f437C0000; + mov.f32 %f1127, 0f3BBB989D; + mov.f32 %f1126, 0f3FB8AA3B; + mul.f32 %f755, %f1197, 0fBF000000; + setp.eq.f32 %p89, %f100, 0f3F800000; + selp.f32 %f756, 0fBF000000, %f755, %p89; + fma.rn.f32 %f759, %f756, %f1127, %f183; + cvt.sat.f32.f32 %f762, %f759; + fma.rm.f32 %f764, %f762, %f1128, %f1129; + add.f32 %f765, %f764, 0fCB40007F; + neg.f32 %f766, %f765; + fma.rn.f32 %f767, %f756, %f1126, %f766; + fma.rn.f32 %f769, %f756, %f1130, %f767; + mov.b32 %r323, %f764; + shl.b32 %r324, %r323, 23; + mov.b32 %f770, %r324; + ex2.approx.ftz.f32 %f771, %f769; + mul.f32 %f772, %f771, %f770; + sub.f32 %f773, %f99, %f772; + mul.f32 %f774, %f2, %f773; + mul.f32 %f775, %f48, 0f3F000000; + mul.f32 %f776, %f775, %f1131; + mul.f32 %f777, %f776, %f774; + mul.f32 %f778, %f777, %f1131; + shl.b32 %r325, %r544, 1; + mul.wide.s32 %rd76, %r325, 4; + add.s64 %rd77, %rd4, %rd76; + mul.f32 %f779, %f86, %f1131; + st.local.f32 [%rd77], %f779; + st.local.f32 [%rd77+4], %f778; + selp.u32 %r326, 1, 0, %p246; + add.s32 %r545, %r545, %r326; + add.s32 %r544, %r544, 1; + setp.lt.s32 %p91, %r544, %r214; + @%p91 bra $L__BB9_17; + +$L__BB9_74: + shl.b32 %r13, %r545, 1; + setp.eq.s32 %p92, %r545, 0; + @%p92 bra $L__BB9_76; + + mul.wide.s32 %rd78, %r13, 4; + add.s64 %rd79, %rd4, %rd78; + mov.u32 %r327, 1065353216; + st.local.u32 [%rd79], %r327; + +$L__BB9_76: + add.s32 %r14, %r13, 1; + mov.u32 %r328, 0; + max.s32 %r15, %r13, 0; + add.s32 %r329, %r15, 1; + and.b32 %r16, %r329, 3; + sub.s32 %r17, %r329, %r16; + mov.u32 %r546, %r328; + +$L__BB9_77: + mul.wide.s32 %rd80, %r546, 4; + add.s64 %rd81, %rd4, %rd80; + ld.local.f32 %f112, [%rd81]; + mul.lo.s32 %r19, %r546, %r14; + setp.lt.u32 %p93, %r15, 3; + mov.u32 %r549, %r328; + @%p93 bra $L__BB9_80; + + mov.u32 %r549, %r328; + mov.u32 %r548, %r17; + +$L__BB9_79: + mul.wide.s32 %rd82, %r549, 4; + add.s64 %rd83, %rd4, %rd82; + ld.local.f32 %f780, [%rd83]; + mul.f32 %f781, %f112, %f780; + div.rn.f32 %f782, %f781, %f1181; + add.s32 %r332, %r549, %r19; + mul.wide.s32 %rd84, %r332, 4; + add.s64 %rd85, %rd5, %rd84; + ld.local.f32 %f783, [%rd85]; + add.f32 %f784, %f783, %f782; + st.local.f32 [%rd85], %f784; + ld.local.f32 %f785, [%rd83+4]; + mul.f32 %f786, %f112, %f785; + div.rn.f32 %f787, %f786, %f1181; + ld.local.f32 %f788, [%rd85+4]; + add.f32 %f789, %f788, %f787; + st.local.f32 [%rd85+4], %f789; + ld.local.f32 %f790, [%rd83+8]; + mul.f32 %f791, %f112, %f790; + div.rn.f32 %f792, %f791, %f1181; + ld.local.f32 %f793, [%rd85+8]; + add.f32 %f794, %f793, %f792; + st.local.f32 [%rd85+8], %f794; + ld.local.f32 %f795, [%rd83+12]; + mul.f32 %f796, %f112, %f795; + div.rn.f32 %f797, %f796, %f1181; + ld.local.f32 %f798, [%rd85+12]; + add.f32 %f799, %f798, %f797; + st.local.f32 [%rd85+12], %f799; + add.s32 %r549, %r549, 4; + add.s32 %r548, %r548, -4; + setp.ne.s32 %p94, %r548, 0; + @%p94 bra $L__BB9_79; + +$L__BB9_80: + mul.wide.s32 %rd86, %r549, 4; + add.s64 %rd13, %rd4, %rd86; + ld.local.f32 %f800, [%rd13]; + mul.f32 %f801, %f112, %f800; + div.rn.f32 %f802, %f801, %f1181; + add.s32 %r333, %r549, %r19; + mul.wide.s32 %rd87, %r333, 4; + add.s64 %rd14, %rd5, %rd87; + ld.local.f32 %f803, [%rd14]; + add.f32 %f804, %f803, %f802; + st.local.f32 [%rd14], %f804; + setp.eq.s32 %p95, %r16, 1; + @%p95 bra $L__BB9_82; + + ld.local.f32 %f805, [%rd13+4]; + mul.f32 %f806, %f112, %f805; + div.rn.f32 %f807, %f806, %f1181; + ld.local.f32 %f808, [%rd14+4]; + add.f32 %f809, %f808, %f807; + st.local.f32 [%rd14+4], %f809; + ld.local.f32 %f810, [%rd13+8]; + mul.f32 %f811, %f112, %f810; + div.rn.f32 %f812, %f811, %f1181; + ld.local.f32 %f813, [%rd14+8]; + add.f32 %f814, %f813, %f812; + st.local.f32 [%rd14+8], %f814; + +$L__BB9_82: + add.s32 %r25, %r546, 1; + setp.lt.s32 %p96, %r546, %r13; + mov.u32 %r546, %r25; + @%p96 bra $L__BB9_77; + + add.s32 %r541, %r541, 1; + setp.lt.s32 %p97, %r541, %r213; + @%p97 bra $L__BB9_4; + + add.s32 %r540, %r540, 1; + setp.lt.s32 %p98, %r540, %r213; + @%p98 bra $L__BB9_3; + +$L__BB9_85: + mov.u32 %r535, 0; + st.local.u32 [%rd4], %r535; + st.local.u32 [%rd4+4], %r535; + st.local.u32 [%rd4+8], %r535; + st.local.u32 [%rd4+12], %r535; + st.local.u32 [%rd4+16], %r535; + st.local.u32 [%rd4+20], %r535; + st.local.u32 [%rd4+24], %r535; + st.local.u32 [%rd4+28], %r535; + st.local.u32 [%rd4+32], %r535; + st.local.u32 [%rd4+36], %r535; + st.local.u32 [%rd4+40], %r535; + st.local.u32 [%rd4+44], %r535; + st.local.u32 [%rd4+48], %r535; + st.local.u32 [%rd4+52], %r535; + st.local.u32 [%rd4+56], %r535; + st.local.u32 [%rd4+60], %r535; + st.local.u32 [%rd4+64], %r535; + st.local.u32 [%rd4+68], %r535; + st.local.u32 [%rd4+72], %r535; + st.local.u32 [%rd4+76], %r535; + st.local.u32 [%rd4+80], %r535; + st.local.u32 [%rd4+84], %r535; + st.local.u32 [%rd4+88], %r535; + st.local.u32 [%rd4+92], %r535; + st.local.u32 [%rd4+96], %r535; + st.local.u32 [%rd4+100], %r535; + st.local.u32 [%rd4+104], %r535; + st.local.u32 [%rd4+108], %r535; + st.local.u32 [%rd4+112], %r535; + st.local.u32 [%rd4+116], %r535; + st.local.u32 [%rd4+120], %r535; + st.local.u32 [%rd4+124], %r535; + st.local.u32 [%rd4+128], %r535; + st.local.u32 [%rd4+132], %r535; + st.local.u32 [%rd4+136], %r535; + st.local.u32 [%rd4+140], %r535; + st.local.u32 [%rd4+144], %r535; + st.local.u32 [%rd4+148], %r535; + st.local.u32 [%rd4+152], %r535; + st.local.u32 [%rd4+156], %r535; + st.local.u32 [%rd4+160], %r535; + st.local.u32 [%rd4+164], %r535; + st.local.u32 [%rd4+168], %r535; + st.local.u32 [%rd4+172], %r535; + st.local.u32 [%rd4+176], %r535; + st.local.u32 [%rd4+180], %r535; + st.local.u32 [%rd4+184], %r535; + st.local.u32 [%rd4+188], %r535; + st.local.u32 [%rd4+192], %r535; + st.local.u32 [%rd4+196], %r535; + st.local.u32 [%rd4+200], %r535; + st.local.u32 [%rd4+204], %r535; + st.local.u32 [%rd4+208], %r535; + st.local.u32 [%rd4+212], %r535; + st.local.u32 [%rd4+216], %r535; + st.local.u32 [%rd4+220], %r535; + st.local.u32 [%rd4+224], %r535; + st.local.u32 [%rd4+228], %r535; + st.local.u32 [%rd4+232], %r535; + st.local.u32 [%rd4+236], %r535; + st.local.u32 [%rd4+240], %r535; + st.local.u32 [%rd4+244], %r535; + st.local.u32 [%rd4+248], %r535; + st.local.u32 [%rd4+252], %r535; + st.local.u32 [%rd4+256], %r535; + st.local.u32 [%rd4+260], %r535; + st.local.u32 [%rd4+264], %r535; + st.local.u32 [%rd4+268], %r535; + st.local.u32 [%rd4+272], %r535; + st.local.u32 [%rd4+276], %r535; + st.local.u32 [%rd4+280], %r535; + st.local.u32 [%rd4+284], %r535; + st.local.u32 [%rd4+288], %r535; + st.local.u32 [%rd4+292], %r535; + st.local.u32 [%rd4+296], %r535; + st.local.u32 [%rd4+300], %r535; + st.local.u32 [%rd4+304], %r535; + st.local.u32 [%rd4+308], %r535; + st.local.u32 [%rd4+312], %r535; + st.local.u32 [%rd4+316], %r535; + st.local.u32 [%rd4+320], %r535; + st.local.u32 [%rd4+324], %r535; + st.local.u32 [%rd4+328], %r535; + st.local.u32 [%rd4+332], %r535; + st.local.u32 [%rd4+336], %r535; + st.local.u32 [%rd4+340], %r535; + st.local.u32 [%rd4+344], %r535; + st.local.u32 [%rd4+348], %r535; + st.local.u32 [%rd4+352], %r535; + st.local.u32 [%rd4+356], %r535; + st.local.u32 [%rd4+360], %r535; + st.local.u32 [%rd4+364], %r535; + st.local.u32 [%rd4+368], %r535; + st.local.u32 [%rd4+372], %r535; + st.local.u32 [%rd4+376], %r535; + st.local.u32 [%rd4+380], %r535; + st.local.u32 [%rd4+384], %r535; + st.local.u32 [%rd4+388], %r535; + st.local.u32 [%rd4+392], %r535; + st.local.u32 [%rd4+396], %r535; + st.local.u32 [%rd4+400], %r535; + st.local.u32 [%rd4+404], %r535; + st.local.u32 [%rd4+408], %r535; + st.local.u32 [%rd4+412], %r535; + st.local.u32 [%rd4+416], %r535; + st.local.u32 [%rd4+420], %r535; + st.local.u32 [%rd4+424], %r535; + st.local.u32 [%rd4+428], %r535; + st.local.u32 [%rd4+432], %r535; + st.local.u32 [%rd4+436], %r535; + st.local.u32 [%rd4+440], %r535; + st.local.u32 [%rd4+444], %r535; + st.local.u32 [%rd4+448], %r535; + st.local.u32 [%rd4+452], %r535; + st.local.u32 [%rd4+456], %r535; + st.local.u32 [%rd4+460], %r535; + st.local.u32 [%rd4+464], %r535; + st.local.u32 [%rd4+468], %r535; + st.local.u32 [%rd4+472], %r535; + st.local.u32 [%rd4+476], %r535; + st.local.u32 [%rd4+480], %r535; + shl.b32 %r29, %r545, 1; + or.b32 %r30, %r29, 1; + setp.lt.s32 %p99, %r545, 1; + @%p99 bra $L__BB9_247; + + add.s64 %rd15, %rd5, 4; + mov.u32 %r335, 0; + max.s32 %r31, %r29, 0; + add.s32 %r336, %r31, 1; + and.b32 %r32, %r336, 3; + sub.s32 %r33, %r336, %r32; + mul.wide.s32 %rd88, %r29, 4; + add.s64 %rd16, %rd88, 4; + setp.lt.u32 %p100, %r31, 3; + setp.eq.s32 %p102, %r32, 1; + mov.u32 %r551, %r335; + +$L__BB9_87: + mov.u32 %r554, %r335; + @%p100 bra $L__BB9_90; + + mov.u32 %r554, %r335; + mov.u32 %r553, %r33; + +$L__BB9_89: + mad.lo.s32 %r339, %r554, %r30, %r551; + mul.wide.s32 %rd89, %r339, 4; + add.s64 %rd90, %rd5, %rd89; + ld.local.f32 %f815, [%rd90]; + add.s64 %rd91, %rd6, %rd89; + st.local.f32 [%rd91], %f815; + add.s64 %rd92, %rd90, %rd16; + ld.local.f32 %f816, [%rd92]; + add.s64 %rd93, %rd91, %rd16; + st.local.f32 [%rd93], %f816; + add.s64 %rd94, %rd92, %rd16; + ld.local.f32 %f817, [%rd94]; + add.s64 %rd95, %rd93, %rd16; + st.local.f32 [%rd95], %f817; + add.s64 %rd96, %rd94, %rd16; + ld.local.f32 %f818, [%rd96]; + add.s64 %rd97, %rd95, %rd16; + st.local.f32 [%rd97], %f818; + add.s32 %r554, %r554, 4; + add.s32 %r553, %r553, -4; + setp.ne.s32 %p101, %r553, 0; + @%p101 bra $L__BB9_89; + +$L__BB9_90: + mad.lo.s32 %r40, %r554, %r30, %r551; + mul.wide.s32 %rd98, %r40, 4; + add.s64 %rd99, %rd5, %rd98; + ld.local.f32 %f819, [%rd99]; + add.s64 %rd100, %rd6, %rd98; + st.local.f32 [%rd100], %f819; + @%p102 bra $L__BB9_92; + + add.s32 %r340, %r40, %r30; + mul.wide.s32 %rd101, %r340, 4; + add.s64 %rd102, %rd5, %rd101; + ld.local.f32 %f820, [%rd102]; + add.s64 %rd103, %rd6, %rd101; + st.local.f32 [%rd103], %f820; + add.s32 %r341, %r340, %r30; + mul.wide.s32 %rd104, %r341, 4; + add.s64 %rd105, %rd5, %rd104; + ld.local.f32 %f821, [%rd105]; + add.s64 %rd106, %rd6, %rd104; + st.local.f32 [%rd106], %f821; + +$L__BB9_92: + add.s32 %r41, %r551, 1; + setp.lt.s32 %p103, %r551, %r29; + mov.u32 %r551, %r41; + @%p103 bra $L__BB9_87; + + add.s32 %r42, %r545, -1; + and.b32 %r582, %r545, 3; + sub.s32 %r579, %r545, %r582; + mov.u32 %r342, 0; + setp.lt.u32 %p104, %r42, 3; + setp.eq.s32 %p110, %r582, 0; + setp.eq.s32 %p112, %r582, 1; + setp.eq.s32 %p114, %r582, 2; + mov.u32 %r555, %r342; + +$L__BB9_94: + shl.b32 %r344, %r555, 1; + or.b32 %r345, %r344, 1; + mad.lo.s32 %r46, %r345, %r30, 1; + mul.lo.s32 %r47, %r344, %r30; + mov.u32 %r558, %r342; + @%p104 bra $L__BB9_105; + + mov.u32 %r558, %r342; + mov.u32 %r557, %r579; + +$L__BB9_96: + shl.b32 %r347, %r558, 1; + add.s32 %r348, %r46, %r347; + mul.wide.s32 %rd107, %r348, 4; + add.s64 %rd17, %rd6, %rd107; + add.s32 %r349, %r347, %r47; + mul.wide.s32 %rd108, %r349, 4; + add.s64 %rd18, %rd6, %rd108; + setp.eq.s32 %p105, %r555, %r558; + @%p105 bra $L__BB9_98; + + mov.u32 %r350, 0; + st.local.u32 [%rd17], %r350; + st.local.u32 [%rd18], %r350; + +$L__BB9_98: + add.s32 %r351, %r558, 1; + setp.eq.s32 %p106, %r555, %r351; + @%p106 bra $L__BB9_100; + + mov.u32 %r352, 0; + st.local.u32 [%rd17+8], %r352; + st.local.u32 [%rd18+8], %r352; + +$L__BB9_100: + add.s32 %r353, %r558, 2; + setp.eq.s32 %p107, %r555, %r353; + @%p107 bra $L__BB9_102; + + mov.u32 %r354, 0; + st.local.u32 [%rd17+16], %r354; + st.local.u32 [%rd18+16], %r354; + +$L__BB9_102: + add.s32 %r355, %r558, 3; + setp.eq.s32 %p108, %r555, %r355; + @%p108 bra $L__BB9_104; + + mov.u32 %r356, 0; + st.local.u32 [%rd17+24], %r356; + st.local.u32 [%rd18+24], %r356; + +$L__BB9_104: + add.s32 %r558, %r558, 4; + add.s32 %r557, %r557, -4; + setp.ne.s32 %p109, %r557, 0; + @%p109 bra $L__BB9_96; + +$L__BB9_105: + @%p110 bra $L__BB9_113; + + setp.eq.s32 %p111, %r555, %r558; + shl.b32 %r357, %r558, 1; + add.s32 %r358, %r46, %r357; + mul.wide.s32 %rd109, %r358, 4; + add.s64 %rd19, %rd6, %rd109; + add.s32 %r359, %r357, %r47; + mul.wide.s32 %rd110, %r359, 4; + add.s64 %rd20, %rd6, %rd110; + @%p111 bra $L__BB9_108; + + mov.u32 %r360, 0; + st.local.u32 [%rd19], %r360; + st.local.u32 [%rd20], %r360; + +$L__BB9_108: + @%p112 bra $L__BB9_113; + + add.s32 %r361, %r558, 1; + setp.eq.s32 %p113, %r555, %r361; + @%p113 bra $L__BB9_111; + + mov.u32 %r362, 0; + st.local.u32 [%rd19+8], %r362; + st.local.u32 [%rd20+8], %r362; + +$L__BB9_111: + add.s32 %r363, %r558, 2; + setp.eq.s32 %p115, %r555, %r363; + or.pred %p116, %p114, %p115; + @%p116 bra $L__BB9_113; + + mov.u32 %r364, 0; + st.local.u32 [%rd19+16], %r364; + st.local.u32 [%rd20+16], %r364; + +$L__BB9_113: + add.s32 %r555, %r555, 1; + setp.lt.s32 %p117, %r555, %r545; + @%p117 bra $L__BB9_94; + + mov.u32 %r365, 0; + mov.f32 %f845, 0f00000000; + mov.u32 %r559, %r365; + +$L__BB9_115: + add.s32 %r55, %r559, -1; + mul.lo.s32 %r56, %r559, %r30; + mov.u32 %r560, %r365; + +$L__BB9_116: + setp.eq.s32 %p118, %r560, 0; + @%p118 bra $L__BB9_125; + + add.s32 %r368, %r560, -1; + and.b32 %r58, %r560, 3; + setp.lt.u32 %p119, %r368, 3; + mov.u32 %r563, 0; + mov.f32 %f1201, 0f00000000; + @%p119 bra $L__BB9_120; + + sub.s32 %r562, %r560, %r58; + +$L__BB9_119: + mad.lo.s32 %r370, %r563, %r30, %r560; + mul.wide.s32 %rd112, %r370, 4; + add.s64 %rd113, %rd6, %rd112; + add.s32 %r371, %r563, %r56; + mul.wide.s32 %rd114, %r371, 4; + add.s64 %rd115, %rd6, %rd114; + ld.local.f32 %f825, [%rd115]; + ld.local.f32 %f826, [%rd113]; + fma.rn.f32 %f827, %f826, %f825, %f1201; + add.s64 %rd116, %rd113, %rd16; + ld.local.f32 %f828, [%rd115+4]; + ld.local.f32 %f829, [%rd116]; + fma.rn.f32 %f830, %f829, %f828, %f827; + add.s64 %rd117, %rd116, %rd16; + ld.local.f32 %f831, [%rd115+8]; + ld.local.f32 %f832, [%rd117]; + fma.rn.f32 %f833, %f832, %f831, %f830; + add.s64 %rd118, %rd117, %rd16; + ld.local.f32 %f834, [%rd115+12]; + ld.local.f32 %f835, [%rd118]; + fma.rn.f32 %f1201, %f835, %f834, %f833; + add.s32 %r563, %r563, 4; + add.s32 %r562, %r562, -4; + setp.ne.s32 %p120, %r562, 0; + @%p120 bra $L__BB9_119; + +$L__BB9_120: + setp.eq.s32 %p121, %r58, 0; + @%p121 bra $L__BB9_124; + + mad.lo.s32 %r65, %r563, %r30, %r560; + mul.wide.s32 %rd119, %r65, 4; + add.s64 %rd120, %rd6, %rd119; + add.s32 %r372, %r563, %r56; + mul.wide.s32 %rd121, %r372, 4; + add.s64 %rd22, %rd6, %rd121; + ld.local.f32 %f836, [%rd22]; + ld.local.f32 %f837, [%rd120]; + fma.rn.f32 %f1201, %f837, %f836, %f1201; + setp.eq.s32 %p122, %r58, 1; + @%p122 bra $L__BB9_124; + + add.s32 %r66, %r65, %r30; + mul.wide.s32 %rd122, %r66, 4; + add.s64 %rd123, %rd6, %rd122; + ld.local.f32 %f838, [%rd22+4]; + ld.local.f32 %f839, [%rd123]; + fma.rn.f32 %f1201, %f839, %f838, %f1201; + setp.eq.s32 %p123, %r58, 2; + @%p123 bra $L__BB9_124; + + add.s32 %r373, %r66, %r30; + mul.wide.s32 %rd124, %r373, 4; + add.s64 %rd125, %rd6, %rd124; + ld.local.f32 %f840, [%rd22+8]; + ld.local.f32 %f841, [%rd125]; + fma.rn.f32 %f1201, %f841, %f840, %f1201; + +$L__BB9_124: + add.s32 %r374, %r560, %r56; + mul.wide.s32 %rd126, %r374, 4; + add.s64 %rd127, %rd6, %rd126; + ld.local.f32 %f842, [%rd127]; + sub.f32 %f843, %f842, %f1201; + st.local.f32 [%rd127], %f843; + +$L__BB9_125: + add.s32 %r67, %r560, 1; + setp.lt.u32 %p124, %r560, %r559; + mov.u32 %r560, %r67; + @%p124 bra $L__BB9_116; + + setp.ge.s32 %p125, %r559, %r29; + @%p125 bra $L__BB9_139; + + add.s32 %r375, %r56, %r559; + mul.wide.s32 %rd128, %r375, 4; + add.s64 %rd23, %rd6, %rd128; + and.b32 %r68, %r559, 3; + sub.s32 %r69, %r559, %r68; + mov.u32 %r564, %r559; + +$L__BB9_128: + add.s32 %r564, %r564, 1; + setp.eq.s32 %p126, %r559, 0; + @%p126 bra $L__BB9_137; + + setp.lt.u32 %p127, %r55, 3; + mov.u32 %r567, 0; + mov.f32 %f1205, %f845; + @%p127 bra $L__BB9_132; + + mov.f32 %f1205, %f845; + mov.u32 %r566, %r69; + +$L__BB9_131: + mad.lo.s32 %r378, %r567, %r30, %r564; + mul.wide.s32 %rd129, %r378, 4; + add.s64 %rd130, %rd6, %rd129; + add.s32 %r379, %r567, %r56; + mul.wide.s32 %rd131, %r379, 4; + add.s64 %rd132, %rd6, %rd131; + ld.local.f32 %f847, [%rd132]; + ld.local.f32 %f848, [%rd130]; + fma.rn.f32 %f849, %f848, %f847, %f1205; + add.s64 %rd133, %rd130, %rd16; + ld.local.f32 %f850, [%rd132+4]; + ld.local.f32 %f851, [%rd133]; + fma.rn.f32 %f852, %f851, %f850, %f849; + add.s64 %rd134, %rd133, %rd16; + ld.local.f32 %f853, [%rd132+8]; + ld.local.f32 %f854, [%rd134]; + fma.rn.f32 %f855, %f854, %f853, %f852; + add.s64 %rd135, %rd134, %rd16; + ld.local.f32 %f856, [%rd132+12]; + ld.local.f32 %f857, [%rd135]; + fma.rn.f32 %f1205, %f857, %f856, %f855; + add.s32 %r567, %r567, 4; + add.s32 %r566, %r566, -4; + setp.ne.s32 %p128, %r566, 0; + @%p128 bra $L__BB9_131; + +$L__BB9_132: + setp.eq.s32 %p129, %r68, 0; + @%p129 bra $L__BB9_136; + + setp.eq.s32 %p130, %r68, 1; + mad.lo.s32 %r77, %r567, %r30, %r564; + mul.wide.s32 %rd136, %r77, 4; + add.s64 %rd137, %rd6, %rd136; + add.s32 %r380, %r567, %r56; + mul.wide.s32 %rd138, %r380, 4; + add.s64 %rd24, %rd6, %rd138; + ld.local.f32 %f858, [%rd24]; + ld.local.f32 %f859, [%rd137]; + fma.rn.f32 %f1205, %f859, %f858, %f1205; + @%p130 bra $L__BB9_136; + + setp.eq.s32 %p131, %r68, 2; + add.s32 %r78, %r77, %r30; + mul.wide.s32 %rd139, %r78, 4; + add.s64 %rd140, %rd6, %rd139; + ld.local.f32 %f860, [%rd24+4]; + ld.local.f32 %f861, [%rd140]; + fma.rn.f32 %f1205, %f861, %f860, %f1205; + @%p131 bra $L__BB9_136; + + add.s32 %r381, %r78, %r30; + mul.wide.s32 %rd141, %r381, 4; + add.s64 %rd142, %rd6, %rd141; + ld.local.f32 %f862, [%rd24+8]; + ld.local.f32 %f863, [%rd142]; + fma.rn.f32 %f1205, %f863, %f862, %f1205; + +$L__BB9_136: + ld.local.f32 %f864, [%rd23]; + rcp.rn.f32 %f865, %f864; + add.s32 %r382, %r564, %r56; + mul.wide.s32 %rd143, %r382, 4; + add.s64 %rd144, %rd6, %rd143; + ld.local.f32 %f866, [%rd144]; + sub.f32 %f867, %f866, %f1205; + mul.f32 %f868, %f865, %f867; + st.local.f32 [%rd144], %f868; + bra.uni $L__BB9_138; + +$L__BB9_137: + ld.local.f32 %f869, [%rd6]; + rcp.rn.f32 %f870, %f869; + mul.wide.s32 %rd145, %r564, 4; + add.s64 %rd146, %rd6, %rd145; + ld.local.f32 %f871, [%rd146]; + mul.f32 %f872, %f870, %f871; + st.local.f32 [%rd146], %f872; + +$L__BB9_138: + setp.lt.s32 %p132, %r564, %r29; + @%p132 bra $L__BB9_128; + +$L__BB9_139: + setp.lt.s32 %p133, %r559, %r29; + add.s32 %r559, %r559, 1; + @%p133 bra $L__BB9_115; + + cvt.s64.s32 %rd25, %r29; + add.s64 %rd26, %rd10, %rd88; + mad.lo.s32 %r384, %r30, %r29, %r29; + cvt.s64.s32 %rd27, %r384; + mul.wide.s32 %rd148, %r384, 4; + add.s64 %rd149, %rd6, %rd148; + ld.local.f32 %f129, [%rd149]; + mov.u32 %r385, 1; + sub.s32 %r80, %r385, %r29; + mov.u32 %r568, 0; + setp.eq.s32 %p135, %r545, 0; + mov.f32 %f898, 0f00000000; + +$L__BB9_141: + setp.eq.s32 %p134, %r568, 0; + selp.f32 %f873, 0f3F800000, 0f00000000, %p134; + st.local.f32 [%rd10], %f873; + @%p135 bra $L__BB9_151; + + mov.u32 %r386, 0; + mov.u32 %r569, %r385; + mov.u32 %r570, %r386; + +$L__BB9_143: + mov.u32 %r82, %r570; + mov.u32 %r570, %r569; + and.b32 %r84, %r570, 3; + setp.lt.u32 %p136, %r82, 3; + mov.f32 %f1209, 0f00000000; + mov.u32 %r573, %r386; + @%p136 bra $L__BB9_146; + + sub.s32 %r572, %r570, %r84; + mov.u32 %r573, %r386; + +$L__BB9_145: + mad.lo.s32 %r390, %r573, %r30, %r570; + mul.wide.s32 %rd150, %r390, 4; + add.s64 %rd151, %rd6, %rd150; + mul.wide.s32 %rd152, %r573, 4; + add.s64 %rd153, %rd10, %rd152; + ld.local.f32 %f877, [%rd153]; + ld.local.f32 %f878, [%rd151]; + fma.rn.f32 %f879, %f878, %f877, %f1209; + add.s64 %rd154, %rd151, %rd16; + ld.local.f32 %f880, [%rd153+4]; + ld.local.f32 %f881, [%rd154]; + fma.rn.f32 %f882, %f881, %f880, %f879; + add.s64 %rd155, %rd154, %rd16; + ld.local.f32 %f883, [%rd153+8]; + ld.local.f32 %f884, [%rd155]; + fma.rn.f32 %f885, %f884, %f883, %f882; + add.s64 %rd156, %rd155, %rd16; + ld.local.f32 %f886, [%rd153+12]; + ld.local.f32 %f887, [%rd156]; + fma.rn.f32 %f1209, %f887, %f886, %f885; + add.s32 %r573, %r573, 4; + add.s32 %r572, %r572, -4; + setp.ne.s32 %p137, %r572, 0; + @%p137 bra $L__BB9_145; + +$L__BB9_146: + setp.eq.s32 %p138, %r84, 0; + @%p138 bra $L__BB9_150; + + mad.lo.s32 %r91, %r573, %r30, %r570; + mul.wide.s32 %rd157, %r91, 4; + add.s64 %rd158, %rd6, %rd157; + mul.wide.s32 %rd159, %r573, 4; + add.s64 %rd30, %rd10, %rd159; + ld.local.f32 %f888, [%rd30]; + ld.local.f32 %f889, [%rd158]; + fma.rn.f32 %f1209, %f889, %f888, %f1209; + setp.eq.s32 %p139, %r84, 1; + @%p139 bra $L__BB9_150; + + add.s32 %r92, %r91, %r30; + mul.wide.s32 %rd160, %r92, 4; + add.s64 %rd161, %rd6, %rd160; + ld.local.f32 %f890, [%rd30+4]; + ld.local.f32 %f891, [%rd161]; + fma.rn.f32 %f1209, %f891, %f890, %f1209; + setp.eq.s32 %p140, %r84, 2; + @%p140 bra $L__BB9_150; + + add.s32 %r391, %r92, %r30; + mul.wide.s32 %rd162, %r391, 4; + add.s64 %rd163, %rd6, %rd162; + ld.local.f32 %f892, [%rd30+8]; + ld.local.f32 %f893, [%rd163]; + fma.rn.f32 %f1209, %f893, %f892, %f1209; + +$L__BB9_150: + setp.eq.s32 %p141, %r570, %r568; + selp.f32 %f894, 0f3F800000, 0f00000000, %p141; + sub.f32 %f895, %f894, %f1209; + mul.wide.s32 %rd164, %r570, 4; + add.s64 %rd165, %rd10, %rd164; + st.local.f32 [%rd165], %f895; + add.s32 %r569, %r570, 1; + setp.lt.s32 %p142, %r570, %r29; + @%p142 bra $L__BB9_143; + +$L__BB9_151: + ld.local.f32 %f896, [%rd26]; + div.rn.f32 %f897, %f896, %f129; + mul.lo.s32 %r94, %r568, %r30; + add.s32 %r392, %r94, %r29; + mul.wide.s32 %rd166, %r392, 4; + add.s64 %rd167, %rd4, %rd166; + st.local.f32 [%rd167], %f897; + @%p135 bra $L__BB9_161; + + mov.u32 %r574, 0; + mov.u32 %r575, %r29; + +$L__BB9_153: + mov.u32 %r96, %r575; + sub.s32 %r394, %r29, %r574; + max.s32 %r395, %r394, %r29; + add.s32 %r396, %r80, %r574; + add.s32 %r97, %r395, %r396; + add.s32 %r575, %r96, -1; + add.s32 %r397, %r96, %r94; + mul.wide.s32 %rd168, %r397, 4; + add.s64 %rd31, %rd4, %rd168; + setp.gt.s32 %p144, %r96, %r29; + mov.f32 %f1213, %f898; + @%p144 bra $L__BB9_160; + + and.b32 %r99, %r97, 3; + setp.eq.s32 %p145, %r99, 0; + mov.u32 %r576, %r96; + mov.f32 %f1213, %f898; + @%p145 bra $L__BB9_158; + + mad.lo.s32 %r100, %r96, %r30, %r575; + mul.wide.s32 %rd169, %r100, 4; + add.s64 %rd170, %rd6, %rd169; + ld.local.f32 %f901, [%rd31]; + ld.local.f32 %f902, [%rd170]; + fma.rn.f32 %f1213, %f902, %f901, 0f00000000; + add.s32 %r576, %r96, 1; + setp.eq.s32 %p146, %r99, 1; + @%p146 bra $L__BB9_158; + + add.s32 %r102, %r100, %r30; + mul.wide.s32 %rd171, %r102, 4; + add.s64 %rd172, %rd6, %rd171; + ld.local.f32 %f903, [%rd31+4]; + ld.local.f32 %f904, [%rd172]; + fma.rn.f32 %f1213, %f904, %f903, %f1213; + add.s32 %r576, %r96, 2; + setp.eq.s32 %p147, %r99, 2; + @%p147 bra $L__BB9_158; + + add.s32 %r398, %r102, %r30; + mul.wide.s32 %rd173, %r398, 4; + add.s64 %rd174, %rd6, %rd173; + ld.local.f32 %f905, [%rd31+8]; + ld.local.f32 %f906, [%rd174]; + fma.rn.f32 %f1213, %f906, %f905, %f1213; + add.s32 %r576, %r96, 3; + +$L__BB9_158: + add.s32 %r399, %r97, -1; + setp.lt.u32 %p148, %r399, 3; + @%p148 bra $L__BB9_160; + +$L__BB9_159: + mad.lo.s32 %r400, %r576, %r30, %r575; + mul.wide.s32 %rd175, %r400, 4; + add.s64 %rd176, %rd6, %rd175; + add.s32 %r401, %r576, %r94; + mul.wide.s32 %rd177, %r401, 4; + add.s64 %rd178, %rd4, %rd177; + ld.local.f32 %f907, [%rd178]; + ld.local.f32 %f908, [%rd176]; + fma.rn.f32 %f909, %f908, %f907, %f1213; + add.s64 %rd179, %rd176, %rd16; + ld.local.f32 %f910, [%rd178+4]; + ld.local.f32 %f911, [%rd179]; + fma.rn.f32 %f912, %f911, %f910, %f909; + add.s64 %rd180, %rd179, %rd16; + ld.local.f32 %f913, [%rd178+8]; + ld.local.f32 %f914, [%rd180]; + fma.rn.f32 %f915, %f914, %f913, %f912; + add.s64 %rd181, %rd180, %rd16; + ld.local.f32 %f916, [%rd178+12]; + ld.local.f32 %f917, [%rd181]; + fma.rn.f32 %f1213, %f917, %f916, %f915; + add.s32 %r107, %r576, 4; + add.s32 %r402, %r576, 3; + setp.lt.s32 %p149, %r402, %r29; + mov.u32 %r576, %r107; + @%p149 bra $L__BB9_159; + +$L__BB9_160: + mad.lo.s32 %r403, %r575, %r30, %r575; + mul.wide.s32 %rd182, %r403, 4; + add.s64 %rd183, %rd6, %rd182; + ld.local.f32 %f918, [%rd183]; + rcp.rn.f32 %f919, %f918; + mul.wide.s32 %rd184, %r575, 4; + add.s64 %rd185, %rd10, %rd184; + ld.local.f32 %f920, [%rd185]; + sub.f32 %f921, %f920, %f1213; + mul.f32 %f922, %f919, %f921; + st.local.f32 [%rd31+-4], %f922; + add.s32 %r574, %r574, 1; + setp.gt.s32 %p150, %r96, 1; + @%p150 bra $L__BB9_153; + +$L__BB9_161: + add.s32 %r109, %r568, 1; + setp.lt.s32 %p151, %r568, %r29; + mov.u32 %r568, %r109; + @%p151 bra $L__BB9_141; + + mov.u32 %r580, 0; + @%p104 bra $L__BB9_165; + + shl.b64 %rd186, %rd25, 2; + add.s64 %rd32, %rd186, 8; + +$L__BB9_164: + shl.b32 %r406, %r580, 1; + mad.lo.s32 %r407, %r406, %r30, %r406; + mul.wide.s32 %rd187, %r407, 4; + add.s64 %rd188, %rd4, %rd187; + ld.local.f32 %f923, [%rd188]; + abs.f32 %f924, %f923; + sqrt.rn.f32 %f925, %f924; + add.s32 %r408, %r580, %r3; + mul.wide.s32 %rd189, %r408, 4; + add.s64 %rd190, %rd2, %rd189; + st.global.f32 [%rd190], %f925; + add.s64 %rd191, %rd188, %rd32; + ld.local.f32 %f926, [%rd191]; + abs.f32 %f927, %f926; + sqrt.rn.f32 %f928, %f927; + add.s64 %rd192, %rd1, %rd189; + st.global.f32 [%rd192], %f928; + add.s64 %rd193, %rd191, %rd32; + ld.local.f32 %f929, [%rd193]; + abs.f32 %f930, %f929; + sqrt.rn.f32 %f931, %f930; + st.global.f32 [%rd190+4], %f931; + add.s64 %rd194, %rd193, %rd32; + ld.local.f32 %f932, [%rd194]; + abs.f32 %f933, %f932; + sqrt.rn.f32 %f934, %f933; + st.global.f32 [%rd192+4], %f934; + add.s64 %rd195, %rd194, %rd32; + ld.local.f32 %f935, [%rd195]; + abs.f32 %f936, %f935; + sqrt.rn.f32 %f937, %f936; + st.global.f32 [%rd190+8], %f937; + add.s64 %rd196, %rd195, %rd32; ld.local.f32 %f938, [%rd196]; - ld.local.f32 %f939, [%rd194]; - fma.rn.f32 %f1453, %f939, %f938, %f1450; - add.s32 %r855, %r853, 1; - -BB9_167: - setp.lt.u32 %p157, %r851, 4; - @%p157 bra BB9_169; - -BB9_168: - mad.lo.s32 %r515, %r855, %r425, %r851; - mul.wide.s32 %rd197, %r515, 4; - add.s64 %rd198, %rd5, %rd197; - mul.wide.s32 %rd199, %r855, 4; - add.s64 %rd200, %rd6, %rd199; - ld.local.f32 %f940, [%rd200]; - ld.local.f32 %f941, [%rd198]; - fma.rn.f32 %f942, %f941, %f940, %f1453; - add.s32 %r516, %r515, %r425; - mul.wide.s32 %rd201, %r516, 4; - add.s64 %rd202, %rd5, %rd201; - ld.local.f32 %f943, [%rd200+4]; - ld.local.f32 %f944, [%rd202]; - fma.rn.f32 %f945, %f944, %f943, %f942; - add.s32 %r517, %r855, 2; - mad.lo.s32 %r518, %r517, %r425, %r851; - mul.wide.s32 %rd203, %r518, 4; - add.s64 %rd204, %rd5, %rd203; - ld.local.f32 %f946, [%rd200+8]; - ld.local.f32 %f947, [%rd204]; - fma.rn.f32 %f948, %f947, %f946, %f945; - add.s32 %r519, %r855, 3; - mad.lo.s32 %r520, %r519, %r425, %r851; - mul.wide.s32 %rd205, %r520, 4; - add.s64 %rd206, %rd5, %rd205; - ld.local.f32 %f949, [%rd200+12]; - ld.local.f32 %f950, [%rd206]; - fma.rn.f32 %f1453, %f950, %f949, %f948; - add.s32 %r855, %r855, 4; - setp.lt.s32 %p158, %r519, %r503; - @%p158 bra BB9_168; - -BB9_169: - mul.wide.s32 %rd207, %r851, 4; - add.s64 %rd208, %rd6, %rd207; - setp.eq.s32 %p159, %r851, %r850; - selp.f32 %f951, 0f3F800000, 0f00000000, %p159; - sub.f32 %f952, %f951, %f1453; - st.local.f32 [%rd208], %f952; - add.s32 %r851, %r851, 1; - setp.lt.s32 %p160, %r851, %r425; - @%p160 bra BB9_160; - - ld.local.f32 %f953, [%rd17]; - div.rn.f32 %f954, %f953, %f139; - mul.lo.s32 %r87, %r850, %r425; - add.s32 %r525, %r87, %r26; - mul.wide.s32 %rd209, %r525, 4; - add.s64 %rd210, %rd3, %rd209; - st.local.f32 [%rd210], %f954; - mov.u32 %r856, 0; - setp.lt.s32 %p161, %r75, 0; - mov.u32 %r857, %r75; - @%p161 bra BB9_181; - -BB9_171: - add.s32 %r856, %r856, 1; - add.s32 %r861, %r857, 1; - add.s32 %r528, %r861, %r87; - mul.wide.s32 %rd211, %r528, 4; - add.s64 %rd18, %rd3, %rd211; - mov.f32 %f1458, 0f00000000; - setp.ge.s32 %p162, %r861, %r425; - @%p162 bra BB9_180; - - and.b32 %r91, %r856, 3; - setp.eq.s32 %p163, %r91, 0; - mov.f32 %f1458, 0f00000000; - @%p163 bra BB9_178; - - setp.eq.s32 %p164, %r91, 1; - add.s32 %r859, %r857, 1; - mov.f32 %f1455, 0f00000000; - @%p164 bra BB9_177; - - setp.eq.s32 %p165, %r91, 2; - add.s32 %r858, %r857, 1; - mov.f32 %f1454, 0f00000000; - @%p165 bra BB9_176; - - add.s32 %r530, %r857, 1; - mad.lo.s32 %r531, %r530, %r425, %r857; - mul.wide.s32 %rd212, %r531, 4; - add.s64 %rd213, %rd5, %rd212; - ld.local.f32 %f959, [%rd18]; - ld.local.f32 %f960, [%rd213]; - fma.rn.f32 %f1454, %f960, %f959, 0f00000000; - add.s32 %r858, %r857, 2; - -BB9_176: - mad.lo.s32 %r533, %r858, %r425, %r857; - mul.wide.s32 %rd214, %r533, 4; + abs.f32 %f939, %f938; + sqrt.rn.f32 %f940, %f939; + st.global.f32 [%rd192+8], %f940; + add.s64 %rd197, %rd196, %rd32; + ld.local.f32 %f941, [%rd197]; + abs.f32 %f942, %f941; + sqrt.rn.f32 %f943, %f942; + st.global.f32 [%rd190+12], %f943; + add.s64 %rd198, %rd197, %rd32; + ld.local.f32 %f944, [%rd198]; + abs.f32 %f945, %f944; + sqrt.rn.f32 %f946, %f945; + st.global.f32 [%rd192+12], %f946; + add.s32 %r580, %r580, 4; + add.s32 %r579, %r579, -4; + setp.ne.s32 %p153, %r579, 0; + @%p153 bra $L__BB9_164; + +$L__BB9_165: + @%p110 bra $L__BB9_167; + +$L__BB9_166: + .pragma "nounroll"; + shl.b32 %r409, %r580, 1; + mul.lo.s32 %r410, %r409, %r30; + add.s32 %r411, %r410, %r409; + mul.wide.s32 %rd199, %r411, 4; + add.s64 %rd200, %rd4, %rd199; + ld.local.f32 %f947, [%rd200]; + abs.f32 %f948, %f947; + sqrt.rn.f32 %f949, %f948; + add.s32 %r412, %r580, %r3; + mul.wide.s32 %rd201, %r412, 4; + add.s64 %rd202, %rd2, %rd201; + st.global.f32 [%rd202], %f949; + add.s32 %r413, %r410, %r30; + add.s32 %r414, %r413, %r409; + add.s32 %r415, %r414, 1; + mul.wide.s32 %rd203, %r415, 4; + add.s64 %rd204, %rd4, %rd203; + ld.local.f32 %f950, [%rd204]; + abs.f32 %f951, %f950; + sqrt.rn.f32 %f952, %f951; + add.s64 %rd205, %rd1, %rd201; + st.global.f32 [%rd205], %f952; + add.s32 %r580, %r580, 1; + add.s32 %r582, %r582, -1; + setp.ne.s32 %p155, %r582, 0; + @%p155 bra $L__BB9_166; + +$L__BB9_167: + mov.u32 %r416, 0; + mov.f64 %fd24, 0d4000000000000000; + mov.u32 %r583, %r416; + +$L__BB9_168: + add.s32 %r418, %r583, %r3; + mul.wide.s32 %rd206, %r418, 4; + add.s64 %rd33, %rd8, %rd206; + add.s64 %rd34, %rd2, %rd206; + add.s64 %rd35, %rd7, %rd206; + add.s64 %rd36, %rd1, %rd206; + shl.b32 %r419, %r583, 1; + or.b32 %r420, %r419, 1; + mad.lo.s32 %r120, %r420, %r30, 1; + mul.lo.s32 %r121, %r419, %r30; + mov.u32 %r584, %r416; + +$L__BB9_169: + setp.eq.s32 %p156, %r583, %r584; + @%p156 bra $L__BB9_197; + + ld.global.f32 %f953, [%rd33]; + add.s32 %r123, %r584, %r3; + mul.wide.s32 %rd207, %r123, 4; + add.s64 %rd208, %rd8, %rd207; + ld.global.f32 %f954, [%rd208]; + sub.f32 %f955, %f953, %f954; + abs.f32 %f146, %f955; + cvt.f64.f32 %fd1, %f146; + { + .reg .b32 %temp; + mov.b64 {%temp, %r124}, %fd1; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r125}, %fd24; + } + and.b32 %r126, %r125, 2146435072; + setp.eq.s32 %p157, %r126, 1062207488; + abs.f64 %fd2, %fd1; + { // callseq 182, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd2; + .param .b64 param1; + st.param.f64 [param1+0], %fd24; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd44, [retval0+0]; + } // callseq 182 + setp.lt.s32 %p158, %r124, 0; + and.pred %p5, %p158, %p157; + not.pred %p159, %p5; + @%p159 bra $L__BB9_172; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r421}, %fd44; + } + xor.b32 %r422, %r421, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r423, %temp}, %fd44; + } + mov.b64 %fd44, {%r423, %r422}; + +$L__BB9_172: + setp.eq.f32 %p160, %f146, 0f00000000; + @%p160 bra $L__BB9_176; + bra.uni $L__BB9_173; + +$L__BB9_176: + selp.b32 %r424, %r124, 0, %p157; + mov.u32 %r425, 0; + or.b32 %r426, %r424, 2146435072; + setp.lt.s32 %p164, %r125, 0; + selp.b32 %r427, %r426, %r424, %p164; + mov.b64 %fd44, {%r425, %r427}; + bra.uni $L__BB9_177; + +$L__BB9_173: + setp.gt.s32 %p161, %r124, -1; + @%p161 bra $L__BB9_177; + + cvt.rzi.f64.f64 %fd26, %fd24; + setp.eq.f64 %p162, %fd26, 0d4000000000000000; + @%p162 bra $L__BB9_177; + + mov.f64 %fd44, 0dFFF8000000000000; + +$L__BB9_177: + add.f64 %fd8, %fd1, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r428}, %fd8; + } + and.b32 %r429, %r428, 2146435072; + setp.ne.s32 %p165, %r429, 2146435072; + mov.f64 %fd45, %fd44; + @%p165 bra $L__BB9_183; + + setp.gtu.f64 %p166, %fd2, 0d7FF0000000000000; + mov.f64 %fd45, %fd8; + @%p166 bra $L__BB9_183; + + { + .reg .b32 %temp; + mov.b64 {%r430, %temp}, %fd24; + } + and.b32 %r127, %r125, 2147483647; + setp.eq.s32 %p167, %r127, 2146435072; + setp.eq.s32 %p168, %r430, 0; + and.pred %p169, %p167, %p168; + @%p169 bra $L__BB9_182; + bra.uni $L__BB9_180; + +$L__BB9_182: + setp.gt.f64 %p176, %fd2, 0d3FF0000000000000; + selp.b32 %r437, 2146435072, 0, %p176; + mov.u32 %r438, 0; + xor.b32 %r439, %r437, 2146435072; + setp.lt.s32 %p177, %r125, 0; + selp.b32 %r440, %r439, %r437, %p177; + setp.eq.f32 %p178, %f146, 0fBF800000; + selp.b32 %r441, 1072693248, %r440, %p178; + mov.b64 %fd45, {%r438, %r441}; + bra.uni $L__BB9_183; + +$L__BB9_180: + { + .reg .b32 %temp; + mov.b64 {%r431, %temp}, %fd1; + } + and.b32 %r432, %r124, 2147483647; + setp.ne.s32 %p170, %r432, 2146435072; + setp.ne.s32 %p171, %r431, 0; + or.pred %p172, %p170, %p171; + mov.f64 %fd45, %fd44; + @%p172 bra $L__BB9_183; + + setp.gt.s32 %p173, %r125, -1; + selp.b32 %r433, 2146435072, 0, %p173; + mov.u32 %r434, 0; + setp.ne.s32 %p174, %r127, 1071644672; + and.pred %p175, %p174, %p5; + or.b32 %r435, %r433, -2147483648; + selp.b32 %r436, %r435, %r433, %p175; + mov.b64 %fd45, {%r434, %r436}; + +$L__BB9_183: + setp.eq.f32 %p179, %f146, 0f3F800000; + selp.f64 %fd29, 0d3FF0000000000000, %fd45, %p179; + ld.global.f32 %f956, [%rd34]; + cvt.f64.f32 %fd30, %f956; + div.rn.f64 %fd31, %fd29, %fd30; + add.s64 %rd210, %rd2, %rd207; + ld.global.f32 %f957, [%rd210]; + cvt.f64.f32 %fd32, %f957; + div.rn.f64 %fd12, %fd31, %fd32; + add.s64 %rd211, %rd7, %rd207; + ld.global.f32 %f958, [%rd211]; + ld.global.f32 %f959, [%rd35]; + sub.f32 %f960, %f959, %f958; + abs.f32 %f147, %f960; + cvt.f64.f32 %fd13, %f147; + { + .reg .b32 %temp; + mov.b64 {%temp, %r128}, %fd13; + } + abs.f64 %fd14, %fd13; + { // callseq 183, 0 + .reg .b32 temp_param_reg; + .param .b64 param0; + st.param.f64 [param0+0], %fd14; + .param .b64 param1; + st.param.f64 [param1+0], %fd24; + .param .b64 retval0; + call.uni (retval0), + __internal_accurate_pow, + ( + param0, + param1 + ); + ld.param.f64 %fd47, [retval0+0]; + } // callseq 183 + setp.lt.s32 %p180, %r128, 0; + and.pred %p6, %p180, %p157; + not.pred %p182, %p6; + @%p182 bra $L__BB9_185; + + { + .reg .b32 %temp; + mov.b64 {%temp, %r442}, %fd47; + } + xor.b32 %r443, %r442, -2147483648; + { + .reg .b32 %temp; + mov.b64 {%r444, %temp}, %fd47; + } + mov.b64 %fd47, {%r444, %r443}; + +$L__BB9_185: + setp.eq.f32 %p183, %f147, 0f00000000; + @%p183 bra $L__BB9_189; + bra.uni $L__BB9_186; + +$L__BB9_189: + selp.b32 %r445, %r128, 0, %p157; + mov.u32 %r446, 0; + or.b32 %r447, %r445, 2146435072; + setp.lt.s32 %p187, %r125, 0; + selp.b32 %r448, %r447, %r445, %p187; + mov.b64 %fd47, {%r446, %r448}; + bra.uni $L__BB9_190; + +$L__BB9_186: + setp.gt.s32 %p184, %r128, -1; + @%p184 bra $L__BB9_190; + + cvt.rzi.f64.f64 %fd35, %fd24; + setp.eq.f64 %p185, %fd35, 0d4000000000000000; + @%p185 bra $L__BB9_190; + + mov.f64 %fd47, 0dFFF8000000000000; + +$L__BB9_190: + add.f64 %fd20, %fd13, 0d4000000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r449}, %fd20; + } + and.b32 %r450, %r449, 2146435072; + setp.ne.s32 %p188, %r450, 2146435072; + mov.f64 %fd48, %fd47; + @%p188 bra $L__BB9_196; + + setp.gtu.f64 %p189, %fd14, 0d7FF0000000000000; + mov.f64 %fd48, %fd20; + @%p189 bra $L__BB9_196; + + { + .reg .b32 %temp; + mov.b64 {%r451, %temp}, %fd24; + } + and.b32 %r129, %r125, 2147483647; + setp.eq.s32 %p190, %r129, 2146435072; + setp.eq.s32 %p191, %r451, 0; + and.pred %p192, %p190, %p191; + @%p192 bra $L__BB9_195; + bra.uni $L__BB9_193; + +$L__BB9_195: + setp.gt.f64 %p199, %fd14, 0d3FF0000000000000; + selp.b32 %r458, 2146435072, 0, %p199; + mov.u32 %r459, 0; + xor.b32 %r460, %r458, 2146435072; + setp.lt.s32 %p200, %r125, 0; + selp.b32 %r461, %r460, %r458, %p200; + setp.eq.f32 %p201, %f147, 0fBF800000; + selp.b32 %r462, 1072693248, %r461, %p201; + mov.b64 %fd48, {%r459, %r462}; + bra.uni $L__BB9_196; + +$L__BB9_193: + { + .reg .b32 %temp; + mov.b64 {%r452, %temp}, %fd13; + } + and.b32 %r453, %r128, 2147483647; + setp.ne.s32 %p193, %r453, 2146435072; + setp.ne.s32 %p194, %r452, 0; + or.pred %p195, %p193, %p194; + mov.f64 %fd48, %fd47; + @%p195 bra $L__BB9_196; + + setp.gt.s32 %p196, %r125, -1; + selp.b32 %r454, 2146435072, 0, %p196; + mov.u32 %r455, 0; + setp.ne.s32 %p197, %r129, 1071644672; + and.pred %p198, %p197, %p6; + or.b32 %r456, %r454, -2147483648; + selp.b32 %r457, %r456, %r454, %p198; + mov.b64 %fd48, {%r455, %r457}; + +$L__BB9_196: + cvt.rn.f32.f64 %f961, %fd12; + setp.eq.f32 %p202, %f147, 0f3F800000; + selp.f64 %fd38, 0d3FF0000000000000, %fd48, %p202; + ld.global.f32 %f962, [%rd36]; + cvt.f64.f32 %fd39, %f962; + div.rn.f64 %fd40, %fd38, %fd39; + add.s64 %rd213, %rd1, %rd207; + ld.global.f32 %f963, [%rd213]; + cvt.f64.f32 %fd41, %f963; + div.rn.f64 %fd42, %fd40, %fd41; + cvt.rn.f32.f64 %f964, %fd42; + add.f32 %f965, %f964, 0f3F800000; + div.rn.f32 %f966, %f964, %f965; + shl.b32 %r463, %r584, 1; + add.s32 %r464, %r120, %r463; + mul.wide.s32 %rd214, %r464, 4; add.s64 %rd215, %rd5, %rd214; - add.s32 %r534, %r858, %r87; - mul.wide.s32 %rd216, %r534, 4; - add.s64 %rd217, %rd3, %rd216; - ld.local.f32 %f961, [%rd217]; - ld.local.f32 %f962, [%rd215]; - fma.rn.f32 %f1455, %f962, %f961, %f1454; - add.s32 %r859, %r858, 1; - -BB9_177: - mad.lo.s32 %r536, %r859, %r425, %r857; - mul.wide.s32 %rd218, %r536, 4; - add.s64 %rd219, %rd5, %rd218; - add.s32 %r537, %r859, %r87; - mul.wide.s32 %rd220, %r537, 4; - add.s64 %rd221, %rd3, %rd220; - ld.local.f32 %f963, [%rd221]; - ld.local.f32 %f964, [%rd219]; - fma.rn.f32 %f1458, %f964, %f963, %f1455; - add.s32 %r861, %r859, 1; - -BB9_178: - setp.lt.u32 %p166, %r856, 4; - @%p166 bra BB9_180; - -BB9_179: - mad.lo.s32 %r539, %r861, %r425, %r857; - mul.wide.s32 %rd222, %r539, 4; - add.s64 %rd223, %rd5, %rd222; - add.s32 %r540, %r861, %r87; - mul.wide.s32 %rd224, %r540, 4; - add.s64 %rd225, %rd3, %rd224; - ld.local.f32 %f965, [%rd225]; - ld.local.f32 %f966, [%rd223]; - fma.rn.f32 %f967, %f966, %f965, %f1458; - add.s32 %r541, %r539, %r425; - mul.wide.s32 %rd226, %r541, 4; + ld.local.f32 %f967, [%rd215]; + mul.f32 %f968, %f967, %f966; + st.local.f32 [%rd215], %f968; + add.f32 %f969, %f961, 0f3F800000; + div.rn.f32 %f970, %f961, %f969; + add.s32 %r465, %r463, %r121; + mul.wide.s32 %rd216, %r465, 4; + add.s64 %rd217, %rd5, %rd216; + ld.local.f32 %f971, [%rd217]; + mul.f32 %f972, %f970, %f971; + st.local.f32 [%rd217], %f972; + +$L__BB9_197: + add.s32 %r584, %r584, 1; + setp.lt.s32 %p203, %r584, %r545; + @%p203 bra $L__BB9_169; + + add.s32 %r583, %r583, 1; + setp.lt.s32 %p204, %r583, %r545; + @%p204 bra $L__BB9_168; + + mov.u32 %r466, 0; + st.local.u32 [%rd4], %r466; + st.local.u32 [%rd4+4], %r466; + st.local.u32 [%rd4+8], %r466; + st.local.u32 [%rd4+12], %r466; + st.local.u32 [%rd4+16], %r466; + st.local.u32 [%rd4+20], %r466; + st.local.u32 [%rd4+24], %r466; + st.local.u32 [%rd4+28], %r466; + st.local.u32 [%rd4+32], %r466; + st.local.u32 [%rd4+36], %r466; + st.local.u32 [%rd4+40], %r466; + st.local.u32 [%rd4+44], %r466; + st.local.u32 [%rd4+48], %r466; + st.local.u32 [%rd4+52], %r466; + st.local.u32 [%rd4+56], %r466; + st.local.u32 [%rd4+60], %r466; + st.local.u32 [%rd4+64], %r466; + st.local.u32 [%rd4+68], %r466; + st.local.u32 [%rd4+72], %r466; + st.local.u32 [%rd4+76], %r466; + st.local.u32 [%rd4+80], %r466; + st.local.u32 [%rd4+84], %r466; + st.local.u32 [%rd4+88], %r466; + st.local.u32 [%rd4+92], %r466; + st.local.u32 [%rd4+96], %r466; + st.local.u32 [%rd4+100], %r466; + st.local.u32 [%rd4+104], %r466; + st.local.u32 [%rd4+108], %r466; + st.local.u32 [%rd4+112], %r466; + st.local.u32 [%rd4+116], %r466; + st.local.u32 [%rd4+120], %r466; + st.local.u32 [%rd4+124], %r466; + st.local.u32 [%rd4+128], %r466; + st.local.u32 [%rd4+132], %r466; + st.local.u32 [%rd4+136], %r466; + st.local.u32 [%rd4+140], %r466; + st.local.u32 [%rd4+144], %r466; + st.local.u32 [%rd4+148], %r466; + st.local.u32 [%rd4+152], %r466; + st.local.u32 [%rd4+156], %r466; + st.local.u32 [%rd4+160], %r466; + st.local.u32 [%rd4+164], %r466; + st.local.u32 [%rd4+168], %r466; + st.local.u32 [%rd4+172], %r466; + st.local.u32 [%rd4+176], %r466; + st.local.u32 [%rd4+180], %r466; + st.local.u32 [%rd4+184], %r466; + st.local.u32 [%rd4+188], %r466; + st.local.u32 [%rd4+192], %r466; + st.local.u32 [%rd4+196], %r466; + st.local.u32 [%rd4+200], %r466; + st.local.u32 [%rd4+204], %r466; + st.local.u32 [%rd4+208], %r466; + st.local.u32 [%rd4+212], %r466; + st.local.u32 [%rd4+216], %r466; + st.local.u32 [%rd4+220], %r466; + st.local.u32 [%rd4+224], %r466; + st.local.u32 [%rd4+228], %r466; + st.local.u32 [%rd4+232], %r466; + st.local.u32 [%rd4+236], %r466; + st.local.u32 [%rd4+240], %r466; + st.local.u32 [%rd4+244], %r466; + st.local.u32 [%rd4+248], %r466; + st.local.u32 [%rd4+252], %r466; + st.local.u32 [%rd4+256], %r466; + st.local.u32 [%rd4+260], %r466; + st.local.u32 [%rd4+264], %r466; + st.local.u32 [%rd4+268], %r466; + st.local.u32 [%rd4+272], %r466; + st.local.u32 [%rd4+276], %r466; + st.local.u32 [%rd4+280], %r466; + st.local.u32 [%rd4+284], %r466; + st.local.u32 [%rd4+288], %r466; + st.local.u32 [%rd4+292], %r466; + st.local.u32 [%rd4+296], %r466; + st.local.u32 [%rd4+300], %r466; + st.local.u32 [%rd4+304], %r466; + st.local.u32 [%rd4+308], %r466; + st.local.u32 [%rd4+312], %r466; + st.local.u32 [%rd4+316], %r466; + st.local.u32 [%rd4+320], %r466; + st.local.u32 [%rd4+324], %r466; + st.local.u32 [%rd4+328], %r466; + st.local.u32 [%rd4+332], %r466; + st.local.u32 [%rd4+336], %r466; + st.local.u32 [%rd4+340], %r466; + st.local.u32 [%rd4+344], %r466; + st.local.u32 [%rd4+348], %r466; + st.local.u32 [%rd4+352], %r466; + st.local.u32 [%rd4+356], %r466; + st.local.u32 [%rd4+360], %r466; + st.local.u32 [%rd4+364], %r466; + st.local.u32 [%rd4+368], %r466; + st.local.u32 [%rd4+372], %r466; + st.local.u32 [%rd4+376], %r466; + st.local.u32 [%rd4+380], %r466; + st.local.u32 [%rd4+384], %r466; + st.local.u32 [%rd4+388], %r466; + st.local.u32 [%rd4+392], %r466; + st.local.u32 [%rd4+396], %r466; + st.local.u32 [%rd4+400], %r466; + st.local.u32 [%rd4+404], %r466; + st.local.u32 [%rd4+408], %r466; + st.local.u32 [%rd4+412], %r466; + st.local.u32 [%rd4+416], %r466; + st.local.u32 [%rd4+420], %r466; + st.local.u32 [%rd4+424], %r466; + st.local.u32 [%rd4+428], %r466; + st.local.u32 [%rd4+432], %r466; + st.local.u32 [%rd4+436], %r466; + st.local.u32 [%rd4+440], %r466; + st.local.u32 [%rd4+444], %r466; + st.local.u32 [%rd4+448], %r466; + st.local.u32 [%rd4+452], %r466; + st.local.u32 [%rd4+456], %r466; + st.local.u32 [%rd4+460], %r466; + st.local.u32 [%rd4+464], %r466; + st.local.u32 [%rd4+468], %r466; + st.local.u32 [%rd4+472], %r466; + st.local.u32 [%rd4+476], %r466; + st.local.u32 [%rd4+480], %r466; + shl.b64 %rd218, %rd25, 2; + add.s64 %rd37, %rd218, 4; + mov.f32 %f996, 0f00000000; + mov.u32 %r585, %r466; + +$L__BB9_200: + add.s32 %r133, %r585, -1; + mul.lo.s32 %r134, %r585, %r30; + mov.u32 %r586, %r466; + +$L__BB9_201: + setp.eq.s32 %p205, %r586, 0; + @%p205 bra $L__BB9_210; + + add.s32 %r469, %r586, -1; + and.b32 %r136, %r586, 3; + setp.lt.u32 %p206, %r469, 3; + mov.u32 %r589, 0; + mov.f32 %f1217, 0f00000000; + @%p206 bra $L__BB9_205; + + sub.s32 %r588, %r586, %r136; + +$L__BB9_204: + mad.lo.s32 %r471, %r589, %r30, %r586; + mul.wide.s32 %rd219, %r471, 4; + add.s64 %rd220, %rd5, %rd219; + add.s32 %r472, %r589, %r134; + mul.wide.s32 %rd221, %r472, 4; + add.s64 %rd222, %rd5, %rd221; + ld.local.f32 %f976, [%rd222]; + ld.local.f32 %f977, [%rd220]; + fma.rn.f32 %f978, %f977, %f976, %f1217; + add.s64 %rd223, %rd220, %rd37; + ld.local.f32 %f979, [%rd222+4]; + ld.local.f32 %f980, [%rd223]; + fma.rn.f32 %f981, %f980, %f979, %f978; + add.s64 %rd224, %rd223, %rd37; + ld.local.f32 %f982, [%rd222+8]; + ld.local.f32 %f983, [%rd224]; + fma.rn.f32 %f984, %f983, %f982, %f981; + add.s64 %rd225, %rd224, %rd37; + ld.local.f32 %f985, [%rd222+12]; + ld.local.f32 %f986, [%rd225]; + fma.rn.f32 %f1217, %f986, %f985, %f984; + add.s32 %r589, %r589, 4; + add.s32 %r588, %r588, -4; + setp.ne.s32 %p207, %r588, 0; + @%p207 bra $L__BB9_204; + +$L__BB9_205: + setp.eq.s32 %p208, %r136, 0; + @%p208 bra $L__BB9_209; + + mad.lo.s32 %r143, %r589, %r30, %r586; + mul.wide.s32 %rd226, %r143, 4; add.s64 %rd227, %rd5, %rd226; - ld.local.f32 %f968, [%rd225+4]; - ld.local.f32 %f969, [%rd227]; - fma.rn.f32 %f970, %f969, %f968, %f967; - add.s32 %r542, %r861, 2; - mad.lo.s32 %r543, %r542, %r425, %r857; - mul.wide.s32 %rd228, %r543, 4; - add.s64 %rd229, %rd5, %rd228; - ld.local.f32 %f971, [%rd225+8]; - ld.local.f32 %f972, [%rd229]; - fma.rn.f32 %f973, %f972, %f971, %f970; - add.s32 %r544, %r861, 3; - mad.lo.s32 %r545, %r544, %r425, %r857; - mul.wide.s32 %rd230, %r545, 4; - add.s64 %rd231, %rd5, %rd230; - ld.local.f32 %f974, [%rd225+12]; - ld.local.f32 %f975, [%rd231]; - fma.rn.f32 %f1458, %f975, %f974, %f973; - add.s32 %r861, %r861, 4; - setp.lt.s32 %p167, %r861, %r425; - @%p167 bra BB9_179; - -BB9_180: - mad.lo.s32 %r547, %r857, %r425, %r857; - mul.wide.s32 %rd232, %r547, 4; - add.s64 %rd233, %rd5, %rd232; - ld.local.f32 %f976, [%rd233]; - rcp.rn.f32 %f977, %f976; - mul.wide.s32 %rd234, %r857, 4; - add.s64 %rd235, %rd6, %rd234; - ld.local.f32 %f978, [%rd235]; - sub.f32 %f979, %f978, %f1458; - mul.f32 %f980, %f977, %f979; - st.local.f32 [%rd18+-4], %f980; - add.s32 %r857, %r857, -1; - setp.gt.s32 %p168, %r857, -1; - @%p168 bra BB9_171; - -BB9_181: - add.s32 %r850, %r850, 1; - setp.lt.s32 %p169, %r850, %r425; - @%p169 bra BB9_159; - -BB9_182: - @%p112 bra BB9_190; - - mov.u32 %r551, 1; - max.s32 %r105, %r425, %r551; - and.b32 %r106, %r105, 3; - setp.eq.s32 %p171, %r106, 1; - mov.u32 %r863, 0; - @%p171 bra BB9_187; - - setp.eq.s32 %p172, %r106, 2; - mov.u32 %r862, 0; - @%p172 bra BB9_186; - - ld.local.f32 %f981, [%rd3]; - st.local.f32 [%rd7], %f981; - mov.u32 %r862, %r551; - -BB9_186: - neg.s32 %r555, %r862; - and.b32 %r556, %r425, %r555; - add.s32 %r557, %r556, %r862; - mul.wide.s32 %rd236, %r557, 4; - add.s64 %rd237, %rd3, %rd236; - ld.local.f32 %f982, [%rd237]; - mul.wide.u32 %rd238, %r862, 4; - add.s64 %rd239, %rd7, %rd238; - st.local.f32 [%rd239], %f982; - add.s32 %r863, %r862, 1; - -BB9_187: - mad.lo.s32 %r559, %r863, %r425, %r863; - mul.wide.s32 %rd240, %r559, 4; - add.s64 %rd241, %rd3, %rd240; - ld.local.f32 %f983, [%rd241]; - mul.wide.s32 %rd242, %r863, 4; - add.s64 %rd243, %rd7, %rd242; - st.local.f32 [%rd243], %f983; - setp.lt.u32 %p173, %r105, 4; - @%p173 bra BB9_190; - - add.s32 %r864, %r863, 1; - -BB9_189: - mul.lo.s32 %r561, %r864, %r425; - add.s32 %r562, %r561, %r864; - mul.wide.s32 %rd244, %r562, 4; - add.s64 %rd245, %rd3, %rd244; - ld.local.f32 %f984, [%rd245]; - mul.wide.s32 %rd246, %r864, 4; - add.s64 %rd247, %rd7, %rd246; - st.local.f32 [%rd247], %f984; - add.s32 %r563, %r561, %r425; - add.s32 %r564, %r864, %r563; - add.s32 %r565, %r564, 1; - mul.wide.s32 %rd248, %r565, 4; - add.s64 %rd249, %rd3, %rd248; - ld.local.f32 %f985, [%rd249]; - st.local.f32 [%rd247+4], %f985; - add.s32 %r566, %r864, 2; - mad.lo.s32 %r567, %r566, %r425, %r566; - mul.wide.s32 %rd250, %r567, 4; - add.s64 %rd251, %rd3, %rd250; - ld.local.f32 %f986, [%rd251]; - st.local.f32 [%rd247+8], %f986; - add.s32 %r568, %r864, 3; - mad.lo.s32 %r569, %r568, %r425, %r568; - mul.wide.s32 %rd252, %r569, 4; - add.s64 %rd253, %rd3, %rd252; - ld.local.f32 %f987, [%rd253]; - st.local.f32 [%rd247+12], %f987; - add.s32 %r864, %r864, 4; - setp.lt.s32 %p174, %r864, %r425; - @%p174 bra BB9_189; - -BB9_190: - mov.u32 %r868, 0; - mul.wide.s32 %rd254, %r1, 4; - add.s64 %rd20, %rd2, %rd254; - add.s64 %rd21, %rd1, %rd254; - @%p118 bra BB9_196; - - setp.eq.s32 %p176, %r37, 1; - mov.u32 %r866, 0; - @%p176 bra BB9_195; - - setp.eq.s32 %p177, %r37, 2; - mov.u32 %r865, 0; - @%p177 bra BB9_194; - - ld.local.f32 %f988, [%rd3]; - abs.f32 %f989, %f988; - sqrt.rn.f32 %f990, %f989; - st.global.f32 [%rd20], %f990; - add.s32 %r574, %r26, 2; - mul.wide.s32 %rd255, %r574, 4; - add.s64 %rd256, %rd3, %rd255; - ld.local.f32 %f991, [%rd256]; - abs.f32 %f992, %f991; - sqrt.rn.f32 %f993, %f992; - st.global.f32 [%rd21], %f993; - mov.u32 %r865, 1; - -BB9_194: - shl.b32 %r576, %r865, 1; - mul.lo.s32 %r577, %r576, %r425; - add.s32 %r578, %r577, %r576; - mul.wide.s32 %rd257, %r578, 4; - add.s64 %rd258, %rd3, %rd257; - ld.local.f32 %f994, [%rd258]; - abs.f32 %f995, %f994; - sqrt.rn.f32 %f996, %f995; - add.s32 %r579, %r865, %r1; - mul.wide.s32 %rd259, %r579, 4; - add.s64 %rd260, %rd2, %rd259; - st.global.f32 [%rd260], %f996; - add.s32 %r580, %r577, %r425; - add.s32 %r581, %r576, %r580; - add.s32 %r582, %r581, 1; - mul.wide.s32 %rd261, %r582, 4; - add.s64 %rd262, %rd3, %rd261; - ld.local.f32 %f997, [%rd262]; - abs.f32 %f998, %f997; - sqrt.rn.f32 %f999, %f998; - add.s64 %rd263, %rd1, %rd259; - st.global.f32 [%rd263], %f999; - add.s32 %r866, %r865, 1; - -BB9_195: - shl.b32 %r584, %r866, 1; - mul.lo.s32 %r585, %r584, %r425; - add.s32 %r586, %r585, %r584; - mul.wide.s32 %rd264, %r586, 4; - add.s64 %rd265, %rd3, %rd264; - ld.local.f32 %f1000, [%rd265]; - abs.f32 %f1001, %f1000; - sqrt.rn.f32 %f1002, %f1001; - add.s32 %r587, %r866, %r1; - mul.wide.s32 %rd266, %r587, 4; - add.s64 %rd267, %rd2, %rd266; - st.global.f32 [%rd267], %f1002; - add.s32 %r588, %r585, %r425; - add.s32 %r589, %r584, %r588; - add.s32 %r590, %r589, 1; - mul.wide.s32 %rd268, %r590, 4; - add.s64 %rd269, %rd3, %rd268; - ld.local.f32 %f1003, [%rd269]; - abs.f32 %f1004, %f1003; - sqrt.rn.f32 %f1005, %f1004; - add.s64 %rd270, %rd1, %rd266; - st.global.f32 [%rd270], %f1005; - add.s32 %r868, %r866, 1; - -BB9_196: - @%p124 bra BB9_198; - -BB9_197: - mad.lo.s32 %r591, %r824, 4, 2; - mul.wide.s32 %rd271, %r591, 4; - add.s64 %rd272, %rd271, 8; - shl.b32 %r593, %r868, 1; - mul.lo.s32 %r594, %r593, %r425; - add.s32 %r595, %r594, %r593; - mul.wide.s32 %rd273, %r595, 4; - add.s64 %rd274, %rd3, %rd273; - ld.local.f32 %f1006, [%rd274]; - abs.f32 %f1007, %f1006; - sqrt.rn.f32 %f1008, %f1007; - add.s32 %r596, %r868, %r1; - mul.wide.s32 %rd275, %r596, 4; - add.s64 %rd276, %rd2, %rd275; - st.global.f32 [%rd276], %f1008; - add.s32 %r597, %r594, %r425; - add.s32 %r598, %r593, %r597; - add.s32 %r599, %r598, 1; - mul.wide.s32 %rd277, %r599, 4; - add.s64 %rd278, %rd3, %rd277; - ld.local.f32 %f1009, [%rd278]; - abs.f32 %f1010, %f1009; - sqrt.rn.f32 %f1011, %f1010; - add.s64 %rd279, %rd1, %rd275; - st.global.f32 [%rd279], %f1011; - add.s32 %r600, %r868, 1; - shl.b32 %r601, %r600, 1; - mul.lo.s32 %r602, %r601, %r425; - add.s32 %r603, %r602, %r601; - mul.wide.s32 %rd280, %r603, 4; - add.s64 %rd281, %rd3, %rd280; - ld.local.f32 %f1012, [%rd281]; - abs.f32 %f1013, %f1012; - sqrt.rn.f32 %f1014, %f1013; - st.global.f32 [%rd276+4], %f1014; - add.s32 %r604, %r602, %r425; - add.s32 %r605, %r601, %r604; - add.s32 %r606, %r605, 1; - mul.wide.s32 %rd282, %r606, 4; - add.s64 %rd283, %rd3, %rd282; - ld.local.f32 %f1015, [%rd283]; - abs.f32 %f1016, %f1015; - sqrt.rn.f32 %f1017, %f1016; - st.global.f32 [%rd279+4], %f1017; - add.s64 %rd284, %rd281, %rd272; - ld.local.f32 %f1018, [%rd284]; - abs.f32 %f1019, %f1018; - sqrt.rn.f32 %f1020, %f1019; - st.global.f32 [%rd276+8], %f1020; - add.s64 %rd285, %rd283, %rd272; - ld.local.f32 %f1021, [%rd285]; - abs.f32 %f1022, %f1021; - sqrt.rn.f32 %f1023, %f1022; - st.global.f32 [%rd279+8], %f1023; - add.s64 %rd286, %rd284, %rd272; - ld.local.f32 %f1024, [%rd286]; - abs.f32 %f1025, %f1024; - sqrt.rn.f32 %f1026, %f1025; - st.global.f32 [%rd276+12], %f1026; - add.s64 %rd287, %rd285, %rd272; - ld.local.f32 %f1027, [%rd287]; - abs.f32 %f1028, %f1027; - sqrt.rn.f32 %f1029, %f1028; - st.global.f32 [%rd279+12], %f1029; - add.s32 %r868, %r868, 4; - setp.lt.s32 %p179, %r868, %r824; - @%p179 bra BB9_197; - -BB9_198: - cvta.to.global.u64 %rd22, %rd60; - add.s64 %rd23, %rd22, %rd254; - cvta.to.global.u64 %rd24, %rd61; - add.s64 %rd25, %rd24, %rd254; - mov.u32 %r607, 0; - mov.u32 %r869, %r607; - -BB9_199: - add.s32 %r609, %r869, %r1; - mul.wide.s32 %rd289, %r609, 4; - add.s64 %rd26, %rd22, %rd289; - add.s64 %rd27, %rd2, %rd289; - add.s64 %rd28, %rd24, %rd289; - add.s64 %rd29, %rd1, %rd289; - shl.b32 %r610, %r869, 1; - add.s32 %r611, %r610, 1; - mad.lo.s32 %r123, %r611, %r425, 1; - mul.lo.s32 %r124, %r610, %r425; - mov.u32 %r873, %r607; - @%p118 bra BB9_210; - - setp.eq.s32 %p181, %r37, 1; - mov.u32 %r871, 0; - @%p181 bra BB9_207; - - setp.eq.s32 %p182, %r37, 2; - mov.u32 %r870, 0; - @%p182 bra BB9_204; - - setp.eq.s32 %p183, %r869, 0; - mov.u32 %r870, 1; - @%p183 bra BB9_204; - - ld.global.f32 %f1030, [%rd26]; - ld.global.f32 %f1031, [%rd23]; - sub.f32 %f1032, %f1030, %f1031; - abs.f32 %f1033, %f1032; - mul.f32 %f1034, %f1033, %f1033; - ld.global.f32 %f1035, [%rd27]; - div.rn.f32 %f1036, %f1034, %f1035; - ld.global.f32 %f1037, [%rd20]; - div.rn.f32 %f1038, %f1036, %f1037; - ld.global.f32 %f1039, [%rd25]; - ld.global.f32 %f1040, [%rd28]; - sub.f32 %f1041, %f1040, %f1039; - abs.f32 %f1042, %f1041; - mul.f32 %f1043, %f1042, %f1042; - ld.global.f32 %f1044, [%rd29]; - div.rn.f32 %f1045, %f1043, %f1044; - ld.global.f32 %f1046, [%rd21]; - div.rn.f32 %f1047, %f1045, %f1046; - add.f32 %f1048, %f1047, 0f3F800000; - div.rn.f32 %f1049, %f1047, %f1048; - mul.wide.s32 %rd290, %r123, 4; + add.s32 %r473, %r589, %r134; + mul.wide.s32 %rd228, %r473, 4; + add.s64 %rd38, %rd5, %rd228; + ld.local.f32 %f987, [%rd38]; + ld.local.f32 %f988, [%rd227]; + fma.rn.f32 %f1217, %f988, %f987, %f1217; + setp.eq.s32 %p209, %r136, 1; + @%p209 bra $L__BB9_209; + + add.s32 %r144, %r143, %r30; + mul.wide.s32 %rd229, %r144, 4; + add.s64 %rd230, %rd5, %rd229; + ld.local.f32 %f989, [%rd38+4]; + ld.local.f32 %f990, [%rd230]; + fma.rn.f32 %f1217, %f990, %f989, %f1217; + setp.eq.s32 %p210, %r136, 2; + @%p210 bra $L__BB9_209; + + add.s32 %r474, %r144, %r30; + mul.wide.s32 %rd231, %r474, 4; + add.s64 %rd232, %rd5, %rd231; + ld.local.f32 %f991, [%rd38+8]; + ld.local.f32 %f992, [%rd232]; + fma.rn.f32 %f1217, %f992, %f991, %f1217; + +$L__BB9_209: + add.s32 %r475, %r586, %r134; + mul.wide.s32 %rd233, %r475, 4; + add.s64 %rd234, %rd5, %rd233; + ld.local.f32 %f993, [%rd234]; + sub.f32 %f994, %f993, %f1217; + st.local.f32 [%rd234], %f994; + +$L__BB9_210: + add.s32 %r145, %r586, 1; + setp.lt.u32 %p211, %r586, %r585; + mov.u32 %r586, %r145; + @%p211 bra $L__BB9_201; + + setp.ge.s32 %p212, %r585, %r29; + @%p212 bra $L__BB9_224; + + add.s32 %r476, %r134, %r585; + mul.wide.s32 %rd235, %r476, 4; + add.s64 %rd39, %rd5, %rd235; + and.b32 %r146, %r585, 3; + sub.s32 %r147, %r585, %r146; + mov.u32 %r590, %r585; + +$L__BB9_213: + add.s32 %r149, %r590, 1; + setp.eq.s32 %p213, %r585, 0; + @%p213 bra $L__BB9_222; + + setp.lt.u32 %p214, %r133, 3; + mov.u32 %r593, 0; + mov.f32 %f1221, %f996; + @%p214 bra $L__BB9_217; + + mov.f32 %f1221, %f996; + mov.u32 %r592, %r147; + +$L__BB9_216: + mad.lo.s32 %r479, %r593, %r30, %r149; + mul.wide.s32 %rd236, %r479, 4; + add.s64 %rd237, %rd5, %rd236; + add.s32 %r480, %r593, %r134; + mul.wide.s32 %rd238, %r480, 4; + add.s64 %rd239, %rd5, %rd238; + ld.local.f32 %f998, [%rd239]; + ld.local.f32 %f999, [%rd237]; + fma.rn.f32 %f1000, %f999, %f998, %f1221; + add.s64 %rd240, %rd237, %rd37; + ld.local.f32 %f1001, [%rd239+4]; + ld.local.f32 %f1002, [%rd240]; + fma.rn.f32 %f1003, %f1002, %f1001, %f1000; + add.s64 %rd241, %rd240, %rd37; + ld.local.f32 %f1004, [%rd239+8]; + ld.local.f32 %f1005, [%rd241]; + fma.rn.f32 %f1006, %f1005, %f1004, %f1003; + add.s64 %rd242, %rd241, %rd37; + ld.local.f32 %f1007, [%rd239+12]; + ld.local.f32 %f1008, [%rd242]; + fma.rn.f32 %f1221, %f1008, %f1007, %f1006; + add.s32 %r593, %r593, 4; + add.s32 %r592, %r592, -4; + setp.ne.s32 %p215, %r592, 0; + @%p215 bra $L__BB9_216; + +$L__BB9_217: + setp.eq.s32 %p216, %r146, 0; + @%p216 bra $L__BB9_221; + + setp.eq.s32 %p217, %r146, 1; + mad.lo.s32 %r155, %r593, %r30, %r149; + mul.wide.s32 %rd243, %r155, 4; + add.s64 %rd244, %rd5, %rd243; + add.s32 %r481, %r593, %r134; + mul.wide.s32 %rd245, %r481, 4; + add.s64 %rd40, %rd5, %rd245; + ld.local.f32 %f1009, [%rd40]; + ld.local.f32 %f1010, [%rd244]; + fma.rn.f32 %f1221, %f1010, %f1009, %f1221; + @%p217 bra $L__BB9_221; + + setp.eq.s32 %p218, %r146, 2; + add.s32 %r156, %r155, %r30; + mul.wide.s32 %rd246, %r156, 4; + add.s64 %rd247, %rd5, %rd246; + ld.local.f32 %f1011, [%rd40+4]; + ld.local.f32 %f1012, [%rd247]; + fma.rn.f32 %f1221, %f1012, %f1011, %f1221; + @%p218 bra $L__BB9_221; + + add.s32 %r482, %r156, %r30; + mul.wide.s32 %rd248, %r482, 4; + add.s64 %rd249, %rd5, %rd248; + ld.local.f32 %f1013, [%rd40+8]; + ld.local.f32 %f1014, [%rd249]; + fma.rn.f32 %f1221, %f1014, %f1013, %f1221; + +$L__BB9_221: + ld.local.f32 %f1015, [%rd39]; + rcp.rn.f32 %f1016, %f1015; + add.s32 %r483, %r149, %r134; + mul.wide.s32 %rd250, %r483, 4; + add.s64 %rd251, %rd5, %rd250; + ld.local.f32 %f1017, [%rd251]; + sub.f32 %f1018, %f1017, %f1221; + mul.f32 %f1019, %f1016, %f1018; + st.local.f32 [%rd251], %f1019; + bra.uni $L__BB9_223; + +$L__BB9_222: + ld.local.f32 %f1020, [%rd5]; + rcp.rn.f32 %f1021, %f1020; + mul.wide.s32 %rd252, %r590, 4; + add.s64 %rd253, %rd15, %rd252; + ld.local.f32 %f1022, [%rd253]; + mul.f32 %f1023, %f1021, %f1022; + st.local.f32 [%rd253], %f1023; + +$L__BB9_223: + setp.lt.s32 %p219, %r149, %r29; + mov.u32 %r590, %r149; + @%p219 bra $L__BB9_213; + +$L__BB9_224: + setp.lt.s32 %p220, %r585, %r29; + add.s32 %r585, %r585, 1; + @%p220 bra $L__BB9_200; + + shl.b64 %rd254, %rd27, 2; + add.s64 %rd255, %rd5, %rd254; + ld.local.f32 %f164, [%rd255]; + shl.b32 %r485, %r545, 3; + or.b32 %r486, %r485, 4; + mul.wide.s32 %rd41, %r486, 4; + mov.u32 %r594, 0; + mov.f32 %f1049, 0f00000000; + +$L__BB9_226: + setp.eq.s32 %p221, %r594, 0; + selp.f32 %f1024, 0f3F800000, 0f00000000, %p221; + st.local.f32 [%rd10], %f1024; + @%p135 bra $L__BB9_236; + + mov.u32 %r487, 0; + mov.u32 %r595, %r487; + +$L__BB9_228: + mov.u32 %r159, %r595; + add.s32 %r595, %r159, 1; + and.b32 %r160, %r595, 3; + setp.lt.u32 %p223, %r159, 3; + mov.f32 %f1225, 0f00000000; + mov.u32 %r598, %r487; + @%p223 bra $L__BB9_231; + + sub.s32 %r596, %r159, %r160; + mul.wide.s32 %rd257, %r595, 4; + add.s64 %rd312, %rd5, %rd257; + add.s32 %r492, %r29, %r595; + mul.wide.s32 %rd258, %r492, 4; + add.s64 %rd310, %rd15, %rd258; + mov.u64 %rd311, %rd10; + mov.u32 %r598, %r487; + +$L__BB9_230: + ld.local.f32 %f1028, [%rd311]; + ld.local.f32 %f1029, [%rd312]; + fma.rn.f32 %f1030, %f1029, %f1028, %f1225; + ld.local.f32 %f1031, [%rd311+4]; + ld.local.f32 %f1032, [%rd310]; + fma.rn.f32 %f1033, %f1032, %f1031, %f1030; + add.s64 %rd259, %rd310, %rd37; + ld.local.f32 %f1034, [%rd311+8]; + ld.local.f32 %f1035, [%rd259]; + fma.rn.f32 %f1036, %f1035, %f1034, %f1033; + add.s64 %rd260, %rd259, %rd37; + ld.local.f32 %f1037, [%rd311+12]; + ld.local.f32 %f1038, [%rd260]; + fma.rn.f32 %f1225, %f1038, %f1037, %f1036; + add.s32 %r598, %r598, 4; + add.s64 %rd312, %rd312, %rd41; + add.s64 %rd311, %rd311, 16; + add.s64 %rd310, %rd310, %rd41; + add.s32 %r596, %r596, -4; + setp.ne.s32 %p224, %r596, -1; + @%p224 bra $L__BB9_230; + +$L__BB9_231: + setp.eq.s32 %p225, %r160, 0; + @%p225 bra $L__BB9_235; + + mad.lo.s32 %r493, %r598, %r30, %r159; + add.s32 %r167, %r493, 1; + mul.wide.s32 %rd261, %r167, 4; + add.s64 %rd262, %rd5, %rd261; + mul.wide.s32 %rd263, %r598, 4; + add.s64 %rd51, %rd10, %rd263; + ld.local.f32 %f1039, [%rd51]; + ld.local.f32 %f1040, [%rd262]; + fma.rn.f32 %f1225, %f1040, %f1039, %f1225; + setp.eq.s32 %p226, %r160, 1; + @%p226 bra $L__BB9_235; + + add.s32 %r168, %r167, %r30; + mul.wide.s32 %rd264, %r168, 4; + add.s64 %rd265, %rd5, %rd264; + ld.local.f32 %f1041, [%rd51+4]; + ld.local.f32 %f1042, [%rd265]; + fma.rn.f32 %f1225, %f1042, %f1041, %f1225; + setp.eq.s32 %p227, %r160, 2; + @%p227 bra $L__BB9_235; + + add.s32 %r494, %r168, %r30; + mul.wide.s32 %rd266, %r494, 4; + add.s64 %rd267, %rd5, %rd266; + ld.local.f32 %f1043, [%rd51+8]; + ld.local.f32 %f1044, [%rd267]; + fma.rn.f32 %f1225, %f1044, %f1043, %f1225; + +$L__BB9_235: + setp.eq.s32 %p228, %r595, %r594; + selp.f32 %f1045, 0f3F800000, 0f00000000, %p228; + sub.f32 %f1046, %f1045, %f1225; + mul.wide.s32 %rd268, %r595, 4; + add.s64 %rd269, %rd10, %rd268; + st.local.f32 [%rd269], %f1046; + setp.lt.s32 %p229, %r595, %r29; + @%p229 bra $L__BB9_228; + +$L__BB9_236: + ld.local.f32 %f1047, [%rd26]; + div.rn.f32 %f1048, %f1047, %f164; + mul.lo.s32 %r170, %r594, %r30; + add.s32 %r495, %r170, %r29; + mul.wide.s32 %rd270, %r495, 4; + add.s64 %rd271, %rd4, %rd270; + st.local.f32 [%rd271], %f1048; + @%p135 bra $L__BB9_246; + + mov.u32 %r599, 0; + mov.u32 %r600, %r29; + +$L__BB9_238: + mov.u32 %r172, %r600; + sub.s32 %r497, %r29, %r599; + max.s32 %r498, %r497, %r29; + add.s32 %r499, %r80, %r599; + add.s32 %r173, %r498, %r499; + add.s32 %r600, %r172, -1; + add.s32 %r500, %r172, %r170; + mul.wide.s32 %rd272, %r500, 4; + add.s64 %rd52, %rd4, %rd272; + setp.gt.s32 %p231, %r172, %r29; + mov.f32 %f1229, %f1049; + @%p231 bra $L__BB9_245; + + and.b32 %r175, %r173, 3; + setp.eq.s32 %p232, %r175, 0; + mov.u32 %r601, %r172; + mov.f32 %f1229, %f1049; + @%p232 bra $L__BB9_243; + + mad.lo.s32 %r176, %r172, %r30, %r600; + mul.wide.s32 %rd273, %r176, 4; + add.s64 %rd274, %rd5, %rd273; + ld.local.f32 %f1052, [%rd52]; + ld.local.f32 %f1053, [%rd274]; + fma.rn.f32 %f1229, %f1053, %f1052, 0f00000000; + add.s32 %r601, %r172, 1; + setp.eq.s32 %p233, %r175, 1; + @%p233 bra $L__BB9_243; + + add.s32 %r178, %r176, %r30; + mul.wide.s32 %rd275, %r178, 4; + add.s64 %rd276, %rd5, %rd275; + ld.local.f32 %f1054, [%rd52+4]; + ld.local.f32 %f1055, [%rd276]; + fma.rn.f32 %f1229, %f1055, %f1054, %f1229; + add.s32 %r601, %r172, 2; + setp.eq.s32 %p234, %r175, 2; + @%p234 bra $L__BB9_243; + + add.s32 %r501, %r178, %r30; + mul.wide.s32 %rd277, %r501, 4; + add.s64 %rd278, %rd5, %rd277; + ld.local.f32 %f1056, [%rd52+8]; + ld.local.f32 %f1057, [%rd278]; + fma.rn.f32 %f1229, %f1057, %f1056, %f1229; + add.s32 %r601, %r172, 3; + +$L__BB9_243: + add.s32 %r502, %r173, -1; + setp.lt.u32 %p235, %r502, 3; + @%p235 bra $L__BB9_245; + +$L__BB9_244: + mad.lo.s32 %r503, %r601, %r30, %r600; + mul.wide.s32 %rd279, %r503, 4; + add.s64 %rd280, %rd5, %rd279; + add.s32 %r504, %r601, %r170; + mul.wide.s32 %rd281, %r504, 4; + add.s64 %rd282, %rd4, %rd281; + ld.local.f32 %f1058, [%rd282]; + ld.local.f32 %f1059, [%rd280]; + fma.rn.f32 %f1060, %f1059, %f1058, %f1229; + add.s64 %rd283, %rd280, %rd37; + ld.local.f32 %f1061, [%rd282+4]; + ld.local.f32 %f1062, [%rd283]; + fma.rn.f32 %f1063, %f1062, %f1061, %f1060; + add.s64 %rd284, %rd283, %rd37; + ld.local.f32 %f1064, [%rd282+8]; + ld.local.f32 %f1065, [%rd284]; + fma.rn.f32 %f1066, %f1065, %f1064, %f1063; + add.s64 %rd285, %rd284, %rd37; + ld.local.f32 %f1067, [%rd282+12]; + ld.local.f32 %f1068, [%rd285]; + fma.rn.f32 %f1229, %f1068, %f1067, %f1066; + add.s32 %r183, %r601, 4; + add.s32 %r505, %r601, 3; + setp.lt.s32 %p236, %r505, %r29; + mov.u32 %r601, %r183; + @%p236 bra $L__BB9_244; + +$L__BB9_245: + mad.lo.s32 %r506, %r600, %r30, %r600; + mul.wide.s32 %rd286, %r506, 4; + add.s64 %rd287, %rd5, %rd286; + ld.local.f32 %f1069, [%rd287]; + rcp.rn.f32 %f1070, %f1069; + mul.wide.s32 %rd288, %r600, 4; + add.s64 %rd289, %rd10, %rd288; + ld.local.f32 %f1071, [%rd289]; + sub.f32 %f1072, %f1071, %f1229; + mul.f32 %f1073, %f1070, %f1072; + st.local.f32 [%rd52+-4], %f1073; + add.s32 %r599, %r599, 1; + setp.gt.s32 %p237, %r172, 1; + @%p237 bra $L__BB9_238; + +$L__BB9_246: + add.s32 %r185, %r594, 1; + setp.lt.s32 %p238, %r594, %r29; + mov.u32 %r594, %r185; + @%p238 bra $L__BB9_226; + +$L__BB9_247: + mov.u32 %r539, %tid.x; + mov.u32 %r538, %ctaid.x; + mov.u32 %r537, %ntid.x; + mad.lo.s32 %r536, %r537, %r538, %r539; + cvt.s64.s32 %rd309, %r536; + cvt.u32.u64 %r509, %rd309; + shl.b32 %r510, %r214, 1; + or.b32 %r511, %r510, 1; + mul.lo.s32 %r186, %r509, %r511; + mov.u32 %r613, 0; + max.s32 %r512, %r29, 0; + add.s32 %r187, %r512, 1; + and.b32 %r188, %r187, 3; + setp.lt.u32 %p239, %r512, 3; + mov.u32 %r607, %r613; + @%p239 bra $L__BB9_250; + + sub.s32 %r605, %r187, %r188; + +$L__BB9_249: + mul.lo.s32 %r515, %r607, %r30; + add.s32 %r516, %r515, %r607; + mul.wide.s32 %rd290, %r516, 4; add.s64 %rd291, %rd4, %rd290; - ld.local.f32 %f1050, [%rd291]; - mul.f32 %f1051, %f1049, %f1050; - st.local.f32 [%rd291], %f1051; - add.f32 %f1052, %f1038, 0f3F800000; - div.rn.f32 %f1053, %f1038, %f1052; - mul.wide.s32 %rd292, %r124, 4; - add.s64 %rd293, %rd4, %rd292; - ld.local.f32 %f1054, [%rd293]; - mul.f32 %f1055, %f1053, %f1054; - st.local.f32 [%rd293], %f1055; - -BB9_204: - setp.eq.s32 %p184, %r869, %r870; - @%p184 bra BB9_206; - - ld.global.f32 %f1056, [%rd26]; - add.s32 %r617, %r870, %r1; - mul.wide.s32 %rd295, %r617, 4; - add.s64 %rd296, %rd22, %rd295; - ld.global.f32 %f1057, [%rd296]; - sub.f32 %f1058, %f1056, %f1057; - abs.f32 %f1059, %f1058; - mul.f32 %f1060, %f1059, %f1059; - ld.global.f32 %f1061, [%rd27]; - div.rn.f32 %f1062, %f1060, %f1061; - add.s64 %rd297, %rd2, %rd295; - ld.global.f32 %f1063, [%rd297]; - div.rn.f32 %f1064, %f1062, %f1063; - add.s64 %rd299, %rd24, %rd295; - ld.global.f32 %f1065, [%rd299]; - ld.global.f32 %f1066, [%rd28]; - sub.f32 %f1067, %f1066, %f1065; - abs.f32 %f1068, %f1067; - mul.f32 %f1069, %f1068, %f1068; - ld.global.f32 %f1070, [%rd29]; - div.rn.f32 %f1071, %f1069, %f1070; - add.s64 %rd300, %rd1, %rd295; - ld.global.f32 %f1072, [%rd300]; - div.rn.f32 %f1073, %f1071, %f1072; - add.f32 %f1074, %f1073, 0f3F800000; - div.rn.f32 %f1075, %f1073, %f1074; - shl.b32 %r618, %r870, 1; - add.s32 %r619, %r123, %r618; - mul.wide.s32 %rd301, %r619, 4; - add.s64 %rd302, %rd4, %rd301; - ld.local.f32 %f1076, [%rd302]; - mul.f32 %f1077, %f1075, %f1076; - st.local.f32 [%rd302], %f1077; - add.f32 %f1078, %f1064, 0f3F800000; - div.rn.f32 %f1079, %f1064, %f1078; - add.s32 %r620, %r618, %r124; - mul.wide.s32 %rd303, %r620, 4; - add.s64 %rd304, %rd4, %rd303; - ld.local.f32 %f1080, [%rd304]; - mul.f32 %f1081, %f1079, %f1080; - st.local.f32 [%rd304], %f1081; - -BB9_206: - add.s32 %r871, %r870, 1; - -BB9_207: - setp.eq.s32 %p185, %r869, %r871; - @%p185 bra BB9_209; - - ld.global.f32 %f1082, [%rd26]; - add.s32 %r621, %r871, %r1; - mul.wide.s32 %rd306, %r621, 4; - add.s64 %rd307, %rd22, %rd306; - ld.global.f32 %f1083, [%rd307]; - sub.f32 %f1084, %f1082, %f1083; + ld.local.f32 %f1074, [%rd291]; + abs.f32 %f1075, %f1074; + sqrt.rn.f32 %f1076, %f1075; + add.s32 %r517, %r607, %r186; + mul.wide.s32 %rd292, %r517, 4; + add.s64 %rd293, %rd9, %rd292; + st.global.f32 [%rd293], %f1076; + add.s32 %r518, %r515, %r30; + add.s32 %r519, %r518, %r607; + add.s32 %r520, %r519, 1; + mul.wide.s32 %rd294, %r520, 4; + add.s64 %rd295, %rd4, %rd294; + ld.local.f32 %f1077, [%rd295]; + abs.f32 %f1078, %f1077; + sqrt.rn.f32 %f1079, %f1078; + st.global.f32 [%rd293+4], %f1079; + ld.local.f32 %f1080, [%rd291+4]; + cvt.u32.u64 %r521, %rd11; + add.s32 %r522, %r613, %r521; + mul.wide.s32 %rd296, %r522, 4; + add.s64 %rd297, %rd3, %rd296; + st.global.f32 [%rd297], %f1080; + add.s32 %r523, %r518, %r30; + add.s32 %r524, %r523, %r607; + add.s32 %r525, %r524, 2; + mul.wide.s32 %rd298, %r525, 4; + add.s64 %rd299, %rd4, %rd298; + ld.local.f32 %f1081, [%rd299]; + abs.f32 %f1082, %f1081; + sqrt.rn.f32 %f1083, %f1082; + st.global.f32 [%rd293+8], %f1083; + add.s32 %r526, %r523, %r30; + add.s32 %r527, %r526, %r607; + add.s32 %r528, %r527, 3; + mul.wide.s32 %rd300, %r528, 4; + add.s64 %rd301, %rd4, %rd300; + ld.local.f32 %f1084, [%rd301]; abs.f32 %f1085, %f1084; - mul.f32 %f1086, %f1085, %f1085; - ld.global.f32 %f1087, [%rd27]; - div.rn.f32 %f1088, %f1086, %f1087; - add.s64 %rd308, %rd2, %rd306; - ld.global.f32 %f1089, [%rd308]; - div.rn.f32 %f1090, %f1088, %f1089; - add.s64 %rd310, %rd24, %rd306; - ld.global.f32 %f1091, [%rd310]; - ld.global.f32 %f1092, [%rd28]; - sub.f32 %f1093, %f1092, %f1091; - abs.f32 %f1094, %f1093; - mul.f32 %f1095, %f1094, %f1094; - ld.global.f32 %f1096, [%rd29]; - div.rn.f32 %f1097, %f1095, %f1096; - add.s64 %rd311, %rd1, %rd306; - ld.global.f32 %f1098, [%rd311]; - div.rn.f32 %f1099, %f1097, %f1098; - add.f32 %f1100, %f1099, 0f3F800000; - div.rn.f32 %f1101, %f1099, %f1100; - shl.b32 %r622, %r871, 1; - add.s32 %r623, %r123, %r622; - mul.wide.s32 %rd312, %r623, 4; - add.s64 %rd313, %rd4, %rd312; - ld.local.f32 %f1102, [%rd313]; - mul.f32 %f1103, %f1101, %f1102; - st.local.f32 [%rd313], %f1103; - add.f32 %f1104, %f1090, 0f3F800000; - div.rn.f32 %f1105, %f1090, %f1104; - add.s32 %r624, %r622, %r124; - mul.wide.s32 %rd314, %r624, 4; - add.s64 %rd315, %rd4, %rd314; - ld.local.f32 %f1106, [%rd315]; - mul.f32 %f1107, %f1105, %f1106; - st.local.f32 [%rd315], %f1107; - -BB9_209: - add.s32 %r873, %r871, 1; - -BB9_210: - @%p124 bra BB9_220; - -BB9_211: - setp.eq.s32 %p187, %r869, %r873; - @%p187 bra BB9_213; - - ld.global.f32 %f1108, [%rd26]; - add.s32 %r625, %r873, %r1; - mul.wide.s32 %rd317, %r625, 4; - add.s64 %rd318, %rd22, %rd317; - ld.global.f32 %f1109, [%rd318]; - sub.f32 %f1110, %f1108, %f1109; - abs.f32 %f1111, %f1110; - mul.f32 %f1112, %f1111, %f1111; - ld.global.f32 %f1113, [%rd27]; - div.rn.f32 %f1114, %f1112, %f1113; - add.s64 %rd319, %rd2, %rd317; - ld.global.f32 %f1115, [%rd319]; - div.rn.f32 %f1116, %f1114, %f1115; - add.s64 %rd321, %rd24, %rd317; - ld.global.f32 %f1117, [%rd321]; - ld.global.f32 %f1118, [%rd28]; - sub.f32 %f1119, %f1118, %f1117; - abs.f32 %f1120, %f1119; - mul.f32 %f1121, %f1120, %f1120; - ld.global.f32 %f1122, [%rd29]; - div.rn.f32 %f1123, %f1121, %f1122; - add.s64 %rd322, %rd1, %rd317; - ld.global.f32 %f1124, [%rd322]; - div.rn.f32 %f1125, %f1123, %f1124; - add.f32 %f1126, %f1125, 0f3F800000; - div.rn.f32 %f1127, %f1125, %f1126; - shl.b32 %r626, %r873, 1; - add.s32 %r627, %r123, %r626; - mul.wide.s32 %rd323, %r627, 4; - add.s64 %rd324, %rd4, %rd323; - ld.local.f32 %f1128, [%rd324]; - mul.f32 %f1129, %f1127, %f1128; - st.local.f32 [%rd324], %f1129; - add.f32 %f1130, %f1116, 0f3F800000; - div.rn.f32 %f1131, %f1116, %f1130; - add.s32 %r628, %r626, %r124; - mul.wide.s32 %rd325, %r628, 4; - add.s64 %rd326, %rd4, %rd325; - ld.local.f32 %f1132, [%rd326]; - mul.f32 %f1133, %f1131, %f1132; - st.local.f32 [%rd326], %f1133; - -BB9_213: - add.s32 %r131, %r873, 1; - setp.eq.s32 %p188, %r869, %r131; - @%p188 bra BB9_215; - - ld.global.f32 %f1134, [%rd26]; - add.s32 %r629, %r131, %r1; - mul.wide.s32 %rd328, %r629, 4; - add.s64 %rd329, %rd22, %rd328; - ld.global.f32 %f1135, [%rd329]; - sub.f32 %f1136, %f1134, %f1135; - abs.f32 %f1137, %f1136; - mul.f32 %f1138, %f1137, %f1137; - ld.global.f32 %f1139, [%rd27]; - div.rn.f32 %f1140, %f1138, %f1139; - add.s64 %rd330, %rd2, %rd328; - ld.global.f32 %f1141, [%rd330]; - div.rn.f32 %f1142, %f1140, %f1141; - add.s64 %rd332, %rd24, %rd328; - ld.global.f32 %f1143, [%rd332]; - ld.global.f32 %f1144, [%rd28]; - sub.f32 %f1145, %f1144, %f1143; - abs.f32 %f1146, %f1145; - mul.f32 %f1147, %f1146, %f1146; - ld.global.f32 %f1148, [%rd29]; - div.rn.f32 %f1149, %f1147, %f1148; - add.s64 %rd333, %rd1, %rd328; - ld.global.f32 %f1150, [%rd333]; - div.rn.f32 %f1151, %f1149, %f1150; - add.f32 %f1152, %f1151, 0f3F800000; - div.rn.f32 %f1153, %f1151, %f1152; - shl.b32 %r630, %r131, 1; - add.s32 %r631, %r123, %r630; - mul.wide.s32 %rd334, %r631, 4; - add.s64 %rd335, %rd4, %rd334; - ld.local.f32 %f1154, [%rd335]; - mul.f32 %f1155, %f1153, %f1154; - st.local.f32 [%rd335], %f1155; - add.f32 %f1156, %f1142, 0f3F800000; - div.rn.f32 %f1157, %f1142, %f1156; - add.s32 %r632, %r630, %r124; - mul.wide.s32 %rd336, %r632, 4; - add.s64 %rd337, %rd4, %rd336; - ld.local.f32 %f1158, [%rd337]; - mul.f32 %f1159, %f1157, %f1158; - st.local.f32 [%rd337], %f1159; - -BB9_215: - add.s32 %r132, %r873, 2; - setp.eq.s32 %p189, %r869, %r132; - @%p189 bra BB9_217; - - ld.global.f32 %f1160, [%rd26]; - add.s32 %r633, %r132, %r1; - mul.wide.s32 %rd339, %r633, 4; - add.s64 %rd340, %rd22, %rd339; - ld.global.f32 %f1161, [%rd340]; - sub.f32 %f1162, %f1160, %f1161; - abs.f32 %f1163, %f1162; - mul.f32 %f1164, %f1163, %f1163; - ld.global.f32 %f1165, [%rd27]; - div.rn.f32 %f1166, %f1164, %f1165; - add.s64 %rd341, %rd2, %rd339; - ld.global.f32 %f1167, [%rd341]; - div.rn.f32 %f1168, %f1166, %f1167; - add.s64 %rd343, %rd24, %rd339; - ld.global.f32 %f1169, [%rd343]; - ld.global.f32 %f1170, [%rd28]; - sub.f32 %f1171, %f1170, %f1169; - abs.f32 %f1172, %f1171; - mul.f32 %f1173, %f1172, %f1172; - ld.global.f32 %f1174, [%rd29]; - div.rn.f32 %f1175, %f1173, %f1174; - add.s64 %rd344, %rd1, %rd339; - ld.global.f32 %f1176, [%rd344]; - div.rn.f32 %f1177, %f1175, %f1176; - add.f32 %f1178, %f1177, 0f3F800000; - div.rn.f32 %f1179, %f1177, %f1178; - shl.b32 %r634, %r132, 1; - add.s32 %r635, %r123, %r634; - mul.wide.s32 %rd345, %r635, 4; - add.s64 %rd346, %rd4, %rd345; - ld.local.f32 %f1180, [%rd346]; - mul.f32 %f1181, %f1179, %f1180; - st.local.f32 [%rd346], %f1181; - add.f32 %f1182, %f1168, 0f3F800000; - div.rn.f32 %f1183, %f1168, %f1182; - add.s32 %r636, %r634, %r124; - mul.wide.s32 %rd347, %r636, 4; - add.s64 %rd348, %rd4, %rd347; - ld.local.f32 %f1184, [%rd348]; - mul.f32 %f1185, %f1183, %f1184; - st.local.f32 [%rd348], %f1185; - -BB9_217: - add.s32 %r133, %r873, 3; - setp.eq.s32 %p190, %r869, %r133; - @%p190 bra BB9_219; - - ld.global.f32 %f1186, [%rd26]; - add.s32 %r637, %r133, %r1; - mul.wide.s32 %rd350, %r637, 4; - add.s64 %rd351, %rd22, %rd350; - ld.global.f32 %f1187, [%rd351]; - sub.f32 %f1188, %f1186, %f1187; - abs.f32 %f1189, %f1188; - mul.f32 %f1190, %f1189, %f1189; - ld.global.f32 %f1191, [%rd27]; - div.rn.f32 %f1192, %f1190, %f1191; - add.s64 %rd352, %rd2, %rd350; - ld.global.f32 %f1193, [%rd352]; - div.rn.f32 %f1194, %f1192, %f1193; - add.s64 %rd354, %rd24, %rd350; - ld.global.f32 %f1195, [%rd354]; - ld.global.f32 %f1196, [%rd28]; - sub.f32 %f1197, %f1196, %f1195; - abs.f32 %f1198, %f1197; - mul.f32 %f1199, %f1198, %f1198; - ld.global.f32 %f1200, [%rd29]; - div.rn.f32 %f1201, %f1199, %f1200; - add.s64 %rd355, %rd1, %rd350; - ld.global.f32 %f1202, [%rd355]; - div.rn.f32 %f1203, %f1201, %f1202; - add.f32 %f1204, %f1203, 0f3F800000; - div.rn.f32 %f1205, %f1203, %f1204; - shl.b32 %r638, %r133, 1; - add.s32 %r639, %r123, %r638; - mul.wide.s32 %rd356, %r639, 4; - add.s64 %rd357, %rd4, %rd356; - ld.local.f32 %f1206, [%rd357]; - mul.f32 %f1207, %f1205, %f1206; - st.local.f32 [%rd357], %f1207; - add.f32 %f1208, %f1194, 0f3F800000; - div.rn.f32 %f1209, %f1194, %f1208; - add.s32 %r640, %r638, %r124; - mul.wide.s32 %rd358, %r640, 4; - add.s64 %rd359, %rd4, %rd358; - ld.local.f32 %f1210, [%rd359]; - mul.f32 %f1211, %f1209, %f1210; - st.local.f32 [%rd359], %f1211; - -BB9_219: - add.s32 %r873, %r873, 4; - setp.lt.s32 %p191, %r873, %r824; - @%p191 bra BB9_211; - -BB9_220: - add.s32 %r869, %r869, 1; - setp.lt.s32 %p192, %r869, %r824; - @%p192 bra BB9_199; - - st.local.u32 [%rd3+4], %rd103; - st.local.u32 [%rd3], %rd103; - st.local.u32 [%rd3+12], %rd103; - st.local.u32 [%rd3+8], %rd103; - st.local.u32 [%rd3+20], %rd103; - st.local.u32 [%rd3+16], %rd103; - st.local.u32 [%rd3+28], %rd103; - st.local.u32 [%rd3+24], %rd103; - st.local.u32 [%rd3+36], %rd103; - st.local.u32 [%rd3+32], %rd103; - st.local.u32 [%rd3+44], %rd103; - st.local.u32 [%rd3+40], %rd103; - st.local.u32 [%rd3+52], %rd103; - st.local.u32 [%rd3+48], %rd103; - st.local.u32 [%rd3+60], %rd103; - st.local.u32 [%rd3+56], %rd103; - st.local.u32 [%rd3+68], %rd103; - st.local.u32 [%rd3+64], %rd103; - st.local.u32 [%rd3+76], %rd103; - st.local.u32 [%rd3+72], %rd103; - st.local.u32 [%rd3+84], %rd103; - st.local.u32 [%rd3+80], %rd103; - st.local.u32 [%rd3+92], %rd103; - st.local.u32 [%rd3+88], %rd103; - st.local.u32 [%rd3+100], %rd103; - st.local.u32 [%rd3+96], %rd103; - st.local.u32 [%rd3+108], %rd103; - st.local.u32 [%rd3+104], %rd103; - st.local.u32 [%rd3+116], %rd103; - st.local.u32 [%rd3+112], %rd103; - st.local.u32 [%rd3+124], %rd103; - st.local.u32 [%rd3+120], %rd103; - st.local.u32 [%rd3+132], %rd103; - st.local.u32 [%rd3+128], %rd103; - st.local.u32 [%rd3+140], %rd103; - st.local.u32 [%rd3+136], %rd103; - st.local.u32 [%rd3+148], %rd103; - st.local.u32 [%rd3+144], %rd103; - st.local.u32 [%rd3+156], %rd103; - st.local.u32 [%rd3+152], %rd103; - st.local.u32 [%rd3+164], %rd103; - st.local.u32 [%rd3+160], %rd103; - st.local.u32 [%rd3+172], %rd103; - st.local.u32 [%rd3+168], %rd103; - st.local.u32 [%rd3+180], %rd103; - st.local.u32 [%rd3+176], %rd103; - st.local.u32 [%rd3+188], %rd103; - st.local.u32 [%rd3+184], %rd103; - st.local.u32 [%rd3+196], %rd103; - st.local.u32 [%rd3+192], %rd103; - st.local.u32 [%rd3+204], %rd103; - st.local.u32 [%rd3+200], %rd103; - st.local.u32 [%rd3+212], %rd103; - st.local.u32 [%rd3+208], %rd103; - st.local.u32 [%rd3+220], %rd103; - st.local.u32 [%rd3+216], %rd103; - st.local.u32 [%rd3+228], %rd103; - st.local.u32 [%rd3+224], %rd103; - st.local.u32 [%rd3+236], %rd103; - st.local.u32 [%rd3+232], %rd103; - st.local.u32 [%rd3+244], %rd103; - st.local.u32 [%rd3+240], %rd103; - st.local.u32 [%rd3+252], %rd103; - st.local.u32 [%rd3+248], %rd103; - st.local.u32 [%rd3+260], %rd103; - st.local.u32 [%rd3+256], %rd103; - st.local.u32 [%rd3+268], %rd103; - st.local.u32 [%rd3+264], %rd103; - st.local.u32 [%rd3+276], %rd103; - st.local.u32 [%rd3+272], %rd103; - st.local.u32 [%rd3+284], %rd103; - st.local.u32 [%rd3+280], %rd103; - st.local.u32 [%rd3+292], %rd103; - st.local.u32 [%rd3+288], %rd103; - st.local.u32 [%rd3+300], %rd103; - st.local.u32 [%rd3+296], %rd103; - st.local.u32 [%rd3+308], %rd103; - st.local.u32 [%rd3+304], %rd103; - st.local.u32 [%rd3+316], %rd103; - st.local.u32 [%rd3+312], %rd103; - st.local.u32 [%rd3+324], %rd103; - st.local.u32 [%rd3+320], %rd103; - st.local.u32 [%rd3+332], %rd103; - st.local.u32 [%rd3+328], %rd103; - st.local.u32 [%rd3+340], %rd103; - st.local.u32 [%rd3+336], %rd103; - st.local.u32 [%rd3+348], %rd103; - st.local.u32 [%rd3+344], %rd103; - st.local.u32 [%rd3+356], %rd103; - st.local.u32 [%rd3+352], %rd103; - st.local.u32 [%rd3+364], %rd103; - st.local.u32 [%rd3+360], %rd103; - st.local.u32 [%rd3+372], %rd103; - st.local.u32 [%rd3+368], %rd103; - st.local.u32 [%rd3+380], %rd103; - st.local.u32 [%rd3+376], %rd103; - st.local.u32 [%rd3+388], %rd103; - st.local.u32 [%rd3+384], %rd103; - st.local.u32 [%rd3+396], %rd103; - st.local.u32 [%rd3+392], %rd103; - st.local.u32 [%rd3+404], %rd103; - st.local.u32 [%rd3+400], %rd103; - st.local.u32 [%rd3+412], %rd103; - st.local.u32 [%rd3+408], %rd103; - st.local.u32 [%rd3+420], %rd103; - st.local.u32 [%rd3+416], %rd103; - st.local.u32 [%rd3+428], %rd103; - st.local.u32 [%rd3+424], %rd103; - st.local.u32 [%rd3+436], %rd103; - st.local.u32 [%rd3+432], %rd103; - st.local.u32 [%rd3+444], %rd103; - st.local.u32 [%rd3+440], %rd103; - st.local.u32 [%rd3+452], %rd103; - st.local.u32 [%rd3+448], %rd103; - st.local.u32 [%rd3+460], %rd103; - st.local.u32 [%rd3+456], %rd103; - st.local.u32 [%rd3+468], %rd103; - st.local.u32 [%rd3+464], %rd103; - st.local.u32 [%rd3+476], %rd103; - st.local.u32 [%rd3+472], %rd103; - mov.u32 %r911, 0; - st.local.u32 [%rd3+480], %r911; - @%p112 bra BB9_255; - - mad.lo.s32 %r136, %r824, 2, 1; - mad.lo.s32 %r137, %r824, 8, 4; - mov.u32 %r874, 0; - -BB9_223: - mul.lo.s32 %r139, %r874, %r425; - mul.wide.s32 %rd361, %r139, 4; - add.s64 %rd30, %rd4, %rd361; - setp.lt.s32 %p194, %r874, 0; - @%p194 bra BB9_238; - - mov.u32 %r875, 0; - -BB9_225: - setp.lt.s32 %p195, %r875, 1; - @%p195 bra BB9_237; - - add.s32 %r142, %r875, -1; - setp.lt.s32 %p196, %r142, 0; - mov.f32 %f1463, 0f00000000; - @%p196 bra BB9_236; - - and.b32 %r143, %r875, 3; - setp.eq.s32 %p197, %r143, 0; - mov.u32 %r878, 0; - mov.f32 %f1463, 0f00000000; - @%p197 bra BB9_233; - - setp.eq.s32 %p198, %r143, 1; - mov.u32 %r877, 0; - mov.f32 %f1460, 0f00000000; - @%p198 bra BB9_232; - - setp.eq.s32 %p199, %r143, 2; - mov.u32 %r876, 0; - mov.f32 %f1459, 0f00000000; - @%p199 bra BB9_231; - - mul.wide.s32 %rd362, %r875, 4; - add.s64 %rd363, %rd4, %rd362; - ld.local.f32 %f1216, [%rd30]; - ld.local.f32 %f1217, [%rd363]; - fma.rn.f32 %f1459, %f1217, %f1216, 0f00000000; - mov.u32 %r876, 1; - -BB9_231: - neg.s32 %r651, %r876; - and.b32 %r652, %r425, %r651; - add.s32 %r653, %r652, %r875; - mul.wide.s32 %rd364, %r653, 4; - add.s64 %rd365, %rd4, %rd364; - add.s32 %r654, %r876, %r139; - mul.wide.s32 %rd366, %r654, 4; - add.s64 %rd367, %rd4, %rd366; - ld.local.f32 %f1218, [%rd367]; - ld.local.f32 %f1219, [%rd365]; - fma.rn.f32 %f1460, %f1219, %f1218, %f1459; - add.s32 %r877, %r876, 1; - -BB9_232: - mad.lo.s32 %r656, %r877, %r425, %r875; - mul.wide.s32 %rd368, %r656, 4; - add.s64 %rd369, %rd4, %rd368; - add.s32 %r657, %r877, %r139; - mul.wide.s32 %rd370, %r657, 4; - add.s64 %rd371, %rd4, %rd370; - ld.local.f32 %f1220, [%rd371]; - ld.local.f32 %f1221, [%rd369]; - fma.rn.f32 %f1463, %f1221, %f1220, %f1460; - add.s32 %r878, %r877, 1; - -BB9_233: - setp.lt.u32 %p200, %r875, 4; - @%p200 bra BB9_236; - - add.s32 %r883, %r878, -1; - add.s32 %r658, %r139, %r878; - mul.wide.s32 %rd372, %r658, 4; - add.s64 %rd514, %rd4, %rd372; - add.s32 %r659, %r878, 3; - mad.lo.s32 %r882, %r136, %r659, %r875; - add.s32 %r660, %r878, 2; - mad.lo.s32 %r881, %r136, %r660, %r875; - mad.lo.s32 %r880, %r136, %r878, %r875; - add.s32 %r879, %r880, %r136; - -BB9_235: - mul.wide.s32 %rd373, %r880, 4; - add.s64 %rd374, %rd4, %rd373; - ld.local.f32 %f1222, [%rd514]; - ld.local.f32 %f1223, [%rd374]; - fma.rn.f32 %f1224, %f1223, %f1222, %f1463; - mul.wide.s32 %rd375, %r879, 4; - add.s64 %rd376, %rd4, %rd375; - ld.local.f32 %f1225, [%rd514+4]; - ld.local.f32 %f1226, [%rd376]; - fma.rn.f32 %f1227, %f1226, %f1225, %f1224; - mul.wide.s32 %rd377, %r881, 4; - add.s64 %rd378, %rd4, %rd377; - ld.local.f32 %f1228, [%rd514+8]; - ld.local.f32 %f1229, [%rd378]; - fma.rn.f32 %f1230, %f1229, %f1228, %f1227; - mul.wide.s32 %rd379, %r882, 4; - add.s64 %rd380, %rd4, %rd379; - ld.local.f32 %f1231, [%rd514+12]; - ld.local.f32 %f1232, [%rd380]; - fma.rn.f32 %f1463, %f1232, %f1231, %f1230; - add.s64 %rd514, %rd514, 16; - add.s32 %r882, %r882, %r137; - add.s32 %r881, %r881, %r137; - add.s32 %r880, %r880, %r137; - add.s32 %r879, %r879, %r137; - add.s32 %r883, %r883, 4; - setp.lt.s32 %p201, %r883, %r142; - @%p201 bra BB9_235; - -BB9_236: - add.s32 %r661, %r875, %r139; - mul.wide.s32 %rd381, %r661, 4; - add.s64 %rd382, %rd4, %rd381; - ld.local.f32 %f1233, [%rd382]; - sub.f32 %f1234, %f1233, %f1463; - st.local.f32 [%rd382], %f1234; - -BB9_237: - add.s32 %r164, %r875, 1; - setp.lt.s32 %p202, %r875, %r874; - mov.u32 %r875, %r164; - @%p202 bra BB9_225; - -BB9_238: - add.s32 %r165, %r874, 1; - setp.ge.s32 %p203, %r165, %r425; - @%p203 bra BB9_254; - - add.s32 %r166, %r874, -1; - add.s32 %r663, %r139, %r874; - mul.wide.s32 %rd383, %r663, 4; - add.s64 %rd34, %rd4, %rd383; - and.b32 %r167, %r874, 3; - mul.lo.s32 %r168, %r136, %r874; - mov.u32 %r884, %r165; - -BB9_240: - add.s32 %r664, %r884, %r139; - mul.wide.s32 %rd384, %r664, 4; - add.s64 %rd35, %rd4, %rd384; - setp.gt.s32 %p204, %r874, 0; - @%p204 bra BB9_242; - bra.uni BB9_241; - -BB9_242: - setp.lt.s32 %p205, %r166, 0; - mov.f32 %f1468, 0f00000000; - @%p205 bra BB9_252; - - setp.eq.s32 %p206, %r167, 0; - mov.u32 %r887, 0; - mov.f32 %f1468, 0f00000000; - @%p206 bra BB9_249; - - setp.eq.s32 %p207, %r167, 1; - mov.u32 %r886, 0; - mov.f32 %f1465, 0f00000000; - @%p207 bra BB9_248; - - setp.eq.s32 %p208, %r167, 2; - mov.u32 %r885, 0; - mov.f32 %f1464, 0f00000000; - @%p208 bra BB9_247; - - mul.wide.s32 %rd385, %r884, 4; - add.s64 %rd386, %rd4, %rd385; - ld.local.f32 %f1243, [%rd30]; - ld.local.f32 %f1244, [%rd386]; - fma.rn.f32 %f1464, %f1244, %f1243, 0f00000000; - mov.u32 %r885, 1; - -BB9_247: - neg.s32 %r670, %r885; - and.b32 %r671, %r425, %r670; - add.s32 %r672, %r671, %r884; - mul.wide.s32 %rd387, %r672, 4; - add.s64 %rd388, %rd4, %rd387; - add.s32 %r673, %r885, %r139; - mul.wide.s32 %rd389, %r673, 4; - add.s64 %rd390, %rd4, %rd389; - ld.local.f32 %f1245, [%rd390]; - ld.local.f32 %f1246, [%rd388]; - fma.rn.f32 %f1465, %f1246, %f1245, %f1464; - add.s32 %r886, %r885, 1; - -BB9_248: - mad.lo.s32 %r675, %r886, %r425, %r884; - mul.wide.s32 %rd391, %r675, 4; - add.s64 %rd392, %rd4, %rd391; - add.s32 %r676, %r886, %r139; - mul.wide.s32 %rd393, %r676, 4; - add.s64 %rd394, %rd4, %rd393; - ld.local.f32 %f1247, [%rd394]; - ld.local.f32 %f1248, [%rd392]; - fma.rn.f32 %f1468, %f1248, %f1247, %f1465; - add.s32 %r887, %r886, 1; - -BB9_249: - setp.lt.u32 %p209, %r874, 4; - @%p209 bra BB9_252; - - add.s32 %r892, %r887, -1; - add.s32 %r677, %r168, %r887; - mul.wide.s32 %rd395, %r677, 4; - add.s64 %rd515, %rd4, %rd395; - add.s32 %r678, %r887, 3; - mad.lo.s32 %r891, %r136, %r678, %r884; - add.s32 %r679, %r887, 2; - mad.lo.s32 %r890, %r136, %r679, %r884; - mad.lo.s32 %r889, %r136, %r887, %r884; - add.s32 %r888, %r889, %r136; - -BB9_251: - mul.wide.s32 %rd396, %r889, 4; - add.s64 %rd397, %rd4, %rd396; - ld.local.f32 %f1249, [%rd515]; - ld.local.f32 %f1250, [%rd397]; - fma.rn.f32 %f1251, %f1250, %f1249, %f1468; - mul.wide.s32 %rd398, %r888, 4; - add.s64 %rd399, %rd4, %rd398; - ld.local.f32 %f1252, [%rd515+4]; - ld.local.f32 %f1253, [%rd399]; - fma.rn.f32 %f1254, %f1253, %f1252, %f1251; - mul.wide.s32 %rd400, %r890, 4; - add.s64 %rd401, %rd4, %rd400; - ld.local.f32 %f1255, [%rd515+8]; - ld.local.f32 %f1256, [%rd401]; - fma.rn.f32 %f1257, %f1256, %f1255, %f1254; - mul.wide.s32 %rd402, %r891, 4; - add.s64 %rd403, %rd4, %rd402; - ld.local.f32 %f1258, [%rd515+12]; - ld.local.f32 %f1259, [%rd403]; - fma.rn.f32 %f1468, %f1259, %f1258, %f1257; - add.s64 %rd515, %rd515, 16; - add.s32 %r891, %r891, %r137; - add.s32 %r890, %r890, %r137; - add.s32 %r889, %r889, %r137; - add.s32 %r888, %r888, %r137; - add.s32 %r892, %r892, 4; - setp.lt.s32 %p210, %r892, %r166; - @%p210 bra BB9_251; - -BB9_252: - ld.local.f32 %f1260, [%rd34]; - rcp.rn.f32 %f1261, %f1260; - ld.local.f32 %f1262, [%rd35]; - sub.f32 %f1263, %f1262, %f1468; - mul.f32 %f1264, %f1261, %f1263; - st.local.f32 [%rd35], %f1264; - bra.uni BB9_253; - -BB9_241: - ld.local.f32 %f1235, [%rd34]; - rcp.rn.f32 %f1236, %f1235; - ld.local.f32 %f1237, [%rd35]; - mul.f32 %f1238, %f1236, %f1237; - st.local.f32 [%rd35], %f1238; - -BB9_253: - add.s32 %r884, %r884, 1; - setp.lt.s32 %p211, %r884, %r425; - @%p211 bra BB9_240; - -BB9_254: - setp.lt.s32 %p212, %r165, %r425; - mov.u32 %r874, %r165; - @%p212 bra BB9_223; - -BB9_255: - @%p112 bra BB9_283; - - mad.lo.s32 %r684, %r425, %r26, %r26; - mul.wide.s32 %rd404, %r684, 4; - add.s64 %rd405, %rd4, %rd404; - ld.local.f32 %f176, [%rd405]; - mad.lo.s32 %r191, %r824, 8, 4; - add.s32 %r192, %r26, -1; - mul.wide.s32 %rd39, %r425, 4; - mov.u32 %r893, 0; - -BB9_257: - setp.eq.s32 %p214, %r893, 0; - selp.f32 %f1265, 0f3F800000, 0f00000000, %p214; - st.local.f32 [%rd6], %f1265; - mov.u32 %r894, 1; - -BB9_258: - add.s32 %r686, %r894, -1; - setp.lt.s32 %p215, %r686, 0; - mov.f32 %f1473, 0f00000000; - @%p215 bra BB9_268; - - and.b32 %r195, %r894, 3; - setp.eq.s32 %p216, %r195, 0; - mov.u32 %r897, 0; - mov.f32 %f1473, 0f00000000; - @%p216 bra BB9_265; - - setp.eq.s32 %p217, %r195, 1; - mov.u32 %r896, 0; - mov.f32 %f1470, 0f00000000; - @%p217 bra BB9_264; - - setp.eq.s32 %p218, %r195, 2; - mov.u32 %r895, 0; - mov.f32 %f1469, 0f00000000; - @%p218 bra BB9_263; - - mul.wide.s32 %rd406, %r894, 4; - add.s64 %rd407, %rd4, %rd406; - ld.local.f32 %f1270, [%rd6]; - ld.local.f32 %f1271, [%rd407]; - fma.rn.f32 %f1469, %f1271, %f1270, 0f00000000; - mov.u32 %r895, 1; - -BB9_263: - neg.s32 %r692, %r895; - and.b32 %r693, %r425, %r692; - add.s32 %r694, %r693, %r894; - mul.wide.s32 %rd408, %r694, 4; - add.s64 %rd409, %rd4, %rd408; - mul.wide.u32 %rd410, %r895, 4; - add.s64 %rd411, %rd6, %rd410; - ld.local.f32 %f1272, [%rd411]; - ld.local.f32 %f1273, [%rd409]; - fma.rn.f32 %f1470, %f1273, %f1272, %f1469; - add.s32 %r896, %r895, 1; - -BB9_264: - mad.lo.s32 %r696, %r896, %r425, %r894; - mul.wide.s32 %rd412, %r696, 4; - add.s64 %rd413, %rd4, %rd412; - mul.wide.s32 %rd414, %r896, 4; - add.s64 %rd415, %rd6, %rd414; - ld.local.f32 %f1274, [%rd415]; - ld.local.f32 %f1275, [%rd413]; - fma.rn.f32 %f1473, %f1275, %f1274, %f1470; - add.s32 %r897, %r896, 1; - -BB9_265: - setp.lt.u32 %p219, %r894, 4; - @%p219 bra BB9_268; - - add.s32 %r902, %r897, -1; - mul.wide.s32 %rd416, %r897, 4; - add.s64 %rd516, %rd6, %rd416; - add.s32 %r697, %r192, 2; - add.s32 %r698, %r897, 3; - mad.lo.s32 %r901, %r697, %r698, %r894; - add.s32 %r699, %r897, 2; - mad.lo.s32 %r900, %r697, %r699, %r894; - mad.lo.s32 %r899, %r697, %r897, %r894; - add.s32 %r898, %r899, %r697; - -BB9_267: - mul.wide.s32 %rd417, %r899, 4; - add.s64 %rd418, %rd4, %rd417; - ld.local.f32 %f1276, [%rd516]; - ld.local.f32 %f1277, [%rd418]; - fma.rn.f32 %f1278, %f1277, %f1276, %f1473; - mul.wide.s32 %rd419, %r898, 4; - add.s64 %rd420, %rd4, %rd419; - ld.local.f32 %f1279, [%rd516+4]; - ld.local.f32 %f1280, [%rd420]; - fma.rn.f32 %f1281, %f1280, %f1279, %f1278; - mul.wide.s32 %rd421, %r900, 4; - add.s64 %rd422, %rd4, %rd421; - ld.local.f32 %f1282, [%rd516+8]; - ld.local.f32 %f1283, [%rd422]; - fma.rn.f32 %f1284, %f1283, %f1282, %f1281; - mul.wide.s32 %rd423, %r901, 4; - add.s64 %rd424, %rd4, %rd423; - ld.local.f32 %f1285, [%rd516+12]; - ld.local.f32 %f1286, [%rd424]; - fma.rn.f32 %f1473, %f1286, %f1285, %f1284; - add.s64 %rd516, %rd516, 16; - add.s32 %r901, %r901, %r191; - add.s32 %r900, %r900, %r191; - add.s32 %r899, %r899, %r191; - add.s32 %r898, %r898, %r191; - add.s32 %r902, %r902, 4; - setp.lt.s32 %p220, %r902, %r686; - @%p220 bra BB9_267; - -BB9_268: - mul.wide.s32 %rd425, %r894, 4; - add.s64 %rd426, %rd6, %rd425; - setp.eq.s32 %p221, %r894, %r893; - selp.f32 %f1287, 0f3F800000, 0f00000000, %p221; - sub.f32 %f1288, %f1287, %f1473; - st.local.f32 [%rd426], %f1288; - add.s32 %r894, %r894, 1; - setp.lt.s32 %p222, %r894, %r425; - @%p222 bra BB9_258; - - ld.local.f32 %f1289, [%rd17]; - div.rn.f32 %f1290, %f1289, %f176; - mul.lo.s32 %r217, %r893, %r425; - add.s32 %r703, %r217, %r26; - mul.wide.s32 %rd427, %r703, 4; - add.s64 %rd428, %rd3, %rd427; - st.local.f32 [%rd428], %f1290; - setp.lt.s32 %p223, %r192, 0; - @%p223 bra BB9_282; - - add.s32 %r705, %r192, 2; - mul.lo.s32 %r218, %r705, %r893; - mov.u32 %r903, 0; - mov.u32 %r904, %r192; - -BB9_271: - add.s32 %r903, %r903, 1; - add.s32 %r907, %r904, 1; - add.s32 %r708, %r907, %r217; - mul.wide.s32 %rd429, %r708, 4; - add.s64 %rd43, %rd3, %rd429; - mov.f32 %f1478, 0f00000000; - setp.ge.s32 %p224, %r907, %r425; - @%p224 bra BB9_281; - - and.b32 %r222, %r903, 3; - setp.eq.s32 %p225, %r222, 0; - mov.f32 %f1478, 0f00000000; - @%p225 bra BB9_278; - - setp.eq.s32 %p226, %r222, 1; - add.s32 %r906, %r904, 1; - mov.f32 %f1475, 0f00000000; - @%p226 bra BB9_277; - - setp.eq.s32 %p227, %r222, 2; - add.s32 %r905, %r904, 1; - mov.f32 %f1474, 0f00000000; - @%p227 bra BB9_276; - - add.s32 %r710, %r904, 1; - mad.lo.s32 %r711, %r710, %r425, %r904; - mul.wide.s32 %rd430, %r711, 4; - add.s64 %rd431, %rd4, %rd430; - ld.local.f32 %f1295, [%rd43]; - ld.local.f32 %f1296, [%rd431]; - fma.rn.f32 %f1474, %f1296, %f1295, 0f00000000; - add.s32 %r905, %r904, 2; - -BB9_276: - mad.lo.s32 %r713, %r905, %r425, %r904; - mul.wide.s32 %rd432, %r713, 4; - add.s64 %rd433, %rd4, %rd432; - add.s32 %r714, %r905, %r217; - mul.wide.s32 %rd434, %r714, 4; - add.s64 %rd435, %rd3, %rd434; - ld.local.f32 %f1297, [%rd435]; - ld.local.f32 %f1298, [%rd433]; - fma.rn.f32 %f1475, %f1298, %f1297, %f1474; - add.s32 %r906, %r905, 1; - -BB9_277: - mad.lo.s32 %r716, %r906, %r425, %r904; - mul.wide.s32 %rd436, %r716, 4; - add.s64 %rd437, %rd4, %rd436; - add.s32 %r717, %r906, %r217; - mul.wide.s32 %rd438, %r717, 4; - add.s64 %rd439, %rd3, %rd438; - ld.local.f32 %f1299, [%rd439]; - ld.local.f32 %f1300, [%rd437]; - fma.rn.f32 %f1478, %f1300, %f1299, %f1475; - add.s32 %r907, %r906, 1; - -BB9_278: - setp.lt.u32 %p228, %r903, 4; - @%p228 bra BB9_281; - - add.s32 %r718, %r218, %r907; - mul.wide.s32 %rd440, %r718, 4; - add.s64 %rd517, %rd3, %rd440; - mad.lo.s32 %r908, %r705, %r907, %r904; - -BB9_280: - mul.wide.s32 %rd441, %r908, 4; - add.s64 %rd442, %rd4, %rd441; - ld.local.f32 %f1301, [%rd517]; - ld.local.f32 %f1302, [%rd442]; - fma.rn.f32 %f1303, %f1302, %f1301, %f1478; - add.s64 %rd443, %rd442, %rd39; - ld.local.f32 %f1304, [%rd517+4]; - ld.local.f32 %f1305, [%rd443]; - fma.rn.f32 %f1306, %f1305, %f1304, %f1303; - add.s64 %rd444, %rd443, %rd39; - ld.local.f32 %f1307, [%rd517+8]; - ld.local.f32 %f1308, [%rd444]; - fma.rn.f32 %f1309, %f1308, %f1307, %f1306; - add.s64 %rd445, %rd444, %rd39; - ld.local.f32 %f1310, [%rd517+12]; - ld.local.f32 %f1311, [%rd445]; - fma.rn.f32 %f1478, %f1311, %f1310, %f1309; - add.s64 %rd517, %rd517, 16; - add.s32 %r908, %r908, %r191; - add.s32 %r907, %r907, 4; - setp.lt.s32 %p229, %r907, %r425; - @%p229 bra BB9_280; - -BB9_281: - mad.lo.s32 %r722, %r904, %r425, %r904; - mul.wide.s32 %rd446, %r722, 4; - add.s64 %rd447, %rd4, %rd446; - ld.local.f32 %f1312, [%rd447]; - rcp.rn.f32 %f1313, %f1312; - mul.wide.s32 %rd448, %r904, 4; - add.s64 %rd449, %rd6, %rd448; - ld.local.f32 %f1314, [%rd449]; - sub.f32 %f1315, %f1314, %f1478; - mul.f32 %f1316, %f1313, %f1315; - st.local.f32 [%rd43+-4], %f1316; - add.s32 %r904, %r904, -1; - setp.gt.s32 %p230, %r904, -1; - @%p230 bra BB9_271; - -BB9_282: - add.s32 %r893, %r893, 1; - setp.lt.s32 %p231, %r893, %r425; - @%p231 bra BB9_257; - -BB9_283: - @%p112 bra BB9_291; - - mov.u32 %r726, 1; - max.s32 %r239, %r425, %r726; - and.b32 %r240, %r239, 3; - setp.eq.s32 %p233, %r240, 1; - @%p233 bra BB9_288; - - setp.eq.s32 %p234, %r240, 2; - mov.u32 %r910, 0; - @%p234 bra BB9_287; - - ld.local.f32 %f1317, [%rd3]; - st.local.f32 [%rd7], %f1317; - mov.u32 %r910, %r726; - -BB9_287: - neg.s32 %r730, %r910; - and.b32 %r731, %r425, %r730; - add.s32 %r732, %r731, %r910; - mul.wide.s32 %rd450, %r732, 4; - add.s64 %rd451, %rd3, %rd450; - ld.local.f32 %f1318, [%rd451]; - mul.wide.u32 %rd452, %r910, 4; - add.s64 %rd453, %rd7, %rd452; - st.local.f32 [%rd453], %f1318; - add.s32 %r911, %r910, 1; - -BB9_288: - mad.lo.s32 %r244, %r911, %r425, %r911; - mul.wide.s32 %rd454, %r244, 4; - add.s64 %rd455, %rd3, %rd454; - ld.local.f32 %f1319, [%rd455]; - mul.wide.s32 %rd456, %r911, 4; - add.s64 %rd518, %rd7, %rd456; - st.local.f32 [%rd518], %f1319; - setp.lt.u32 %p235, %r239, 4; - @%p235 bra BB9_291; - - add.s32 %r914, %r911, 1; - add.s32 %r734, %r911, 4; - mul.wide.u32 %rd458, %r734, 4; - add.s64 %rd519, %rd7, %rd458; - mad.lo.s32 %r735, %r824, 2, 1; - mad.lo.s32 %r246, %r824, 8, 8; - add.s32 %r736, %r244, %r735; - add.s32 %r913, %r736, 1; - add.s32 %r737, %r911, 2; - mad.lo.s32 %r912, %r737, %r735, %r737; - mul.wide.s32 %rd460, %r735, 4; - add.s64 %rd49, %rd460, 4; - -BB9_290: - mul.wide.s32 %rd461, %r913, 4; - add.s64 %rd462, %rd3, %rd461; - ld.local.f32 %f1320, [%rd462]; - st.local.f32 [%rd518+4], %f1320; - mul.wide.s32 %rd463, %r912, 4; - add.s64 %rd464, %rd3, %rd463; - ld.local.f32 %f1321, [%rd464]; - st.local.f32 [%rd518+8], %f1321; - add.s64 %rd465, %rd464, %rd49; - ld.local.f32 %f1322, [%rd465]; - st.local.f32 [%rd518+12], %f1322; - add.s64 %rd466, %rd465, %rd49; - ld.local.f32 %f1323, [%rd466]; - st.local.f32 [%rd519], %f1323; - add.s64 %rd519, %rd519, 16; - add.s64 %rd518, %rd518, 16; - add.s32 %r913, %r913, %r246; - add.s32 %r912, %r912, %r246; - add.s32 %r914, %r914, 4; - setp.lt.s32 %p236, %r914, %r425; - @%p236 bra BB9_290; - -BB9_291: - setp.lt.s32 %p237, %r824, 0; - @%p237 bra BB9_311; - - shl.b32 %r745, %r290, 1; - mov.u32 %r746, 1; - add.s32 %r747, %r745, 1; - mul.lo.s32 %r255, %r296, %r747; - add.s32 %r748, %r26, 1; - max.s32 %r256, %r748, %r746; - and.b32 %r257, %r256, 3; - setp.eq.s32 %p238, %r257, 1; - mov.u32 %r925, 0; - mov.u32 %r921, %r925; - @%p238 bra BB9_298; - - setp.eq.s32 %p239, %r257, 2; - mov.u32 %r925, 0; - mov.u32 %r915, %r925; - @%p239 bra BB9_295; - - ld.param.u64 %rd511, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_7]; - ld.local.f32 %f1324, [%rd3]; - abs.f32 %f1325, %f1324; - sqrt.rn.f32 %f1326, %f1325; - cvta.to.global.u64 %rd467, %rd511; - mul.wide.s32 %rd468, %r255, 4; - add.s64 %rd469, %rd467, %rd468; - st.global.f32 [%rd469], %f1326; - mov.u32 %r915, %r746; - -BB9_295: - ld.param.u64 %rd512, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_7]; - neg.s32 %r753, %r915; - and.b32 %r754, %r748, %r753; - add.s32 %r755, %r754, %r915; - mul.wide.s32 %rd470, %r755, 4; - add.s64 %rd471, %rd3, %rd470; - ld.local.f32 %f1327, [%rd471]; - abs.f32 %f1328, %f1327; - sqrt.rn.f32 %f1329, %f1328; - add.s32 %r756, %r255, %r915; - cvta.to.global.u64 %rd472, %rd512; - mul.wide.s32 %rd473, %r756, 4; - add.s64 %rd474, %rd472, %rd473; - st.global.f32 [%rd474], %f1329; - setp.eq.s32 %p240, %r915, 0; - @%p240 bra BB9_297; - - add.s32 %r758, %r915, -1; - mov.u32 %r925, 1; - mad.lo.s32 %r760, %r758, %r748, %r915; - mul.wide.s32 %rd475, %r760, 4; - add.s64 %rd476, %rd3, %rd475; - ld.local.f32 %f1330, [%rd476]; - cvta.to.global.u64 %rd477, %rd64; - mul.wide.s32 %rd478, %r1, 4; - add.s64 %rd479, %rd477, %rd478; - st.global.f32 [%rd479], %f1330; - -BB9_297: - add.s32 %r921, %r915, 1; - -BB9_298: - ld.param.u64 %rd513, [_Z11kernel_CRLBiiffPKfS0_S0_PfS1_iS1_S1__param_7]; - mul.lo.s32 %r263, %r921, %r748; - add.s32 %r762, %r263, %r921; - mul.wide.s32 %rd480, %r762, 4; - add.s64 %rd481, %rd3, %rd480; - ld.local.f32 %f1331, [%rd481]; - abs.f32 %f1332, %f1331; - sqrt.rn.f32 %f1333, %f1332; - add.s32 %r763, %r255, %r921; - cvta.to.global.u64 %rd482, %rd513; - mul.wide.s32 %rd483, %r763, 4; - add.s64 %rd484, %rd482, %rd483; - st.global.f32 [%rd484], %f1333; - and.b32 %r764, %r921, 1; - setp.eq.b32 %p241, %r764, 1; - @!%p241 bra BB9_300; - bra.uni BB9_299; - -BB9_299: - add.s32 %r765, %r921, -1; - mad.lo.s32 %r767, %r765, %r748, %r921; - mul.wide.s32 %rd485, %r767, 4; - add.s64 %rd486, %rd3, %rd485; - ld.local.f32 %f1334, [%rd486]; - mad.lo.s32 %r772, %r296, %r290, %r925; - cvta.to.global.u64 %rd487, %rd64; - mul.wide.s32 %rd488, %r772, 4; - add.s64 %rd489, %rd487, %rd488; - st.global.f32 [%rd489], %f1334; - add.s32 %r925, %r925, 1; - -BB9_300: - setp.lt.u32 %p242, %r256, 4; - @%p242 bra BB9_311; - - add.s32 %r773, %r921, 3; - mad.lo.s32 %r266, %r824, 8, 8; - add.s32 %r776, %r921, 4; - mad.lo.s32 %r922, %r776, %r748, %r776; - add.s32 %r777, %r921, 2; - mad.lo.s32 %r268, %r773, %r748, 3; - add.s32 %r778, %r263, %r748; - add.s32 %r269, %r778, 1; - mad.lo.s32 %r270, %r777, %r748, 2; - add.s32 %r779, %r26, 2; - mul.lo.s32 %r920, %r921, %r779; - mad.lo.s32 %r784, %r290, 2, 1; - mad.lo.s32 %r785, %r296, %r784, %r921; - add.s32 %r786, %r785, 1; - mul.wide.s32 %rd491, %r786, 4; - add.s64 %rd520, %rd482, %rd491; - mov.u32 %r923, %r921; - -BB9_302: - add.s32 %r787, %r269, %r921; - mul.wide.s32 %rd492, %r787, 4; - add.s64 %rd56, %rd3, %rd492; - ld.local.f32 %f1335, [%rd56]; - abs.f32 %f1336, %f1335; - sqrt.rn.f32 %f1337, %f1336; - st.global.f32 [%rd520], %f1337; - add.s32 %r788, %r923, 1; - and.b32 %r789, %r788, 1; - setp.eq.b32 %p243, %r789, 1; - @!%p243 bra BB9_304; - bra.uni BB9_303; - -BB9_303: - add.s32 %r790, %r920, 1; - mul.wide.s32 %rd493, %r790, 4; - add.s64 %rd494, %rd3, %rd493; - ld.local.f32 %f1338, [%rd494]; - mad.lo.s32 %r795, %r296, %r290, %r925; - cvta.to.global.u64 %rd495, %rd64; - mul.wide.s32 %rd496, %r795, 4; - add.s64 %rd497, %rd495, %rd496; - st.global.f32 [%rd497], %f1338; - add.s32 %r925, %r925, 1; - -BB9_304: - add.s32 %r796, %r270, %r921; - mul.wide.s32 %rd498, %r796, 4; - add.s64 %rd57, %rd3, %rd498; - ld.local.f32 %f1339, [%rd57]; - abs.f32 %f1340, %f1339; - sqrt.rn.f32 %f1341, %f1340; - st.global.f32 [%rd520+4], %f1341; - add.s32 %r797, %r923, 2; - and.b32 %r798, %r797, 1; - setp.eq.b32 %p244, %r798, 1; - @!%p244 bra BB9_306; - bra.uni BB9_305; - -BB9_305: - ld.local.f32 %f1342, [%rd56+4]; - mad.lo.s32 %r803, %r296, %r290, %r925; - cvta.to.global.u64 %rd499, %rd64; - mul.wide.s32 %rd500, %r803, 4; - add.s64 %rd501, %rd499, %rd500; - st.global.f32 [%rd501], %f1342; - add.s32 %r925, %r925, 1; - -BB9_306: - add.s32 %r804, %r268, %r921; - mul.wide.s32 %rd502, %r804, 4; - add.s64 %rd58, %rd3, %rd502; - ld.local.f32 %f1343, [%rd58]; - abs.f32 %f1344, %f1343; - sqrt.rn.f32 %f1345, %f1344; - st.global.f32 [%rd520+8], %f1345; - add.s32 %r805, %r923, 3; - and.b32 %r806, %r805, 1; - setp.eq.b32 %p245, %r806, 1; - @!%p245 bra BB9_308; - bra.uni BB9_307; - -BB9_307: - ld.local.f32 %f1346, [%rd57+4]; - mad.lo.s32 %r811, %r296, %r290, %r925; - cvta.to.global.u64 %rd503, %rd64; - mul.wide.s32 %rd504, %r811, 4; - add.s64 %rd505, %rd503, %rd504; - st.global.f32 [%rd505], %f1346; - add.s32 %r925, %r925, 1; - -BB9_308: - mul.wide.s32 %rd506, %r922, 4; - add.s64 %rd507, %rd3, %rd506; - ld.local.f32 %f1347, [%rd507]; - abs.f32 %f1348, %f1347; - sqrt.rn.f32 %f1349, %f1348; - st.global.f32 [%rd520+12], %f1349; - add.s32 %r812, %r923, 4; - and.b32 %r813, %r812, 1; - setp.eq.b32 %p246, %r813, 1; - @!%p246 bra BB9_310; - bra.uni BB9_309; - -BB9_309: - cvt.u32.u64 %r814, %rd8; - ld.local.f32 %f1350, [%rd58+4]; - add.s32 %r815, %r925, %r814; - cvta.to.global.u64 %rd508, %rd64; - mul.wide.s32 %rd509, %r815, 4; - add.s64 %rd510, %rd508, %rd509; - st.global.f32 [%rd510], %f1350; - add.s32 %r925, %r925, 1; - -BB9_310: - add.s32 %r816, %r923, 5; - add.s32 %r922, %r922, %r266; - add.s32 %r921, %r921, %r266; - add.s32 %r920, %r920, %r266; - setp.lt.s32 %p247, %r816, %r748; - add.s64 %rd520, %rd520, 16; - mov.u32 %r923, %r812; - @%p247 bra BB9_302; - -BB9_311: + sqrt.rn.f32 %f1086, %f1085; + st.global.f32 [%rd293+12], %f1086; + ld.local.f32 %f1087, [%rd299+4]; + st.global.f32 [%rd297+4], %f1087; + add.s32 %r613, %r613, 2; + add.s32 %r607, %r607, 4; + add.s32 %r605, %r605, -4; + setp.ne.s32 %p240, %r605, 0; + @%p240 bra $L__BB9_249; + +$L__BB9_250: + add.s32 %r198, %r29, 2; + mul.lo.s32 %r609, %r607, %r198; + add.s32 %r529, %r609, -1; + sub.s32 %r610, %r529, %r29; + add.s32 %r530, %r607, %r186; + mul.wide.s32 %rd302, %r530, 4; + add.s64 %rd313, %rd9, %rd302; + add.s32 %r531, %r188, -1; + not.b32 %r608, %r531; + mov.pred %p242, 0; + +$L__BB9_251: + .pragma "nounroll"; + mul.wide.s32 %rd303, %r609, 4; + add.s64 %rd304, %rd4, %rd303; + ld.local.f32 %f1088, [%rd304]; + abs.f32 %f1089, %f1088; + sqrt.rn.f32 %f1090, %f1089; + st.global.f32 [%rd313], %f1090; + and.b32 %r532, %r607, 1; + setp.eq.b32 %p241, %r532, 1; + xor.pred %p243, %p241, %p242; + not.pred %p244, %p243; + @%p244 bra $L__BB9_253; + + cvt.u32.u64 %r533, %rd11; + mul.wide.s32 %rd305, %r610, 4; + add.s64 %rd306, %rd4, %rd305; + ld.local.f32 %f1091, [%rd306]; + add.s32 %r534, %r613, %r533; + mul.wide.s32 %rd307, %r534, 4; + add.s64 %rd308, %rd3, %rd307; + st.global.f32 [%rd308], %f1091; + add.s32 %r613, %r613, 1; + +$L__BB9_253: + add.s32 %r607, %r607, 1; + add.s32 %r610, %r610, %r198; + add.s64 %rd313, %rd313, 4; + add.s32 %r609, %r609, %r198; + add.s32 %r608, %r608, 1; + setp.ne.s32 %p245, %r608, 0; + @%p245 bra $L__BB9_251; + +$L__BB9_254: ret; + } +.func (.param .b64 func_retval0) __internal_accurate_pow( + .param .b64 __internal_accurate_pow_param_0, + .param .b64 __internal_accurate_pow_param_1 +) +{ + .reg .pred %p<10>; + .reg .f32 %f<3>; + .reg .b32 %r<53>; + .reg .f64 %fd<138>; + + + ld.param.f64 %fd12, [__internal_accurate_pow_param_0]; + ld.param.f64 %fd13, [__internal_accurate_pow_param_1]; + { + .reg .b32 %temp; + mov.b64 {%temp, %r50}, %fd12; + } + { + .reg .b32 %temp; + mov.b64 {%r49, %temp}, %fd12; + } + shr.u32 %r51, %r50, 20; + setp.ne.s32 %p1, %r51, 0; + @%p1 bra $L__BB10_2; + + mul.f64 %fd14, %fd12, 0d4350000000000000; + { + .reg .b32 %temp; + mov.b64 {%temp, %r50}, %fd14; + } + { + .reg .b32 %temp; + mov.b64 {%r49, %temp}, %fd14; + } + shr.u32 %r16, %r50, 20; + add.s32 %r51, %r16, -54; + +$L__BB10_2: + add.s32 %r52, %r51, -1023; + and.b32 %r17, %r50, -2146435073; + or.b32 %r18, %r17, 1072693248; + mov.b64 %fd135, {%r49, %r18}; + setp.lt.u32 %p2, %r18, 1073127583; + @%p2 bra $L__BB10_4; + + { + .reg .b32 %temp; + mov.b64 {%r19, %temp}, %fd135; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r20}, %fd135; + } + add.s32 %r21, %r20, -1048576; + mov.b64 %fd135, {%r19, %r21}; + add.s32 %r52, %r51, -1022; + +$L__BB10_4: + add.f64 %fd15, %fd135, 0d3FF0000000000000; + mov.f64 %fd16, 0d3FF0000000000000; + rcp.approx.ftz.f64 %fd17, %fd15; + neg.f64 %fd18, %fd15; + fma.rn.f64 %fd19, %fd18, %fd17, %fd16; + fma.rn.f64 %fd20, %fd19, %fd19, %fd19; + fma.rn.f64 %fd21, %fd20, %fd17, %fd17; + add.f64 %fd22, %fd135, 0dBFF0000000000000; + mul.f64 %fd23, %fd22, %fd21; + fma.rn.f64 %fd24, %fd22, %fd21, %fd23; + mul.f64 %fd25, %fd24, %fd24; + mov.f64 %fd26, 0d3ED0F5D241AD3B5A; + mov.f64 %fd27, 0d3EB0F5FF7D2CAFE2; + fma.rn.f64 %fd28, %fd27, %fd25, %fd26; + mov.f64 %fd29, 0d3EF3B20A75488A3F; + fma.rn.f64 %fd30, %fd28, %fd25, %fd29; + mov.f64 %fd31, 0d3F1745CDE4FAECD5; + fma.rn.f64 %fd32, %fd30, %fd25, %fd31; + mov.f64 %fd33, 0d3F3C71C7258A578B; + fma.rn.f64 %fd34, %fd32, %fd25, %fd33; + mov.f64 %fd35, 0d3F6249249242B910; + fma.rn.f64 %fd36, %fd34, %fd25, %fd35; + mov.f64 %fd37, 0d3F89999999999DFB; + fma.rn.f64 %fd38, %fd36, %fd25, %fd37; + sub.f64 %fd39, %fd22, %fd24; + add.f64 %fd40, %fd39, %fd39; + neg.f64 %fd41, %fd24; + fma.rn.f64 %fd42, %fd41, %fd22, %fd40; + mul.f64 %fd43, %fd21, %fd42; + fma.rn.f64 %fd44, %fd25, %fd38, 0d3FB5555555555555; + mov.f64 %fd45, 0d3FB5555555555555; + sub.f64 %fd46, %fd45, %fd44; + fma.rn.f64 %fd47, %fd25, %fd38, %fd46; + add.f64 %fd48, %fd47, 0d0000000000000000; + add.f64 %fd49, %fd48, 0dBC46A4CB00B9E7B0; + add.f64 %fd50, %fd44, %fd49; + sub.f64 %fd51, %fd44, %fd50; + add.f64 %fd52, %fd49, %fd51; + mul.rn.f64 %fd53, %fd24, %fd24; + neg.f64 %fd54, %fd53; + fma.rn.f64 %fd55, %fd24, %fd24, %fd54; + { + .reg .b32 %temp; + mov.b64 {%r22, %temp}, %fd43; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r23}, %fd43; + } + add.s32 %r24, %r23, 1048576; + mov.b64 %fd56, {%r22, %r24}; + fma.rn.f64 %fd57, %fd24, %fd56, %fd55; + mul.rn.f64 %fd58, %fd53, %fd24; + neg.f64 %fd59, %fd58; + fma.rn.f64 %fd60, %fd53, %fd24, %fd59; + fma.rn.f64 %fd61, %fd53, %fd43, %fd60; + fma.rn.f64 %fd62, %fd57, %fd24, %fd61; + mul.rn.f64 %fd63, %fd50, %fd58; + neg.f64 %fd64, %fd63; + fma.rn.f64 %fd65, %fd50, %fd58, %fd64; + fma.rn.f64 %fd66, %fd50, %fd62, %fd65; + fma.rn.f64 %fd67, %fd52, %fd58, %fd66; + add.f64 %fd68, %fd63, %fd67; + sub.f64 %fd69, %fd63, %fd68; + add.f64 %fd70, %fd67, %fd69; + add.f64 %fd71, %fd24, %fd68; + sub.f64 %fd72, %fd24, %fd71; + add.f64 %fd73, %fd68, %fd72; + add.f64 %fd74, %fd70, %fd73; + add.f64 %fd75, %fd43, %fd74; + add.f64 %fd76, %fd71, %fd75; + sub.f64 %fd77, %fd71, %fd76; + add.f64 %fd78, %fd75, %fd77; + xor.b32 %r25, %r52, -2147483648; + mov.u32 %r26, -2147483648; + mov.u32 %r27, 1127219200; + mov.b64 %fd79, {%r25, %r27}; + mov.b64 %fd80, {%r26, %r27}; + sub.f64 %fd81, %fd79, %fd80; + mov.f64 %fd82, 0d3FE62E42FEFA39EF; + fma.rn.f64 %fd83, %fd81, %fd82, %fd76; + neg.f64 %fd84, %fd81; + fma.rn.f64 %fd85, %fd84, %fd82, %fd83; + sub.f64 %fd86, %fd85, %fd76; + sub.f64 %fd87, %fd78, %fd86; + mov.f64 %fd88, 0d3C7ABC9E3B39803F; + fma.rn.f64 %fd89, %fd81, %fd88, %fd87; + add.f64 %fd90, %fd83, %fd89; + sub.f64 %fd91, %fd83, %fd90; + add.f64 %fd92, %fd89, %fd91; + { + .reg .b32 %temp; + mov.b64 {%temp, %r28}, %fd13; + } + shl.b32 %r29, %r28, 1; + setp.gt.u32 %p3, %r29, -33554433; + and.b32 %r30, %r28, -15728641; + selp.b32 %r31, %r30, %r28, %p3; + { + .reg .b32 %temp; + mov.b64 {%r32, %temp}, %fd13; + } + mov.b64 %fd93, {%r32, %r31}; + mul.rn.f64 %fd94, %fd90, %fd93; + neg.f64 %fd95, %fd94; + fma.rn.f64 %fd96, %fd90, %fd93, %fd95; + fma.rn.f64 %fd97, %fd92, %fd93, %fd96; + add.f64 %fd4, %fd94, %fd97; + sub.f64 %fd98, %fd94, %fd4; + add.f64 %fd5, %fd97, %fd98; + mov.f64 %fd99, 0d4338000000000000; + mov.f64 %fd100, 0d3FF71547652B82FE; + fma.rn.f64 %fd101, %fd4, %fd100, %fd99; + { + .reg .b32 %temp; + mov.b64 {%r13, %temp}, %fd101; + } + mov.f64 %fd102, 0dC338000000000000; + add.rn.f64 %fd103, %fd101, %fd102; + mov.f64 %fd104, 0dBFE62E42FEFA39EF; + fma.rn.f64 %fd105, %fd103, %fd104, %fd4; + mov.f64 %fd106, 0dBC7ABC9E3B39803F; + fma.rn.f64 %fd107, %fd103, %fd106, %fd105; + mov.f64 %fd108, 0d3E928AF3FCA213EA; + mov.f64 %fd109, 0d3E5ADE1569CE2BDF; + fma.rn.f64 %fd110, %fd109, %fd107, %fd108; + mov.f64 %fd111, 0d3EC71DEE62401315; + fma.rn.f64 %fd112, %fd110, %fd107, %fd111; + mov.f64 %fd113, 0d3EFA01997C89EB71; + fma.rn.f64 %fd114, %fd112, %fd107, %fd113; + mov.f64 %fd115, 0d3F2A01A014761F65; + fma.rn.f64 %fd116, %fd114, %fd107, %fd115; + mov.f64 %fd117, 0d3F56C16C1852B7AF; + fma.rn.f64 %fd118, %fd116, %fd107, %fd117; + mov.f64 %fd119, 0d3F81111111122322; + fma.rn.f64 %fd120, %fd118, %fd107, %fd119; + mov.f64 %fd121, 0d3FA55555555502A1; + fma.rn.f64 %fd122, %fd120, %fd107, %fd121; + mov.f64 %fd123, 0d3FC5555555555511; + fma.rn.f64 %fd124, %fd122, %fd107, %fd123; + mov.f64 %fd125, 0d3FE000000000000B; + fma.rn.f64 %fd126, %fd124, %fd107, %fd125; + fma.rn.f64 %fd127, %fd126, %fd107, %fd16; + fma.rn.f64 %fd128, %fd127, %fd107, %fd16; + { + .reg .b32 %temp; + mov.b64 {%r14, %temp}, %fd128; + } + { + .reg .b32 %temp; + mov.b64 {%temp, %r15}, %fd128; + } + shl.b32 %r33, %r13, 20; + add.s32 %r34, %r15, %r33; + mov.b64 %fd136, {%r14, %r34}; + { + .reg .b32 %temp; + mov.b64 {%temp, %r35}, %fd4; + } + mov.b32 %f2, %r35; + abs.f32 %f1, %f2; + setp.lt.f32 %p4, %f1, 0f4086232B; + @%p4 bra $L__BB10_7; + + setp.lt.f64 %p5, %fd4, 0d0000000000000000; + add.f64 %fd129, %fd4, 0d7FF0000000000000; + selp.f64 %fd136, 0d0000000000000000, %fd129, %p5; + setp.geu.f32 %p6, %f1, 0f40874800; + @%p6 bra $L__BB10_7; + + mov.f64 %fd134, 0d4338000000000000; + mov.f64 %fd133, 0d3FF71547652B82FE; + fma.rn.f64 %fd132, %fd4, %fd133, %fd134; + { + .reg .b32 %temp; + mov.b64 {%r48, %temp}, %fd132; + } + shr.u32 %r36, %r48, 31; + add.s32 %r37, %r48, %r36; + shr.s32 %r38, %r37, 1; + shl.b32 %r39, %r38, 20; + add.s32 %r40, %r15, %r39; + mov.b64 %fd130, {%r14, %r40}; + sub.s32 %r41, %r48, %r38; + shl.b32 %r42, %r41, 20; + add.s32 %r43, %r42, 1072693248; + mov.u32 %r44, 0; + mov.b64 %fd131, {%r44, %r43}; + mul.f64 %fd136, %fd130, %fd131; + +$L__BB10_7: + { + .reg .b32 %temp; + mov.b64 {%temp, %r45}, %fd136; + } + and.b32 %r46, %r45, 2147483647; + setp.eq.s32 %p7, %r46, 2146435072; + { + .reg .b32 %temp; + mov.b64 {%r47, %temp}, %fd136; + } + setp.eq.s32 %p8, %r47, 0; + and.pred %p9, %p8, %p7; + @%p9 bra $L__BB10_9; + + fma.rn.f64 %fd136, %fd136, %fd5, %fd136; + +$L__BB10_9: + st.param.f64 [func_retval0+0], %fd136; + ret; +} diff --git a/MATLAB/source/cuda/cuda_Make.m b/MATLAB/source/cuda/cuda_Make.m index 9c82c417..84d0e769 100644 --- a/MATLAB/source/cuda/cuda_Make.m +++ b/MATLAB/source/cuda/cuda_Make.m @@ -11,9 +11,10 @@ if ispc % Adding system path for nvcc to compile with nvcc - setenv('PATH', [getenv('PATH') ';C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\bin']); + setenv('PATH', [getenv('PATH') ';C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.5\bin']); % Adding system path for VS2013 to compile with cl - setenv('PATH', [getenv('PATH') ';C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin']); + % setenv('PATH', [getenv('PATH') ';C:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin']); + setenv('PATH', [getenv('PATH') ';C:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.30.30705\bin\Hostx64\x64']) else % Linux/MacOS % Adding system path for nvcc to compile with nvcc setenv('PATH', [getenv('PATH') ':/usr/local/cuda-10.1/bin']); @@ -25,7 +26,8 @@ fprintf('Compiling %s ...\n', cuda_dir); addpath(cuda_dir); -[s, r] = system(sprintf('nvcc -ptx %s -o %s\n', ... +%note -allow-unsupported-compiler is used to allow 2022 version. DANGER! +[s, r] = system(sprintf('nvcc -allow-unsupported-compiler -ptx %s -o %s\n',... fullfile(cuda_dir, [cuda_dir, '.cu']), ... fullfile('..', '..', 'ptx', [cuda_dir, '.ptx']))) copyfile(fullfile(cuda_dir, [cuda_dir, '.cu']), fullfile('..', '..', 'ptx')); @@ -36,7 +38,7 @@ fprintf('Compiling %s ...\n', cuda_dir); addpath(cuda_dir); -[s, r] = system(sprintf('nvcc -ptx %s -o %s\n', ... +[s, r] = system(sprintf('nvcc -allow-unsupported-compiler -ptx %s -o %s\n', ... fullfile(cuda_dir, [cuda_dir, '.cu']), ... fullfile('..', '..', 'ptx', [cuda_dir, '.ptx']))) copyfile(fullfile(cuda_dir, [cuda_dir, '.cu']), fullfile('..', '..', 'ptx')); @@ -47,7 +49,7 @@ fprintf('Compiling %s ...\n', cuda_dir); addpath(cuda_dir); -[s, r] = system(sprintf('nvcc -ptx %s -o %s\n', ... +[s, r] = system(sprintf('nvcc -allow-unsupported-compiler -ptx %s -o %s\n', ... fullfile(cuda_dir, [cuda_dir, '.cu']), ... fullfile('..', '..', 'ptx', [cuda_dir, '.ptx']))); copyfile(fullfile(cuda_dir, [cuda_dir, '.cu']), fullfile('..', '..', 'ptx')); @@ -59,7 +61,7 @@ fprintf('Compiling %s ...\n', cuda_dir); addpath(cuda_dir); -[s, r] = system(sprintf('nvcc -ptx %s -o %s\n', ... +[s, r] = system(sprintf('nvcc -allow-unsupported-compiler -ptx %s -o %s\n', ... fullfile(cuda_dir, [cuda_dir, '.cu']), ... fullfile('..', '..', 'ptx', [cuda_dir, '.ptx']))) diff --git a/MATLAB/source/cuda/smi_cuda_gaussBlobROIStack/smi_cuda_gaussBlobROIStack.cu b/MATLAB/source/cuda/smi_cuda_gaussBlobROIStack/smi_cuda_gaussBlobROIStack.cu index cc04914b..c4f194e4 100644 --- a/MATLAB/source/cuda/smi_cuda_gaussBlobROIStack/smi_cuda_gaussBlobROIStack.cu +++ b/MATLAB/source/cuda/smi_cuda_gaussBlobROIStack/smi_cuda_gaussBlobROIStack.cu @@ -33,7 +33,7 @@ __global__ void kernel_guassiansampleblobs( const int sz, const int Nframes, con for(jj=0;jj