
jdk.graal.compiler.lir.amd64.AMD64MathTanOp Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of compiler Show documentation
Show all versions of compiler Show documentation
The GraalVM compiler and the Graal-truffle optimizer.
/*
* Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, Intel Corporation. All rights reserved.
* Intel Math Library (LIBM) Source Code
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.graal.compiler.lir.amd64;
import static jdk.vm.ci.amd64.AMD64.r10;
import static jdk.vm.ci.amd64.AMD64.r11;
import static jdk.vm.ci.amd64.AMD64.r8;
import static jdk.vm.ci.amd64.AMD64.r9;
import static jdk.vm.ci.amd64.AMD64.rax;
import static jdk.vm.ci.amd64.AMD64.rbx;
import static jdk.vm.ci.amd64.AMD64.rcx;
import static jdk.vm.ci.amd64.AMD64.rdi;
import static jdk.vm.ci.amd64.AMD64.rdx;
import static jdk.vm.ci.amd64.AMD64.rsi;
import static jdk.vm.ci.amd64.AMD64.rsp;
import static jdk.vm.ci.amd64.AMD64.xmm0;
import static jdk.vm.ci.amd64.AMD64.xmm1;
import static jdk.vm.ci.amd64.AMD64.xmm2;
import static jdk.vm.ci.amd64.AMD64.xmm3;
import static jdk.vm.ci.amd64.AMD64.xmm4;
import static jdk.vm.ci.amd64.AMD64.xmm5;
import static jdk.vm.ci.amd64.AMD64.xmm6;
import static jdk.vm.ci.amd64.AMD64.xmm7;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.pointerConstant;
import static jdk.graal.compiler.lir.amd64.AMD64LIRHelper.recordExternalAddress;
import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.asm.amd64.AMD64Assembler;
import jdk.graal.compiler.asm.amd64.AMD64Assembler.ConditionFlag;
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.SyncPort;
import jdk.graal.compiler.lir.asm.ArrayDataPointerConstant;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.vm.ci.amd64.AMD64;
/**
*
* ALGORITHM DESCRIPTION - TAN()
* ---------------------
*
* Polynomials coefficients and other constants.
*
* Note that in this algorithm, there is a different polynomial for
* each breakpoint, so there are 32 sets of polynomial coefficients
* as well as 32 instances of the other constants.
*
* The polynomial coefficients and constants are offset from the start
* of the main block as follows:
*
* 0: c8 | c0
* 16: c9 | c1
* 32: c10 | c2
* 48: c11 | c3
* 64: c12 | c4
* 80: c13 | c5
* 96: c14 | c6
* 112: c15 | c7
* 128: T_hi
* 136: T_lo
* 144: Sigma
* 152: T_hl
* 160: Tau
* 168: Mask
* 176: (end of block)
*
* The total table size is therefore 5632 bytes.
*
* Note that c0 and c1 are always zero. We could try storing
* other constants here, and just loading the low part of the
* SIMD register in these cases, after ensuring the high part
* is zero.
*
* The higher terms of the polynomial are computed in the *low*
* part of the SIMD register. This is so we can overlap the
* multiplication by r^8 and the unpacking of the other part.
*
* The constants are:
* T_hi + T_lo = accurate constant term in power series
* Sigma + T_hl = accurate coefficient of r in power series (Sigma=1 bit)
* Tau = multiplier for the reciprocal, always -1 or 0
*
* The basic reconstruction formula using these constants is:
*
* High = tau * recip_hi + t_hi
* Med = (sgn * r + t_hl * r)_hi
* Low = (sgn * r + t_hl * r)_lo +
* tau * recip_lo + T_lo + (T_hl + sigma) * c + pol
*
* where pol = c0 + c1 * r + c2 * r^2 + ... + c15 * r^15
*
* (c0 = c1 = 0, but using them keeps SIMD regularity)
*
* We then do a compensated sum High + Med, add the low parts together
* and then do the final sum.
*
* Here recip_hi + recip_lo is an accurate reciprocal of the remainder
* modulo pi/2
*
* Special cases:
* tan(NaN) = quiet NaN, and raise invalid exception
* tan(INF) = NaN and raise invalid exception
* tan(+/-0) = +/-0
*
*/
// @formatter:off
@SyncPort(from = "https://github.com/openjdk/jdk/blob/12358e6c94bc96e618efc3ec5299a2cfe1b4669d/src/hotspot/cpu/x86/stubGenerator_x86_64_tan.cpp#L31-L1029",
sha1 = "b75cd72bfce1bd795f00414d3fe76d10569eb76f")
// @formatter:on
public final class AMD64MathTanOp extends AMD64MathIntrinsicUnaryOp {
public static final LIRInstructionClass TYPE = LIRInstructionClass.create(AMD64MathTanOp.class);
public AMD64MathTanOp() {
super(TYPE, /* GPR */ rax, rcx, rdx, rbx, rsi, rdi, r8, r9, r10, r11,
/* XMM */ xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7);
}
private static ArrayDataPointerConstant onehalf = pointerConstant(16, new int[]{
// @formatter:off
0x00000000, 0x3fe00000, 0x00000000, 0x3fe00000
// @formatter:on
});
private static ArrayDataPointerConstant mul16 = pointerConstant(16, new int[]{
// @formatter:off
0x00000000, 0x40300000, 0x00000000, 0x3ff00000
// @formatter:on
});
private static ArrayDataPointerConstant signMask = pointerConstant(16, new int[]{
// @formatter:off
0x00000000, 0x80000000, 0x00000000, 0x80000000
// @formatter:on
});
private static ArrayDataPointerConstant pi32Inv = pointerConstant(16, new int[]{
// @formatter:off
0x6dc9c883, 0x3fe45f30, 0x6dc9c883, 0x40245f30
// @formatter:on
});
private static ArrayDataPointerConstant p1 = pointerConstant(16, new int[]{
// @formatter:off
0x54444000, 0x3fb921fb, 0x54440000, 0x3fb921fb
// @formatter:on
});
private static ArrayDataPointerConstant p2 = pointerConstant(16, new int[]{
// @formatter:off
0x67674000, 0xbd32e7b9, 0x4c4c0000, 0x3d468c23
// @formatter:on
});
private static ArrayDataPointerConstant p3 = pointerConstant(16, new int[]{
// @formatter:off
0x3707344a, 0x3aa8a2e0, 0x03707345, 0x3ae98a2e
// @formatter:on
});
private static ArrayDataPointerConstant ctable = pointerConstant(16, new int[]{
// @formatter:off
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x882c10fa,
0x3f9664f4, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x55e6c23d, 0x3f8226e3, 0x55555555,
0x3fd55555, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x0e157de0, 0x3f6d6d3d, 0x11111111, 0x3fc11111, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x452b75e3, 0x3f57da36,
0x1ba1ba1c, 0x3faba1ba, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x3ff00000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b,
0x3f953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea,
0x00000000, 0x00000000, 0xda5b7511, 0x3f85ad63, 0xdc230b9b,
0x3fb97558, 0x26cb3788, 0x3f881308, 0x76fc4985, 0x3fd62ac9,
0x77bb08ba, 0x3f757c85, 0xb6247521, 0x3fb1381e, 0x5922170c,
0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0x3f64e391,
0x3e666320, 0x3fa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06,
0x3fafa8ae, 0x8c5b2da2, 0x3fb936bb, 0x4e88f7a5, 0x3c587d05,
0x00000000, 0x3ff00000, 0xa8935dd9, 0x3f83dde2, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x5a279ea3, 0x3faa3407,
0x00000000, 0x00000000, 0x432d65fa, 0x3fa70153, 0x00000000,
0x00000000, 0x891a4602, 0x3f9d03ef, 0xd62ca5f8, 0x3fca77d9,
0xb35f4628, 0x3f97a265, 0x433258fa, 0x3fd8cf51, 0xb58fd909,
0x3f8f88e3, 0x01771cea, 0x3fc2b154, 0xf3562f8e, 0x3f888f57,
0xc028a723, 0x3fc7370f, 0x20b7f9f0, 0x3f80f44c, 0x214368e9,
0x3fb6dfaa, 0x28891863, 0x3f79b4b6, 0x172dbbf0, 0x3fb6cb8e,
0xe0553158, 0x3fc975f5, 0x593fe814, 0x3c2ef5d3, 0x00000000,
0x3ff00000, 0x03dec550, 0x3fa44203, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x9314533e, 0x3fbb8ec5, 0x00000000,
0x00000000, 0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000,
0xdcb427fd, 0x3fb13950, 0xd87ab0bb, 0x3fd5335e, 0xce0ae8a5,
0x3fabb382, 0x79143126, 0x3fddba41, 0x5f2b28d4, 0x3fa552f1,
0x59f21a6d, 0x3fd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa,
0x3fd0576c, 0x8f2c2950, 0x3f9a4898, 0xc0b3f22c, 0x3fc59462,
0x1883a4b8, 0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc,
0x3fd36a08, 0x1dce993d, 0xbc6d704d, 0x00000000, 0x3ff00000,
0x2b82ab63, 0x3fb78e92, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x56f37042, 0x3fccfc56, 0x00000000, 0x00000000,
0xaa563951, 0x3fc90125, 0x00000000, 0x00000000, 0x3d0e7c5d,
0x3fc50533, 0x9bed9b2e, 0x3fdf0ed9, 0x5fe7c47c, 0x3fc1f250,
0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0x3fbe5c71, 0x86362c20,
0x3fda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091, 0x3fd911bd,
0xb56658be, 0x3fb5e4c7, 0x93a2fd76, 0x3fd3c092, 0xda271794,
0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32, 0x3fda8279,
0xb68c1467, 0x3c708b2f, 0x00000000, 0x3ff00000, 0x980c4337,
0x3fc5f619, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xcc03e501, 0x3fdff10f, 0x00000000, 0x00000000, 0x44a4e845,
0x3fddb63b, 0x00000000, 0x00000000, 0x3768ad9f, 0x3fdb72a4,
0x3dd01cca, 0x3fe5fdb9, 0xa61d2811, 0x3fd972b2, 0x5645ad0b,
0x3fe977f9, 0xd013b3ab, 0x3fd78ca3, 0xbf0bf914, 0x3fe4f192,
0x4d53e730, 0x3fd5d060, 0x3f8b9000, 0x3fe49933, 0xe2b82f08,
0x3fd4322a, 0x5936a835, 0x3fe27ae1, 0xb1c61c9b, 0x3fd2b3fb,
0xef478605, 0x3fe1659e, 0x190834ec, 0x3fe11ab7, 0xcdb625ea,
0xbc8e564b, 0x00000000, 0x3ff00000, 0xb07217e3, 0x3fd248f1,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0,
0x3ff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58,
0x00000000, 0x00000000, 0xff691fa2, 0x3ff3972e, 0xe93463bd,
0x3feeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10,
0xa04e8ea3, 0x3ff4541a, 0x386accd3, 0x3ff1369e, 0x222a66dd,
0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0x3ff5178f,
0xddaa0031, 0x3ff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d,
0x3ff29311, 0x2ab7f990, 0x3fe561b8, 0x209c7df1, 0x3c87a8c5,
0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0xc7ab4d5a, 0x40085e24,
0x00000000, 0x00000000, 0xe93ea75d, 0x400b963d, 0x00000000,
0x00000000, 0x94a7f25a, 0x400f37e2, 0x4b6261cb, 0x3ff5f984,
0x5a9dd812, 0x4011aab0, 0x74c30018, 0x3ffaf5a5, 0x7f2ce8e3,
0x4013fe8b, 0xfe8e54fa, 0x3ffd7334, 0x670d618d, 0x4016a10c,
0x4db97058, 0x4000e012, 0x24df44dd, 0x40199c5f, 0x697d6ece,
0x4003006e, 0x83298b82, 0x401cfc4d, 0x19d490d6, 0x40058c19,
0x2ae42850, 0x3fea4300, 0x118e20e6, 0xbc7a6db8, 0x00000000,
0x40000000, 0xe33345b8, 0xbfd4e526, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x65965966, 0x40219659, 0x00000000,
0x00000000, 0x882c10fa, 0x402664f4, 0x00000000, 0x00000000,
0x83cd3723, 0x402c8342, 0x00000000, 0x40000000, 0x55e6c23d,
0x403226e3, 0x55555555, 0x40055555, 0x34451939, 0x40371c96,
0xaaaaaaab, 0x400aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111,
0x40111111, 0xa738201f, 0x4042bbce, 0x05b05b06, 0x4015b05b,
0x452b75e3, 0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000,
0x3ff00000, 0x00000000, 0x00000000, 0x00000000, 0x40000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x4f48b8d3, 0xbf33eaf9, 0x00000000, 0x00000000,
0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000, 0xd0258911,
0xbf0abaf3, 0x23e49fe9, 0xbfab5a8c, 0x2d53222e, 0x3ef60d15,
0x21169451, 0x3fa172b2, 0xbb254dbc, 0xbee1d3b5, 0xdbf93b8e,
0xbf84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7, 0x3f743924,
0x794a8297, 0xbeb7b7b9, 0xe015f797, 0xbf5d41f5, 0xe41a4a56,
0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000, 0xbfce49ce,
0x8c743719, 0x3d1eb860, 0x00000000, 0x00000000, 0x1b4863cf,
0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8,
0x535ad890, 0xbf2b9320, 0x00000000, 0x00000000, 0x018fdf1f,
0x3f16d61d, 0x00000000, 0x00000000, 0x0359f1be, 0xbf0139e4,
0xa4317c6d, 0xbfa67e17, 0x82672d0f, 0x3eebb405, 0x2f1b621e,
0x3f9f455b, 0x51ccf238, 0xbed55317, 0xf437b9ac, 0xbf804bee,
0xc791a2b5, 0x3ec0e993, 0x919a1db2, 0x3f7080c2, 0x336a5b0e,
0xbeaa48a2, 0x0a268358, 0xbf55a443, 0xdfd978e4, 0x3e94b61f,
0xd7767a58, 0x3f431806, 0x2aea0000, 0xbfc9bbe8, 0x7723ea61,
0xbd3a2369, 0x00000000, 0x00000000, 0xdf7796ff, 0x3fd6e642,
0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce,
0xbf231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0,
0x00000000, 0x00000000, 0x790b4cbc, 0xbef66191, 0x848a46c6,
0xbfa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea,
0xfdd299ef, 0xbec9dd1a, 0x3f8dbaaf, 0xbf793363, 0x309fc6ea,
0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0xbe9dae11,
0x3e5c67b3, 0xbf4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4,
0x3f3d1eb1, 0x29cfc000, 0xbfc549ce, 0xbf159358, 0xbd397b33,
0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000,
0x3ff00000, 0x00000000, 0xfffffff8, 0x7d98a556, 0xbf1a3958,
0x00000000, 0x00000000, 0x9d88dc01, 0x3f0704c2, 0x00000000,
0x00000000, 0x73742a2b, 0xbeed054a, 0x58844587, 0xbf9c2a13,
0x55688a79, 0x3ed7a326, 0xee33f1d6, 0x3f9a48f4, 0xa8dc9888,
0xbebf8939, 0xaad4b5b8, 0xbf72f746, 0x9102efa1, 0x3ea88f82,
0xdabc29cf, 0x3f678228, 0x9289afb8, 0xbe90f456, 0x741fb4ed,
0xbf46f3a3, 0xa97f6663, 0x3e79b4bf, 0xca89ff3f, 0x3f36db70,
0xa8a2a000, 0xbfc0ee13, 0x3da24be1, 0xbd338b9f, 0x00000000,
0x00000000, 0x11cd6c69, 0x3fd601fd, 0x00000000, 0x3ff00000,
0x00000000, 0xfffffff8, 0x1a154b97, 0xbf116b01, 0x00000000,
0x00000000, 0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000,
0xb93820c8, 0xbee264d4, 0xbb6cbb18, 0xbf94ab8c, 0x888d4d92,
0x3ed0568b, 0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0xbeb2f950,
0x22cf9f74, 0xbf6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce,
0x3f64aad7, 0x637b73af, 0xbe83487c, 0xe522591a, 0xbf3fc092,
0xa158e8bc, 0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000,
0xbfb9477f, 0xc2c2d2bc, 0xbd135ef9, 0x00000000, 0x00000000,
0xf2fdb123, 0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000,
0xfffffff8, 0xc41acb64, 0xbf05448d, 0x00000000, 0x00000000,
0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000, 0x9e42962d,
0xbed5aea5, 0x2579f8ef, 0xbf8b2398, 0x288a1ed9, 0x3ec81441,
0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0xbea57cd3, 0x5766336f,
0xbf617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8, 0x3f62c646,
0x6b8fb29c, 0xbe74e3a3, 0xdc4c0409, 0xbf33f952, 0x9bffe365,
0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000, 0xbfb0cc62,
0x016b907f, 0xbd119cbc, 0x00000000, 0x00000000, 0xe6b9d8fa,
0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8,
0x5daf22a6, 0xbef429d7, 0x00000000, 0x00000000, 0x06bca545,
0x3ef7a27d, 0x00000000, 0x00000000, 0x7211c19a, 0xbec41c3e,
0x956ed53e, 0xbf7ae3f4, 0xee750e72, 0x3ec3901b, 0x91d443f5,
0x3f96f713, 0x36661e6c, 0xbe936e09, 0x506f9381, 0xbf5122e8,
0xcb6dd43f, 0x3e9041b9, 0x6698b2ff, 0x3f61b0c7, 0x576bf12b,
0xbe625a8a, 0xe5a0e9dc, 0xbf23499d, 0x110384dd, 0x3e5b1c2c,
0x68d43db6, 0x3f2cb899, 0x6ecac000, 0xbfa0c414, 0xcd7dd58c,
0x3d13500f, 0x00000000, 0x00000000, 0x85a2c8fb, 0x3fd55fe0,
0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x2bf70ebe, 0x3ef66a8f,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0xd644267f, 0x3ec22805, 0x16c16c17, 0x3f96c16c,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xc4e09162,
0x3e8d6db2, 0xbc011567, 0x3f61566a, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x1f79955c, 0x3e57da4e, 0x9334ef0b,
0x3f2bbd77, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x55555555, 0x3fd55555, 0x00000000,
0x3ff00000, 0x00000000, 0xfffffff8, 0x5daf22a6, 0x3ef429d7,
0x00000000, 0x00000000, 0x06bca545, 0x3ef7a27d, 0x00000000,
0x00000000, 0x7211c19a, 0x3ec41c3e, 0x956ed53e, 0x3f7ae3f4,
0xee750e72, 0x3ec3901b, 0x91d443f5, 0x3f96f713, 0x36661e6c,
0x3e936e09, 0x506f9381, 0x3f5122e8, 0xcb6dd43f, 0x3e9041b9,
0x6698b2ff, 0x3f61b0c7, 0x576bf12b, 0x3e625a8a, 0xe5a0e9dc,
0x3f23499d, 0x110384dd, 0x3e5b1c2c, 0x68d43db6, 0x3f2cb899,
0x6ecac000, 0x3fa0c414, 0xcd7dd58c, 0xbd13500f, 0x00000000,
0x00000000, 0x85a2c8fb, 0x3fd55fe0, 0x00000000, 0x3ff00000,
0x00000000, 0xfffffff8, 0xc41acb64, 0x3f05448d, 0x00000000,
0x00000000, 0xdbb03d6f, 0x3efb7ad2, 0x00000000, 0x00000000,
0x9e42962d, 0x3ed5aea5, 0x2579f8ef, 0x3f8b2398, 0x288a1ed9,
0x3ec81441, 0xb0198dc5, 0x3f979a3a, 0x2fdfe253, 0x3ea57cd3,
0x5766336f, 0x3f617caa, 0x600944c3, 0x3e954ed6, 0xa4e0aaf8,
0x3f62c646, 0x6b8fb29c, 0x3e74e3a3, 0xdc4c0409, 0x3f33f952,
0x9bffe365, 0x3e6301ec, 0xb8869e44, 0x3f2fc566, 0xe1e04000,
0x3fb0cc62, 0x016b907f, 0x3d119cbc, 0x00000000, 0x00000000,
0xe6b9d8fa, 0x3fd57fb3, 0x00000000, 0x3ff00000, 0x00000000,
0xfffffff8, 0x1a154b97, 0x3f116b01, 0x00000000, 0x00000000,
0x2d427630, 0x3f0147bf, 0x00000000, 0x00000000, 0xb93820c8,
0x3ee264d4, 0xbb6cbb18, 0x3f94ab8c, 0x888d4d92, 0x3ed0568b,
0x60730f7c, 0x3f98b19b, 0xe4b1fb11, 0x3eb2f950, 0x22cf9f74,
0x3f6b21cd, 0x4a3ff0a6, 0x3e9f499e, 0xfd2b83ce, 0x3f64aad7,
0x637b73af, 0x3e83487c, 0xe522591a, 0x3f3fc092, 0xa158e8bc,
0x3e6e3aae, 0xe5e82ffa, 0x3f329d2f, 0xd636a000, 0x3fb9477f,
0xc2c2d2bc, 0x3d135ef9, 0x00000000, 0x00000000, 0xf2fdb123,
0x3fd5b566, 0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8,
0x7d98a556, 0x3f1a3958, 0x00000000, 0x00000000, 0x9d88dc01,
0x3f0704c2, 0x00000000, 0x00000000, 0x73742a2b, 0x3eed054a,
0x58844587, 0x3f9c2a13, 0x55688a79, 0x3ed7a326, 0xee33f1d6,
0x3f9a48f4, 0xa8dc9888, 0x3ebf8939, 0xaad4b5b8, 0x3f72f746,
0x9102efa1, 0x3ea88f82, 0xdabc29cf, 0x3f678228, 0x9289afb8,
0x3e90f456, 0x741fb4ed, 0x3f46f3a3, 0xa97f6663, 0x3e79b4bf,
0xca89ff3f, 0x3f36db70, 0xa8a2a000, 0x3fc0ee13, 0x3da24be1,
0x3d338b9f, 0x00000000, 0x00000000, 0x11cd6c69, 0x3fd601fd,
0x00000000, 0x3ff00000, 0x00000000, 0xfffffff8, 0xb9ff07ce,
0x3f231c78, 0x00000000, 0x00000000, 0xa5517182, 0x3f0ff0e0,
0x00000000, 0x00000000, 0x790b4cbc, 0x3ef66191, 0x848a46c6,
0x3fa21ac0, 0xb16435fa, 0x3ee1d3ec, 0x2a1aa832, 0x3f9c71ea,
0xfdd299ef, 0x3ec9dd1a, 0x3f8dbaaf, 0x3f793363, 0x309fc6ea,
0x3eb415d6, 0xbee60471, 0x3f6b83ba, 0x94a0a697, 0x3e9dae11,
0x3e5c67b3, 0x3f4fd07b, 0x9a8f3e3e, 0x3e86bd75, 0xa4beb7a4,
0x3f3d1eb1, 0x29cfc000, 0x3fc549ce, 0xbf159358, 0x3d397b33,
0x00000000, 0x00000000, 0x871fee6c, 0x3fd666f0, 0x00000000,
0x3ff00000, 0x00000000, 0xfffffff8, 0x535ad890, 0x3f2b9320,
0x00000000, 0x00000000, 0x018fdf1f, 0x3f16d61d, 0x00000000,
0x00000000, 0x0359f1be, 0x3f0139e4, 0xa4317c6d, 0x3fa67e17,
0x82672d0f, 0x3eebb405, 0x2f1b621e, 0x3f9f455b, 0x51ccf238,
0x3ed55317, 0xf437b9ac, 0x3f804bee, 0xc791a2b5, 0x3ec0e993,
0x919a1db2, 0x3f7080c2, 0x336a5b0e, 0x3eaa48a2, 0x0a268358,
0x3f55a443, 0xdfd978e4, 0x3e94b61f, 0xd7767a58, 0x3f431806,
0x2aea0000, 0x3fc9bbe8, 0x7723ea61, 0x3d3a2369, 0x00000000,
0x00000000, 0xdf7796ff, 0x3fd6e642, 0x00000000, 0x3ff00000,
0x00000000, 0xfffffff8, 0x4f48b8d3, 0x3f33eaf9, 0x00000000,
0x00000000, 0x0cf7586f, 0x3f20b8ea, 0x00000000, 0x00000000,
0xd0258911, 0x3f0abaf3, 0x23e49fe9, 0x3fab5a8c, 0x2d53222e,
0x3ef60d15, 0x21169451, 0x3fa172b2, 0xbb254dbc, 0x3ee1d3b5,
0xdbf93b8e, 0x3f84c7db, 0x05b4630b, 0x3ecd3364, 0xee9aada7,
0x3f743924, 0x794a8297, 0x3eb7b7b9, 0xe015f797, 0x3f5d41f5,
0xe41a4a56, 0x3ea35dfb, 0xe4c2a251, 0x3f49a2ab, 0x5af9e000,
0x3fce49ce, 0x8c743719, 0xbd1eb860, 0x00000000, 0x00000000,
0x1b4863cf, 0x3fd78294, 0x00000000, 0x3ff00000, 0x00000000,
0xfffffff8, 0x65965966, 0xc0219659, 0x00000000, 0x00000000,
0x882c10fa, 0x402664f4, 0x00000000, 0x00000000, 0x83cd3723,
0xc02c8342, 0x00000000, 0xc0000000, 0x55e6c23d, 0x403226e3,
0x55555555, 0x40055555, 0x34451939, 0xc0371c96, 0xaaaaaaab,
0xc00aaaaa, 0x0e157de0, 0x403d6d3d, 0x11111111, 0x40111111,
0xa738201f, 0xc042bbce, 0x05b05b06, 0xc015b05b, 0x452b75e3,
0x4047da36, 0x1ba1ba1c, 0x401ba1ba, 0x00000000, 0xbff00000,
0x00000000, 0x00000000, 0x00000000, 0x40000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xc7ab4d5a, 0xc0085e24, 0x00000000, 0x00000000, 0xe93ea75d,
0x400b963d, 0x00000000, 0x00000000, 0x94a7f25a, 0xc00f37e2,
0x4b6261cb, 0xbff5f984, 0x5a9dd812, 0x4011aab0, 0x74c30018,
0x3ffaf5a5, 0x7f2ce8e3, 0xc013fe8b, 0xfe8e54fa, 0xbffd7334,
0x670d618d, 0x4016a10c, 0x4db97058, 0x4000e012, 0x24df44dd,
0xc0199c5f, 0x697d6ece, 0xc003006e, 0x83298b82, 0x401cfc4d,
0x19d490d6, 0x40058c19, 0x2ae42850, 0xbfea4300, 0x118e20e6,
0x3c7a6db8, 0x00000000, 0x40000000, 0xe33345b8, 0xbfd4e526,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2b2c49d0,
0xbff2de9c, 0x00000000, 0x00000000, 0x2655bc98, 0x3ff33e58,
0x00000000, 0x00000000, 0xff691fa2, 0xbff3972e, 0xe93463bd,
0xbfeeed87, 0x070e10a0, 0x3ff3f5b2, 0xf4d790a4, 0x3ff20c10,
0xa04e8ea3, 0xbff4541a, 0x386accd3, 0xbff1369e, 0x222a66dd,
0x3ff4b521, 0x22a9777e, 0x3ff20817, 0x52a04a6e, 0xbff5178f,
0xddaa0031, 0xbff22137, 0x4447d47c, 0x3ff57c01, 0x1e9c7f1d,
0x3ff29311, 0x2ab7f990, 0xbfe561b8, 0x209c7df1, 0xbc87a8c5,
0x00000000, 0x3ff00000, 0x4170bcc6, 0x3fdc92d8, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0xcc03e501, 0xbfdff10f,
0x00000000, 0x00000000, 0x44a4e845, 0x3fddb63b, 0x00000000,
0x00000000, 0x3768ad9f, 0xbfdb72a4, 0x3dd01cca, 0xbfe5fdb9,
0xa61d2811, 0x3fd972b2, 0x5645ad0b, 0x3fe977f9, 0xd013b3ab,
0xbfd78ca3, 0xbf0bf914, 0xbfe4f192, 0x4d53e730, 0x3fd5d060,
0x3f8b9000, 0x3fe49933, 0xe2b82f08, 0xbfd4322a, 0x5936a835,
0xbfe27ae1, 0xb1c61c9b, 0x3fd2b3fb, 0xef478605, 0x3fe1659e,
0x190834ec, 0xbfe11ab7, 0xcdb625ea, 0x3c8e564b, 0x00000000,
0x3ff00000, 0xb07217e3, 0x3fd248f1, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x56f37042, 0xbfccfc56, 0x00000000,
0x00000000, 0xaa563951, 0x3fc90125, 0x00000000, 0x00000000,
0x3d0e7c5d, 0xbfc50533, 0x9bed9b2e, 0xbfdf0ed9, 0x5fe7c47c,
0x3fc1f250, 0x96c125e5, 0x3fe2edd9, 0x5a02bbd8, 0xbfbe5c71,
0x86362c20, 0xbfda08b7, 0x4b4435ed, 0x3fb9d342, 0x4b494091,
0x3fd911bd, 0xb56658be, 0xbfb5e4c7, 0x93a2fd76, 0xbfd3c092,
0xda271794, 0x3fb29910, 0x3303df2b, 0x3fd189be, 0x99fcef32,
0xbfda8279, 0xb68c1467, 0xbc708b2f, 0x00000000, 0x3ff00000,
0x980c4337, 0x3fc5f619, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x9314533e, 0xbfbb8ec5, 0x00000000, 0x00000000,
0x09aa36d0, 0x3fb6d3f4, 0x00000000, 0x00000000, 0xdcb427fd,
0xbfb13950, 0xd87ab0bb, 0xbfd5335e, 0xce0ae8a5, 0x3fabb382,
0x79143126, 0x3fddba41, 0x5f2b28d4, 0xbfa552f1, 0x59f21a6d,
0xbfd015ab, 0x22c27d95, 0x3fa0e984, 0xe19fc6aa, 0x3fd0576c,
0x8f2c2950, 0xbf9a4898, 0xc0b3f22c, 0xbfc59462, 0x1883a4b8,
0x3f94b61c, 0x3f838640, 0x3fc30eb8, 0x355c63dc, 0xbfd36a08,
0x1dce993d, 0x3c6d704d, 0x00000000, 0x3ff00000, 0x2b82ab63,
0x3fb78e92, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x5a279ea3, 0xbfaa3407, 0x00000000, 0x00000000, 0x432d65fa,
0x3fa70153, 0x00000000, 0x00000000, 0x891a4602, 0xbf9d03ef,
0xd62ca5f8, 0xbfca77d9, 0xb35f4628, 0x3f97a265, 0x433258fa,
0x3fd8cf51, 0xb58fd909, 0xbf8f88e3, 0x01771cea, 0xbfc2b154,
0xf3562f8e, 0x3f888f57, 0xc028a723, 0x3fc7370f, 0x20b7f9f0,
0xbf80f44c, 0x214368e9, 0xbfb6dfaa, 0x28891863, 0x3f79b4b6,
0x172dbbf0, 0x3fb6cb8e, 0xe0553158, 0xbfc975f5, 0x593fe814,
0xbc2ef5d3, 0x00000000, 0x3ff00000, 0x03dec550, 0x3fa44203,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x4e435f9b,
0xbf953f83, 0x00000000, 0x00000000, 0x3c6e8e46, 0x3f9b74ea,
0x00000000, 0x00000000, 0xda5b7511, 0xbf85ad63, 0xdc230b9b,
0xbfb97558, 0x26cb3788, 0x3f881308, 0x76fc4985, 0x3fd62ac9,
0x77bb08ba, 0xbf757c85, 0xb6247521, 0xbfb1381e, 0x5922170c,
0x3f754e95, 0x8746482d, 0x3fc27f83, 0x11055b30, 0xbf64e391,
0x3e666320, 0xbfa3e609, 0x0de9dae3, 0x3f6301df, 0x1f1dca06,
0x3fafa8ae, 0x8c5b2da2, 0xbfb936bb, 0x4e88f7a5, 0xbc587d05,
0x00000000, 0x3ff00000, 0xa8935dd9, 0x3f83dde2, 0x00000000,
0x00000000, 0x00000000, 0x00000000
// @formatter:on
});
private static ArrayDataPointerConstant mask35 = pointerConstant(16, new int[]{
// @formatter:off
0xfffc0000, 0xffffffff, 0x00000000, 0x00000000
// @formatter:on
});
private static ArrayDataPointerConstant q11 = pointerConstant(16, new int[]{
// @formatter:off
0xb8fe4d77, 0x3f82609a
// @formatter:on
});
private static ArrayDataPointerConstant q9 = pointerConstant(16, new int[]{
// @formatter:off
0xbf847a43, 0x3f9664a0
// @formatter:on
});
private static ArrayDataPointerConstant q7 = pointerConstant(16, new int[]{
// @formatter:off
0x52c4c8ab, 0x3faba1ba
// @formatter:on
});
private static ArrayDataPointerConstant q5 = pointerConstant(16, new int[]{
// @formatter:off
0x11092746, 0x3fc11111
// @formatter:on
});
private static ArrayDataPointerConstant q3 = pointerConstant(16, new int[]{
// @formatter:off
0x55555612, 0x3fd55555
// @formatter:on
});
private static ArrayDataPointerConstant piInvTable = pointerConstant(16, new int[]{
// @formatter:off
0x00000000, 0x00000000, 0xa2f9836e, 0x4e441529, 0xfc2757d1,
0xf534ddc0, 0xdb629599, 0x3c439041, 0xfe5163ab, 0xdebbc561,
0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c, 0xfe1deb1c,
0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff,
0xde05980f, 0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7,
0x4f463f66, 0x9e5fea2d, 0x7527bac7, 0xebe5f17b, 0x3d0739f7,
0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08, 0x56033046, 0xfc7b6bab,
0xf0cfbc21
// @formatter:on
});
private static ArrayDataPointerConstant pi4 = pointerConstant(8, new int[]{
// @formatter:off
0x00000000, 0x3fe921fb,
});
private static ArrayDataPointerConstant pi48 = pointerConstant(8, new int[]{
0x4611a626, 0x3e85110b
// @formatter:on
});
private static ArrayDataPointerConstant qq2 = pointerConstant(8, new int[]{
// @formatter:off
0x676733af, 0x3d32e7b9
// @formatter:on
});
private static ArrayDataPointerConstant one = pointerConstant(8, new int[]{
// @formatter:off
0x00000000, 0x3ff00000
// @formatter:on
});
private static ArrayDataPointerConstant twoPow55 = pointerConstant(8, new int[]{
// @formatter:off
0x00000000, 0x43600000
// @formatter:on
});
private static ArrayDataPointerConstant twoPowM55 = pointerConstant(4, new int[]{
// @formatter:off
0x00000000, 0x3c800000
// @formatter:on
});
private static ArrayDataPointerConstant negZero = pointerConstant(4, new int[]{
// @formatter:off
0x00000000, 0x80000000
// @formatter:on
});
@Override
public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
/*
* This code relies on recordExternalAddress providing the same address when called
* repeatedly. Especially for piInvTable.
*/
Label block0 = new Label();
Label block1 = new Label();
Label block2 = new Label();
Label block3 = new Label();
Label block4 = new Label();
Label block5 = new Label();
Label block6 = new Label();
Label block7 = new Label();
Label block8 = new Label();
Label block9 = new Label();
Label block10 = new Label();
Label block11 = new Label();
Label block12 = new Label();
Label block13 = new Label();
Label block14 = new Label();
masm.push(rbx);
masm.subq(rsp, 16);
masm.movsd(new AMD64Address(rsp, 8), xmm0);
masm.pextrw(rax, xmm0, 3);
masm.andl(rax, 32767);
masm.subl(rax, 16314);
masm.cmplAndJcc(rax, 270, ConditionFlag.Above, block0, false);
masm.movdqu(xmm5, recordExternalAddress(crb, onehalf)); // 0x00000000, 0x3fe00000,
// 0x00000000, 0x3fe00000
masm.movdqu(xmm6, recordExternalAddress(crb, mul16)); // 0x00000000, 0x40300000,
// 0x00000000, 0x3ff00000
masm.unpcklpd(xmm0, xmm0);
masm.movdqu(xmm4, recordExternalAddress(crb, signMask)); // 0x00000000, 0x80000000,
// 0x00000000, 0x80000000
masm.andpd(xmm4, xmm0);
masm.movdqu(xmm1, recordExternalAddress(crb, pi32Inv)); // 0x6dc9c883, 0x3fe45f30,
// 0x6dc9c883, 0x40245f30
masm.mulpd(xmm1, xmm0);
masm.por(xmm5, xmm4);
masm.addpd(xmm1, xmm5);
masm.movdqu(xmm7, xmm1);
masm.unpckhpd(xmm7, xmm7);
masm.cvttsd2sil(rdx, xmm7);
masm.cvttpd2dq(xmm1, xmm1);
masm.cvtdq2pd(xmm1, xmm1);
masm.mulpd(xmm1, xmm6);
masm.movdqu(xmm3, recordExternalAddress(crb, p1)); // 0x54444000, 0x3fb921fb,
// 0x54440000, 0x3fb921fb
masm.movq(xmm5, recordExternalAddress(crb, qq2)); // 0x676733af, 0x3d32e7b9
masm.addq(rdx, 469248);
masm.movdqu(xmm4, recordExternalAddress(crb, p2)); // 0x67674000, 0xbd32e7b9,
// 0x4c4c0000, 0x3d468c23
masm.mulpd(xmm3, xmm1);
masm.andq(rdx, 31);
masm.mulsd(xmm5, xmm1);
masm.movq(rcx, rdx);
masm.mulpd(xmm4, xmm1);
masm.shlq(rcx, 1);
masm.subpd(xmm0, xmm3);
masm.mulpd(xmm1, recordExternalAddress(crb, p3)); // 0x3707344a, 0x3aa8a2e0,
// 0x03707345, 0x3ae98a2e
masm.addq(rdx, rcx);
masm.shlq(rcx, 2);
masm.addq(rdx, rcx);
masm.addsd(xmm5, xmm0);
masm.movdqu(xmm2, xmm0);
masm.subpd(xmm0, xmm4);
masm.movq(xmm6, recordExternalAddress(crb, one)); // 0x00000000, 0x3ff00000
masm.shlq(rdx, 4);
masm.leaq(rax, recordExternalAddress(crb, ctable));
masm.andpd(xmm5, recordExternalAddress(crb, mask35)); // 0xfffc0000, 0xffffffff,
// 0x00000000, 0x00000000
masm.movdqu(xmm3, xmm0);
masm.addq(rax, rdx);
masm.subpd(xmm2, xmm0);
masm.unpckhpd(xmm0, xmm0);
masm.divsd(xmm6, xmm5);
masm.subpd(xmm2, xmm4);
masm.movdqu(xmm7, new AMD64Address(rax, 16));
masm.subsd(xmm3, xmm5);
masm.mulpd(xmm7, xmm0);
masm.subpd(xmm2, xmm1);
masm.movdqu(xmm1, new AMD64Address(rax, 48));
masm.mulpd(xmm1, xmm0);
masm.movdqu(xmm4, new AMD64Address(rax, 96));
masm.mulpd(xmm4, xmm0);
masm.addsd(xmm2, xmm3);
masm.movdqu(xmm3, xmm0);
masm.mulpd(xmm0, xmm0);
masm.addpd(xmm7, new AMD64Address(rax, 0));
masm.addpd(xmm1, new AMD64Address(rax, 32));
masm.mulpd(xmm1, xmm0);
masm.addpd(xmm4, new AMD64Address(rax, 80));
masm.addpd(xmm7, xmm1);
masm.movdqu(xmm1, new AMD64Address(rax, 112));
masm.mulpd(xmm1, xmm0);
masm.mulpd(xmm0, xmm0);
masm.addpd(xmm4, xmm1);
masm.movdqu(xmm1, new AMD64Address(rax, 64));
masm.mulpd(xmm1, xmm0);
masm.addpd(xmm7, xmm1);
masm.movdqu(xmm1, xmm3);
masm.mulpd(xmm3, xmm0);
masm.mulsd(xmm0, xmm0);
masm.mulpd(xmm1, new AMD64Address(rax, 144));
masm.mulpd(xmm4, xmm3);
masm.movdqu(xmm3, xmm1);
masm.addpd(xmm7, xmm4);
masm.movdqu(xmm4, xmm1);
masm.mulsd(xmm0, xmm7);
masm.unpckhpd(xmm7, xmm7);
masm.addsd(xmm0, xmm7);
masm.unpckhpd(xmm1, xmm1);
masm.addsd(xmm3, xmm1);
masm.subsd(xmm4, xmm3);
masm.addsd(xmm1, xmm4);
masm.movdqu(xmm4, xmm2);
masm.movq(xmm7, new AMD64Address(rax, 144));
masm.unpckhpd(xmm2, xmm2);
masm.addsd(xmm7, new AMD64Address(rax, 152));
masm.mulsd(xmm7, xmm2);
masm.addsd(xmm7, new AMD64Address(rax, 136));
masm.addsd(xmm7, xmm1);
masm.addsd(xmm0, xmm7);
masm.movq(xmm7, recordExternalAddress(crb, one)); // 0x00000000, 0x3ff00000
masm.mulsd(xmm4, xmm6);
masm.movq(xmm2, new AMD64Address(rax, 168));
masm.andpd(xmm2, xmm6);
masm.mulsd(xmm5, xmm2);
masm.mulsd(xmm6, new AMD64Address(rax, 160));
masm.subsd(xmm7, xmm5);
masm.subsd(xmm2, new AMD64Address(rax, 128));
masm.subsd(xmm7, xmm4);
masm.mulsd(xmm7, xmm6);
masm.movdqu(xmm4, xmm3);
masm.subsd(xmm3, xmm2);
masm.addsd(xmm2, xmm3);
masm.subsd(xmm4, xmm2);
masm.addsd(xmm0, xmm4);
masm.subsd(xmm0, xmm7);
masm.addsd(xmm0, xmm3);
masm.jmp(block14);
masm.bind(block0);
masm.jcc(AMD64Assembler.ConditionFlag.Greater, block1);
masm.pextrw(rax, xmm0, 3);
masm.movl(rdx, rax);
masm.andlAndJcc(rax, 32752, ConditionFlag.Equal, block2, false);
masm.andl(rdx, 32767);
masm.cmplAndJcc(rdx, 15904, ConditionFlag.Below, block3, false);
masm.movdqu(xmm2, xmm0);
masm.movdqu(xmm3, xmm0);
masm.movq(xmm1, recordExternalAddress(crb, q11)); // 0xb8fe4d77, 0x3f82609a
masm.mulsd(xmm2, xmm0);
masm.mulsd(xmm3, xmm2);
masm.mulsd(xmm1, xmm2);
masm.addsd(xmm1, recordExternalAddress(crb, q9)); // 0xbf847a43, 0x3f9664a0
masm.mulsd(xmm1, xmm2);
masm.addsd(xmm1, recordExternalAddress(crb, q7)); // 0x52c4c8ab, 0x3faba1ba
masm.mulsd(xmm1, xmm2);
masm.addsd(xmm1, recordExternalAddress(crb, q5)); // 0x11092746, 0x3fc11111
masm.mulsd(xmm1, xmm2);
masm.addsd(xmm1, recordExternalAddress(crb, q3)); // 0x55555612, 0x3fd55555
masm.mulsd(xmm1, xmm3);
masm.addsd(xmm0, xmm1);
masm.jmp(block14);
masm.bind(block3);
masm.movq(xmm3, recordExternalAddress(crb, twoPow55)); // 0x00000000, 0x43600000
masm.mulsd(xmm3, xmm0);
masm.addsd(xmm0, xmm3);
masm.mulsd(xmm0, recordExternalAddress(crb, twoPowM55)); // 0x00000000, 0x3c800000
masm.jmp(block14);
masm.bind(block2);
masm.movdqu(xmm1, xmm0);
masm.mulsd(xmm1, xmm1);
masm.jmp(block14);
masm.bind(block1);
masm.pextrw(rax, xmm0, 3);
masm.andl(rax, 32752);
masm.cmplAndJcc(rax, 32752, ConditionFlag.Equal, block4, false);
masm.pextrw(rcx, xmm0, 3);
masm.andl(rcx, 32752);
masm.subl(rcx, 16224);
masm.shrl(rcx, 7);
masm.andl(rcx, 65532);
masm.leaq(r11, recordExternalAddress(crb, piInvTable));
masm.addq(rcx, r11);
masm.movdq(rax, xmm0);
masm.movl(r10, new AMD64Address(rcx, 20));
masm.movl(r8, new AMD64Address(rcx, 24));
masm.movl(rdx, rax);
masm.shrq(rax, 21);
masm.orl(rax, Integer.MIN_VALUE);
masm.shrl(rax, 11);
masm.movl(r9, r10);
masm.imulq(r10, rdx);
masm.imulq(r9, rax);
masm.imulq(r8, rax);
masm.movl(rsi, new AMD64Address(rcx, 16));
masm.movl(rdi, new AMD64Address(rcx, 12));
masm.movl(r11, r10);
masm.shrq(r10, 32);
masm.addq(r9, r10);
masm.addq(r11, r8);
masm.movl(r8, r11);
masm.shrq(r11, 32);
masm.addq(r9, r11);
masm.movl(r10, rsi);
masm.imulq(rsi, rdx);
masm.imulq(r10, rax);
masm.movl(r11, rdi);
masm.imulq(rdi, rdx);
masm.movl(rbx, rsi);
masm.shrq(rsi, 32);
masm.addq(r9, rbx);
masm.movl(rbx, r9);
masm.shrq(r9, 32);
masm.addq(r10, rsi);
masm.addq(r10, r9);
masm.shlq(rbx, 32);
masm.orq(r8, rbx);
masm.imulq(r11, rax);
masm.movl(r9, new AMD64Address(rcx, 8));
masm.movl(rsi, new AMD64Address(rcx, 4));
masm.movl(rbx, rdi);
masm.shrq(rdi, 32);
masm.addq(r10, rbx);
masm.movl(rbx, r10);
masm.shrq(r10, 32);
masm.addq(r11, rdi);
masm.addq(r11, r10);
masm.movq(rdi, r9);
masm.imulq(r9, rdx);
masm.imulq(rdi, rax);
masm.movl(r10, r9);
masm.shrq(r9, 32);
masm.addq(r11, r10);
masm.movl(r10, r11);
masm.shrq(r11, 32);
masm.addq(rdi, r9);
masm.addq(rdi, r11);
masm.movq(r9, rsi);
masm.imulq(rsi, rdx);
masm.imulq(r9, rax);
masm.shlq(r10, 32);
masm.orq(r10, rbx);
masm.movl(rax, new AMD64Address(rcx, 0));
masm.movl(r11, rsi);
masm.shrq(rsi, 32);
masm.addq(rdi, r11);
masm.movl(r11, rdi);
masm.shrq(rdi, 32);
masm.addq(r9, rsi);
masm.addq(r9, rdi);
masm.imulq(rdx, rax);
masm.pextrw(rbx, xmm0, 3);
masm.leaq(rdi, recordExternalAddress(crb, piInvTable));
masm.subq(rcx, rdi);
masm.addl(rcx, rcx);
masm.addl(rcx, rcx);
masm.addl(rcx, rcx);
masm.addl(rcx, 19);
masm.movl(rsi, 32768);
masm.andl(rsi, rbx);
masm.shrl(rbx, 4);
masm.andl(rbx, 2047);
masm.subl(rbx, 1023);
masm.subl(rcx, rbx);
masm.addq(r9, rdx);
masm.movl(rdx, rcx);
masm.addl(rdx, 32);
masm.cmplAndJcc(rcx, 0, ConditionFlag.Less, block5, false);
masm.negl(rcx);
masm.addl(rcx, 29);
masm.shll(r9);
masm.movl(rdi, r9);
masm.andl(r9, 1073741823);
masm.testlAndJcc(r9, 536870912, ConditionFlag.NotEqual, block6, false);
masm.shrl(r9);
masm.movl(rbx, 0);
masm.shlq(r9, 32);
masm.orq(r9, r11);
masm.bind(block7);
masm.bind(block8);
masm.cmpqAndJcc(r9, 0, ConditionFlag.Equal, block9, false);
masm.bind(block10);
masm.bsrq(r11, r9);
masm.movl(rcx, 29);
masm.sublAndJcc(rcx, r11, ConditionFlag.LessEqual, block11, false);
masm.shlq(r9);
masm.movq(rax, r10);
masm.shlq(r10);
masm.addl(rdx, rcx);
masm.negl(rcx);
masm.addl(rcx, 64);
masm.shrq(rax);
masm.shrq(r8);
masm.orq(r9, rax);
masm.orq(r10, r8);
masm.bind(block12);
masm.cvtsi2sdq(xmm0, r9);
masm.shrq(r10, 1);
masm.cvtsi2sdq(xmm3, r10);
masm.xorpd(xmm4, xmm4);
masm.shll(rdx, 4);
masm.negl(rdx);
masm.addl(rdx, 16368);
masm.orl(rdx, rsi);
masm.xorl(rdx, rbx);
masm.pinsrw(xmm4, rdx, 3);
masm.movq(xmm2, recordExternalAddress(crb, pi4)); // 0x00000000, 0x3fe921fb,
// 0x4611a626, 0x3e85110b
masm.movq(xmm7, recordExternalAddress(crb, pi48)); // 0x3fe921fb, 0x4611a626,
// 0x3e85110b
masm.xorpd(xmm5, xmm5);
masm.subl(rdx, 1008);
masm.pinsrw(xmm5, rdx, 3);
masm.mulsd(xmm0, xmm4);
masm.shll(rsi, 16);
masm.sarl(rsi, 31);
masm.mulsd(xmm3, xmm5);
masm.movdqu(xmm1, xmm0);
masm.mulsd(xmm0, xmm2);
masm.shrl(rdi, 30);
masm.addsd(xmm1, xmm3);
masm.mulsd(xmm3, xmm2);
masm.addl(rdi, rsi);
masm.xorl(rdi, rsi);
masm.mulsd(xmm7, xmm1);
masm.movl(rax, rdi);
masm.addsd(xmm7, xmm3);
masm.movdqu(xmm2, xmm0);
masm.addsd(xmm0, xmm7);
masm.subsd(xmm2, xmm0);
masm.addsd(xmm7, xmm2);
masm.movdqu(xmm1, recordExternalAddress(crb, pi32Inv)); // 0x6dc9c883, 0x3fe45f30,
// 0x6dc9c883, 0x40245f30
if (masm.supports(AMD64.CPUFeature.SSE3)) {
masm.movddup(xmm0, xmm0);
} else {
masm.movlhps(xmm0, xmm0);
}
masm.movdqu(xmm4, recordExternalAddress(crb, signMask)); // 0x00000000, 0x80000000,
// 0x00000000, 0x80000000
masm.andpd(xmm4, xmm0);
masm.mulpd(xmm1, xmm0);
if (masm.supports(AMD64.CPUFeature.SSE3)) {
masm.movddup(xmm7, xmm7);
} else {
masm.movlhps(xmm7, xmm7);
}
masm.movdqu(xmm5, recordExternalAddress(crb, onehalf)); // 0x00000000, 0x3fe00000,
// 0x00000000, 0x3fe00000
masm.movdqu(xmm6, recordExternalAddress(crb, mul16)); // 0x00000000, 0x40300000,
// 0x00000000, 0x3ff00000
masm.por(xmm5, xmm4);
masm.addpd(xmm1, xmm5);
masm.movdqu(xmm5, xmm1);
masm.unpckhpd(xmm5, xmm5);
masm.cvttsd2sil(rdx, xmm5);
masm.cvttpd2dq(xmm1, xmm1);
masm.cvtdq2pd(xmm1, xmm1);
masm.mulpd(xmm1, xmm6);
masm.movdqu(xmm3, recordExternalAddress(crb, p1)); // 0x54444000, 0x3fb921fb,
// 0x54440000, 0x3fb921fb
masm.movq(xmm5, recordExternalAddress(crb, qq2)); // 0x676733af, 0x3d32e7b9
masm.shll(rax, 4);
masm.addl(rdx, 469248);
masm.movdqu(xmm4, recordExternalAddress(crb, p2)); // 0x67674000, 0xbd32e7b9,
// 0x4c4c0000, 0x3d468c23
masm.mulpd(xmm3, xmm1);
masm.addl(rdx, rax);
masm.andl(rdx, 31);
masm.mulsd(xmm5, xmm1);
masm.movl(rcx, rdx);
masm.mulpd(xmm4, xmm1);
masm.shll(rcx, 1);
masm.subpd(xmm0, xmm3);
masm.mulpd(xmm1, recordExternalAddress(crb, p3)); // 0x3707344a, 0x3aa8a2e0,
// 0x03707345, 0x3ae98a2e
masm.addl(rdx, rcx);
masm.shll(rcx, 2);
masm.addl(rdx, rcx);
masm.addsd(xmm5, xmm0);
masm.movdqu(xmm2, xmm0);
masm.subpd(xmm0, xmm4);
masm.movq(xmm6, recordExternalAddress(crb, one)); // 0x00000000, 0x3ff00000
masm.shll(rdx, 4);
masm.leaq(rax, recordExternalAddress(crb, ctable));
masm.andpd(xmm5, recordExternalAddress(crb, mask35)); // 0xfffc0000, 0xffffffff,
// 0x00000000, 0x00000000
masm.movdqu(xmm3, xmm0);
masm.addq(rax, rdx);
masm.subpd(xmm2, xmm0);
masm.unpckhpd(xmm0, xmm0);
masm.divsd(xmm6, xmm5);
masm.subpd(xmm2, xmm4);
masm.subsd(xmm3, xmm5);
masm.subpd(xmm2, xmm1);
masm.movdqu(xmm1, new AMD64Address(rax, 48));
masm.addpd(xmm2, xmm7);
masm.movdqu(xmm7, new AMD64Address(rax, 16));
masm.mulpd(xmm7, xmm0);
masm.movdqu(xmm4, new AMD64Address(rax, 96));
masm.mulpd(xmm1, xmm0);
masm.mulpd(xmm4, xmm0);
masm.addsd(xmm2, xmm3);
masm.movdqu(xmm3, xmm0);
masm.mulpd(xmm0, xmm0);
masm.addpd(xmm7, new AMD64Address(rax, 0));
masm.addpd(xmm1, new AMD64Address(rax, 32));
masm.mulpd(xmm1, xmm0);
masm.addpd(xmm4, new AMD64Address(rax, 80));
masm.addpd(xmm7, xmm1);
masm.movdqu(xmm1, new AMD64Address(rax, 112));
masm.mulpd(xmm1, xmm0);
masm.mulpd(xmm0, xmm0);
masm.addpd(xmm4, xmm1);
masm.movdqu(xmm1, new AMD64Address(rax, 64));
masm.mulpd(xmm1, xmm0);
masm.addpd(xmm7, xmm1);
masm.movdqu(xmm1, xmm3);
masm.mulpd(xmm3, xmm0);
masm.mulsd(xmm0, xmm0);
masm.mulpd(xmm1, new AMD64Address(rax, 144));
masm.mulpd(xmm4, xmm3);
masm.movdqu(xmm3, xmm1);
masm.addpd(xmm7, xmm4);
masm.movdqu(xmm4, xmm1);
masm.mulsd(xmm0, xmm7);
masm.unpckhpd(xmm7, xmm7);
masm.addsd(xmm0, xmm7);
masm.unpckhpd(xmm1, xmm1);
masm.addsd(xmm3, xmm1);
masm.subsd(xmm4, xmm3);
masm.addsd(xmm1, xmm4);
masm.movdqu(xmm4, xmm2);
masm.movq(xmm7, new AMD64Address(rax, 144));
masm.unpckhpd(xmm2, xmm2);
masm.addsd(xmm7, new AMD64Address(rax, 152));
masm.mulsd(xmm7, xmm2);
masm.addsd(xmm7, new AMD64Address(rax, 136));
masm.addsd(xmm7, xmm1);
masm.addsd(xmm0, xmm7);
masm.movq(xmm7, recordExternalAddress(crb, one)); // 0x00000000, 0x3ff00000
masm.mulsd(xmm4, xmm6);
masm.movq(xmm2, new AMD64Address(rax, 168));
masm.andpd(xmm2, xmm6);
masm.mulsd(xmm5, xmm2);
masm.mulsd(xmm6, new AMD64Address(rax, 160));
masm.subsd(xmm7, xmm5);
masm.subsd(xmm2, new AMD64Address(rax, 128));
masm.subsd(xmm7, xmm4);
masm.mulsd(xmm7, xmm6);
masm.movdqu(xmm4, xmm3);
masm.subsd(xmm3, xmm2);
masm.addsd(xmm2, xmm3);
masm.subsd(xmm4, xmm2);
masm.addsd(xmm0, xmm4);
masm.subsd(xmm0, xmm7);
masm.addsd(xmm0, xmm3);
masm.jmp(block14);
masm.bind(block9);
masm.addl(rdx, 64);
masm.movq(r9, r10);
masm.movq(r10, r8);
masm.movl(r8, 0);
masm.cmpqAndJcc(r9, 0, ConditionFlag.NotEqual, block10, false);
masm.addl(rdx, 64);
masm.movq(r9, r10);
masm.movq(r10, r8);
masm.cmpqAndJcc(r9, 0, ConditionFlag.NotEqual, block10, false);
masm.jmp(block12);
masm.bind(block11);
masm.jcc(AMD64Assembler.ConditionFlag.Equal, block12);
masm.negl(rcx);
masm.shrq(r10);
masm.movq(rax, r9);
masm.shrq(r9);
masm.subl(rdx, rcx);
masm.negl(rcx);
masm.addl(rcx, 64);
masm.shlq(rax);
masm.orq(r10, rax);
masm.jmp(block12);
masm.bind(block5);
masm.notl(rcx);
masm.shlq(r9, 32);
masm.orq(r9, r11);
masm.shlq(r9);
masm.movq(rdi, r9);
masm.testlAndJcc(r9, Integer.MIN_VALUE, ConditionFlag.NotEqual, block13, false);
masm.shrl(r9);
masm.movl(rbx, 0);
masm.shrq(rdi, 2);
masm.jmp(block8);
masm.bind(block6);
masm.shrl(r9);
masm.movl(rbx, 1073741824);
masm.shrl(rbx);
masm.shlq(r9, 32);
masm.orq(r9, r11);
masm.shlq(rbx, 32);
masm.addl(rdi, 1073741824);
masm.movl(rcx, 0);
masm.movl(r11, 0);
masm.subq(rcx, r8);
masm.sbbq(r11, r10);
masm.sbbq(rbx, r9);
masm.movq(r8, rcx);
masm.movq(r10, r11);
masm.movq(r9, rbx);
masm.movl(rbx, 32768);
masm.jmp(block7);
masm.bind(block13);
masm.shrl(r9);
masm.movq(rbx, 0x100000000L);
masm.shrq(rbx);
masm.movl(rcx, 0);
masm.movl(r11, 0);
masm.subq(rcx, r8);
masm.sbbq(r11, r10);
masm.sbbq(rbx, r9);
masm.movq(r8, rcx);
masm.movq(r10, r11);
masm.movq(r9, rbx);
masm.movl(rbx, 32768);
masm.shrq(rdi, 2);
masm.addl(rdi, 1073741824);
masm.jmp(block8);
masm.bind(block4);
masm.movq(xmm0, new AMD64Address(rsp, 8));
masm.mulsd(xmm0, recordExternalAddress(crb, negZero)); // 0x00000000, 0x80000000
masm.movq(new AMD64Address(rsp, 0), xmm0);
masm.bind(block14);
masm.addq(rsp, 16);
masm.pop(rbx);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy