/*
 * Copyright (c) 2021, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
package jdk.graal.compiler.asm.aarch64;

import static jdk.graal.compiler.asm.aarch64.AArch64Assembler.LoadFlag;
import static jdk.graal.compiler.asm.aarch64.AArch64Assembler.rd;
import static jdk.graal.compiler.asm.aarch64.AArch64Assembler.rn;
import static jdk.graal.compiler.asm.aarch64.AArch64Assembler.rs1;
import static jdk.graal.compiler.asm.aarch64.AArch64Assembler.rs2;
import static jdk.graal.compiler.asm.aarch64.AArch64Assembler.rs3;
import static jdk.vm.ci.aarch64.AArch64.CPU;
import static jdk.vm.ci.aarch64.AArch64.SIMD;
import static jdk.vm.ci.aarch64.AArch64.zr;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

import jdk.graal.compiler.core.common.NumUtil;
import jdk.graal.compiler.core.common.Stride;
import jdk.graal.compiler.debug.GraalError;
import jdk.vm.ci.aarch64.AArch64;
import jdk.vm.ci.aarch64.AArch64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.meta.PlatformKind;

/**
 * This class encapsulates the AArch64 Advanced SIMD (ASIMD) assembler support. The documentation
 * below heavily references the Arm Architecture Reference Manual version G-a. The latest copy of
 * the manual can be obtained from Arm's documentation site.
 *
 * <p>
 * In order to minimize confusion between ASIMD and similarly named General-Purpose/FP
 * instructions, each ASIMD instruction has a capitalized suffix which describes, in the order
 * described in Section C7.2, the type of each of the instruction's operands using the following
 * form:
 *
 * <ul>
 * <li>G: General-purpose register.</li>
 * <li>V: ASIMD register.</li>
 * <li>X: ASIMD register index (V[idx]).</li>
 * <li>S: ASIMD scalar register. This is primarily used to perform integer operations within
 * vector registers without having to transfer the values back to the general-purpose register,
 * or to perform an operation across a single register, such as addv.</li>
 * <li>I: Immediate operand.</li>
 * </ul>
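 *
 * <p>
 * As an illustrative sketch (not part of the original documentation; the assembler instance
 * {@code asm} and the register names are assumed), the convention reads as follows:
 *
 * <pre>
 * // addVVV: destination and both sources are ASIMD registers (V)
 * asm.addVVV(ASIMDSize.FullReg, ElementSize.Word, v0, v1, v2);
 * // dupVG: duplicate a general-purpose register (G) into an ASIMD register (V)
 * asm.dupVG(ASIMDSize.FullReg, ElementSize.Byte, v3, r8);
 * // dupVX: duplicate an indexed element (X) of an ASIMD register into an ASIMD register (V)
 * asm.dupVX(ASIMDSize.FullReg, ElementSize.Word, v4, v5, 2);
 * </pre>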
 */
public abstract class AArch64ASIMDAssembler {

    /**
     * Calculates and maintains a mapping of all possible ASIMD immediate values.
     *
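     * <p>
     * Usage sketch (added for illustration; not from the original source):
     *
     * <pre>
     * // check whether a 64-bit pattern is representable for a given immediate op,
     * // then fetch the op/cmode/abcdefgh bits, ready to be or'ed into an instruction
     * if (ASIMDImmediateTable.isEncodable(imm, ImmediateOp.MOVI)) {
     *     int immEncoding = ASIMDImmediateTable.getEncoding(imm, ImmediateOp.MOVI);
     * }
     * </pre>
     *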
* ASIMD immediates use the form op:abc:cmode:defgh with bits 29:18-16:15-12:9-5. How these bits * are expanded into 64-bit values is codified in shared/functions/vectorAdvSIMDExpandImm * (J1-8208). */ public static class ASIMDImmediateTable { private static final int ImmediateOpOffset = 29; private static final int ImmediateCmodeOffset = 12; private static final int ImmediateABCOffset = 16; private static final int ImmediateDEFGHOffset = 5; public enum BitValues { ZERO(0), ONE(1), ANY(0, 1); final int[] values; BitValues(int... values) { this.values = values; } } public static final ASIMDImmediateTable.ImmediateEncodings[] IMMEDIATE_TABLE = buildImmediateTable(); /** * Tests whether an immediate can be encoded within an ASIMD instruction using the provided * ImmediateOp mode. */ public static boolean isEncodable(long imm, ImmediateOp op) { int pos = Arrays.binarySearch(IMMEDIATE_TABLE, ASIMDImmediateTable.ImmediateEncodings.createRepresentativeEncoding(imm)); if (pos < 0) { return false; } ASIMDImmediateTable.ImmediateEncodings immediate = IMMEDIATE_TABLE[pos]; for (byte cmodeOpEncoding : ImmediateOp.getCmodeOpEncodings(op)) { if (immediate.validEncoding[cmodeOpEncoding]) { return true; } } return false; } /** * Returns the instruction encoding for immediate using the provided ImmediateOp mode. */ public static int getEncoding(long imm, ImmediateOp op) { assert isEncodable(imm, op); int pos = Arrays.binarySearch(IMMEDIATE_TABLE, ASIMDImmediateTable.ImmediateEncodings.createRepresentativeEncoding(imm)); ASIMDImmediateTable.ImmediateEncodings immediate = IMMEDIATE_TABLE[pos]; for (byte cmodeOpEncoding : ImmediateOp.getCmodeOpEncodings(op)) { if (immediate.validEncoding[cmodeOpEncoding]) { int imm8Encoding = getImm8Encoding(immediate.imm8[cmodeOpEncoding]); int opBit = cmodeOpEncoding & 0x1; int cmodeBits = (cmodeOpEncoding >> 1) & 0xF; return imm8Encoding | opBit << ImmediateOpOffset | cmodeBits << ImmediateCmodeOffset; } } throw GraalError.shouldNotReachHere("Unable to encode immediate"); // ExcludeFromJacocoGeneratedReport } private static int getImm8Encoding(byte imm8) { int encoding = ((imm8 >>> 5) & 0x7) << ImmediateABCOffset | (imm8 & 0x1F) << ImmediateDEFGHOffset; return encoding; } private static long[] asBitArray(long imm8) { long[] bitArray = new long[8]; long remaining = imm8; for (int i = 0; i < 8; i++) { bitArray[i] = remaining & 0x1L; remaining = remaining >> 1; } return bitArray; } private static int getCmodeOpEncoding(int cmodeBits3to1, int cmodeBit0, int op) { int encoding = cmodeBits3to1 << 2 | cmodeBit0 << 1 | op; return encoding; } private static long replicateBit(long bit, int repeatNum) { if (bit == 0) { return 0; } else { assert bit == 1 : bit; return (1L << repeatNum) - 1; } } private static long notBit(long bit) { if (bit == 0) { return 1; } else { assert bit == 1 : bit; return 0; } } /** * Adds new immediate encoding to the appropriate * {@link ASIMDImmediateTable.ImmediateEncodings} object. */ private static void registerImmediate(Map immediateMap, long imm64, long imm8, int cmodeBits3to1, ASIMDImmediateTable.BitValues cmodeBit0, ASIMDImmediateTable.BitValues op) { immediateMap.compute(imm64, (k, v) -> v == null ? new ASIMDImmediateTable.ImmediateEncodings(k, (byte) imm8, cmodeBits3to1, cmodeBit0, op) : v.addEncoding(imm64, (byte) imm8, cmodeBits3to1, cmodeBit0, op)); } /** * This method generates all possible encodings and stores them in an array sorted by the * generated 64-bit values. 
This table is generated based on the * shared/functions/vector/AdvSIMDExpandIMM function (J1-8208). */ private static ASIMDImmediateTable.ImmediateEncodings[] buildImmediateTable() { Map immediateMap = new HashMap<>(); /* * Generating all possible immediates and linking them to the proper cmode/op values. */ for (long imm8 = 0; imm8 < 256; imm8++) { long imm64; /* cmode<3:1> == 0 */ imm64 = imm8 << 32 | imm8; registerImmediate(immediateMap, imm64, imm8, 0, ASIMDImmediateTable.BitValues.ANY, ASIMDImmediateTable.BitValues.ANY); /* cmode<3:1> == 1 */ imm64 = imm8 << 40 | imm8 << 8; registerImmediate(immediateMap, imm64, imm8, 1, ASIMDImmediateTable.BitValues.ANY, ASIMDImmediateTable.BitValues.ANY); /* cmode<3:1> == 2 */ imm64 = imm8 << 48 | imm8 << 16; registerImmediate(immediateMap, imm64, imm8, 2, ASIMDImmediateTable.BitValues.ANY, ASIMDImmediateTable.BitValues.ANY); /* cmode<3:1> == 3 */ imm64 = imm8 << 56 | imm8 << 24; registerImmediate(immediateMap, imm64, imm8, 3, ASIMDImmediateTable.BitValues.ANY, ASIMDImmediateTable.BitValues.ANY); /* cmode<3:1> == 4 */ imm64 = imm8 << 48 | imm8 << 32 | imm8 << 16 | imm8; registerImmediate(immediateMap, imm64, imm8, 4, ASIMDImmediateTable.BitValues.ANY, ASIMDImmediateTable.BitValues.ANY); /* cmode<3:1> == 5 */ imm64 = imm8 << 56 | imm8 << 40 | imm8 << 24 | imm8 << 8; registerImmediate(immediateMap, imm64, imm8, 5, ASIMDImmediateTable.BitValues.ANY, ASIMDImmediateTable.BitValues.ANY); /* cmove<3:1> == 6 */ /* cmode<0> == 0 */ imm64 = imm8 << 40 | 0xFFL << 32 | imm8 << 8 | 0xFFL; registerImmediate(immediateMap, imm64, imm8, 6, ASIMDImmediateTable.BitValues.ZERO, ASIMDImmediateTable.BitValues.ANY); /* cmode<0> == 1 */ imm64 = imm8 << 48 | 0xFFFFL << 32 | imm8 << 16 | 0xFFFFL; registerImmediate(immediateMap, imm64, imm8, 6, ASIMDImmediateTable.BitValues.ONE, ASIMDImmediateTable.BitValues.ANY); /* cmode <3:1> == 7 */ long[] bitArray = asBitArray(imm8); /* cmode<0> == 0 && op == 0 */ imm64 = imm8 << 56 | imm8 << 48 | imm8 << 40 | imm8 << 32 | imm8 << 24 | imm8 << 16 | imm8 << 8 | imm8; registerImmediate(immediateMap, imm64, imm8, 7, ASIMDImmediateTable.BitValues.ZERO, ASIMDImmediateTable.BitValues.ZERO); /* cmode<0> == 0 && op == 1 */ imm64 = replicateBit(bitArray[7], 8) << 56 | replicateBit(bitArray[6], 8) << 48 | replicateBit(bitArray[5], 8) << 40 | replicateBit(bitArray[4], 8) << 32 | replicateBit(bitArray[3], 8) << 24 | replicateBit(bitArray[2], 8) << 16 | replicateBit(bitArray[1], 8) << 8 | replicateBit(bitArray[0], 8); registerImmediate(immediateMap, imm64, imm8, 7, ASIMDImmediateTable.BitValues.ZERO, ASIMDImmediateTable.BitValues.ONE); /* cmode<0> == 1 && op == 0 */ long imm32 = bitArray[7] << 31 | notBit(bitArray[6]) << 30 | replicateBit(bitArray[6], 5) << 25 | (imm8 & 0x3F) << 19; imm64 = imm32 << 32 | imm32; registerImmediate(immediateMap, imm64, imm8, 7, ASIMDImmediateTable.BitValues.ONE, ASIMDImmediateTable.BitValues.ZERO); /* cmode<0> == 1 && op == 1 */ imm64 = bitArray[7] << 63 | notBit(bitArray[6]) << 62 | replicateBit(bitArray[6], 8) << 54 | (imm8 & 0x3F) << 48; registerImmediate(immediateMap, imm64, imm8, 7, ASIMDImmediateTable.BitValues.ONE, ASIMDImmediateTable.BitValues.ONE); } ASIMDImmediateTable.ImmediateEncodings[] table = immediateMap.values().toArray(new ASIMDImmediateTable.ImmediateEncodings[0]); Arrays.sort(table); return table; } /* * Contains the encodings associated with each 64-bit immediate value. 
Since multiple * cmode:op combinations can be used to represent the same value, each possible encoding * combination must be recorded. */ private static final class ImmediateEncodings implements Comparable { public final long imm; /* All of these operations are indexed by the bits cmode<3:0>:op. */ private final boolean[] validEncoding; private final byte[] imm8; /** * This constructor is used when creating a value used to find the matching key in * {@link #IMMEDIATE_TABLE}. */ private ImmediateEncodings(long imm) { this.imm = imm; this.validEncoding = null; this.imm8 = null; } public static ASIMDImmediateTable.ImmediateEncodings createRepresentativeEncoding(long imm) { return new ASIMDImmediateTable.ImmediateEncodings(imm); } ImmediateEncodings(long imm, byte imm8Val, int cmodeBits3to1, ASIMDImmediateTable.BitValues cmodeBit0, ASIMDImmediateTable.BitValues op) { this.imm = imm; this.validEncoding = new boolean[32]; this.imm8 = new byte[32]; for (int bit0 : cmodeBit0.values) { for (int opBit : op.values) { int cmodeOpEncoding = getCmodeOpEncoding(cmodeBits3to1, bit0, opBit); assert !validEncoding[cmodeOpEncoding]; this.validEncoding[cmodeOpEncoding] = true; this.imm8[cmodeOpEncoding] = imm8Val; } } } public ASIMDImmediateTable.ImmediateEncodings addEncoding(long imm64, byte imm8Val, int cmodeBits3to1, ASIMDImmediateTable.BitValues cmodeBit0, ASIMDImmediateTable.BitValues op) { assert imm64 == this.imm : imm64 + " " + this.imm; for (int bit0 : cmodeBit0.values) { for (int opBit : op.values) { int cmodeOpEncoding = getCmodeOpEncoding(cmodeBits3to1, bit0, opBit); assert !validEncoding[cmodeOpEncoding]; this.validEncoding[cmodeOpEncoding] = true; this.imm8[cmodeOpEncoding] = imm8Val; } } return this; } @Override public int compareTo(ASIMDImmediateTable.ImmediateEncodings o) { return Long.compare(imm, o.imm); } } } /** * Enumeration for all vector instructions which can have an immediate operand. */ public enum ImmediateOp { MOVI, MVNI, ORR, BIC, FMOVSP, FMOVDP; private static byte[] moviEncodings = { /* 0xx00 */ 0b00000, 0b00100, 0b01000, 0b01100, /* 10x00 */ 0b10000, 0b10100, /* 110x0 */ 0b11000, 0b11010, /* 1110x */ 0b11100, 0b11101, /* 11110 */ 0b11110, }; private static byte[] mvniEncodings = { /* 0xx01 */ 0b00001, 0b00101, 0b01001, 0b01101, /* 10x01 */ 0b10001, 0b10101, /* 110x1 */ 0b11001, 0b11011, }; private static byte[] orrEncodings = { /* 0xx10 */ 0b00010, 0b00110, 0b01010, 0b01110, /* 10x10 */ 0b10010, 0b10110, }; private static byte[] bicEncodings = { /* 0xx11 */ 0b00011, 0b00111, 0b01011, 0b01111, /* 10x11 */ 0b10011, 0b10111, }; private static byte[] fmovSPEncodings = { /* 11110 */ 0b11110 }; private static byte[] fmovDPEncodings = { /* 11111 */ 0b11111 }; /** * Returns all valid cmode:op encodings for the requested immediate op. */ public static byte[] getCmodeOpEncodings(ImmediateOp op) { switch (op) { case MOVI: return moviEncodings; case MVNI: return mvniEncodings; case ORR: return orrEncodings; case BIC: return bicEncodings; case FMOVSP: return fmovSPEncodings; case FMOVDP: return fmovDPEncodings; } throw GraalError.shouldNotReachHereUnexpectedValue(op); // ExcludeFromJacocoGeneratedReport } } /** * Enumeration of all different SIMD operation sizes. 
*/ public enum ASIMDSize { HalfReg(64), FullReg(128); private final int nbits; ASIMDSize(int nbits) { this.nbits = nbits; } public int bits() { return nbits; } public int bytes() { return nbits / Byte.SIZE; } public static ASIMDSize fromVectorKind(PlatformKind kind) { assert kind instanceof AArch64Kind : kind; assert kind.getVectorLength() > 1 : kind; int bitSize = kind.getSizeInBytes() * Byte.SIZE; assert bitSize == 32 || bitSize == 64 || bitSize == 128 : bitSize; return bitSize == 128 ? FullReg : HalfReg; } } /** * Enumeration of all different lane types of SIMD register. *

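* <p>
* For example (editor's illustration, not original text): {@code ElementSize.fromStride(Stride.S4)}
* yields {@code Word} (32 bits per lane), {@code Word.expand()} yields {@code DoubleWord}, and
* {@code Word.narrow()} yields {@code HalfWord}.
*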
* Byte(B):8b/lane; HalfWord(H):16b/lane; Word(S):32b/lane; DoubleWord(D):64b/lane. */ public enum ElementSize { Byte(0, 8), HalfWord(1, 16), Word(2, 32), DoubleWord(3, 64); private final int encoding; private final int nbits; ElementSize(int encoding, int nbits) { this.encoding = encoding; this.nbits = nbits; } public int bits() { return nbits; } public int bytes() { return nbits / java.lang.Byte.SIZE; } public static ElementSize fromKind(PlatformKind kind) { switch (((AArch64Kind) kind).getScalar()) { case BYTE: return Byte; case WORD: return HalfWord; case DWORD: case SINGLE: return Word; case QWORD: case DOUBLE: return DoubleWord; default: throw GraalError.shouldNotReachHereUnexpectedValue(((AArch64Kind) kind).getScalar()); // ExcludeFromJacocoGeneratedReport } } public static ElementSize fromSize(int size) { switch (size) { case 8: return Byte; case 16: return HalfWord; case 32: return Word; case 64: return DoubleWord; default: throw GraalError.shouldNotReachHere("Invalid ASIMD element size."); // ExcludeFromJacocoGeneratedReport } } public static ElementSize fromStride(Stride stride) { switch (stride) { case S1: return Byte; case S2: return HalfWord; case S4: return Word; case S8: return DoubleWord; default: throw GraalError.shouldNotReachHereUnexpectedValue(stride); // ExcludeFromJacocoGeneratedReport } } public ElementSize expand() { return ElementSize.fromSize(nbits * 2); } public ElementSize narrow() { return ElementSize.fromSize(nbits / 2); } } /** * Encodings for ASIMD instructions. These encodings are based on the encodings described in * C4.1.6. */ private static final int UBit = 0b1 << 29; private static final int ReplicateFlag = 0b1 << 21; public enum ASIMDInstruction { /* Advanced SIMD load/store multiple structures (C4-296). */ ST4_MULTIPLE_4R(0b0000 << 12), ST1_MULTIPLE_4R(0b0010 << 12), ST1_MULTIPLE_3R(0b0110 << 12), ST1_MULTIPLE_1R(0b0111 << 12), ST2_MULTIPLE_2R(0b1000 << 12), ST1_MULTIPLE_2R(0b1010 << 12), LD4_MULTIPLE_4R(LoadFlag | 0b0000 << 12), LD1_MULTIPLE_4R(LoadFlag | 0b0010 << 12), LD1_MULTIPLE_3R(LoadFlag | 0b0110 << 12), LD1_MULTIPLE_1R(LoadFlag | 0b0111 << 12), LD2_MULTIPLE_2R(LoadFlag | 0b1000 << 12), LD1_MULTIPLE_2R(LoadFlag | 0b1010 << 12), /* Advanced SIMD load/store single structure (C4-299). */ LD1R(LoadFlag | 0b110 << 13), LD4R(LoadFlag | ReplicateFlag | 0b111 << 13), /* Cryptographic AES (C4-341). */ AESE(0b00100 << 12), AESD(0b00101 << 12), AESMC(0b00110 << 12), AESIMC(0b00111 << 12), /* Cryptographic three-register SHA */ SHA1C(0b000 << 12), SHA1P(0b001 << 12), SHA1M(0b010 << 12), SHA1SU0(0b011 << 12), SHA256H(0b100 << 12), SHA256H2(0b101 << 12), SHA256SU1(0b110 << 12), /* Cryptographic two-register SHA */ SHA1H(0b00000 << 12), SHA1SU1(0b00001 << 12), SHA256SU0(0b00010 << 12), /* Cryptographic three-register SHA512 */ SHA512H(0b00 << 10), SHA512H2(0b01 << 10), SHA512SU1(0b10 << 10), RAX1(0b11 << 10), /* Cryptographic two-register SHA 512 */ SHA512SU0(0b00 << 10), /* Cryptographic four-register */ EOR3(0b00 << 21), BCAX(0b01 << 21), /* Advanced SIMD table lookup (C4-355). */ TBL(0b0 << 12), TBX(0b1 << 12), /* Advanced SIMD permute (C4-355). */ UZP1(0b001 << 12), TRN1(0b010 << 12), ZIP1(0b011 << 12), UZP2(0b101 << 12), TRN2(0b110 << 12), ZIP2(0b111 << 12), /* Advanced SIMD extract (C4-356). */ EXT(0b00 << 22), /* Advanced SIMD copy (C4-356). */ DUPELEM(0b0000 << 11), DUPGEN(0b0001 << 11), INSGEN(0b0011 << 11), SMOV(0b0101 << 11), UMOV(0b0111 << 11), INSELEM(0b1 << 29), /* Advanced SIMD two-register miscellaneous (C4-361). 
*/ /* size xx */ REV64(0b00000 << 12), REV16(0b00001 << 12), CNT(0b00101 << 12), CMGT_ZERO(0b01000 << 12), CMEQ_ZERO(0b01001 << 12), CMLT_ZERO(0b01010 << 12), ABS(0b01011 << 12), XTN(0b10010 << 12), /* size 0x */ FCVTN(0b10110 << 12), FCVTL(0b10111 << 12), SCVTF(0b11101 << 12), /* size 1x */ FCMGT_ZERO(0b01100 << 12), FCMEQ_ZERO(0b01101 << 12), FCMLT_ZERO(0b01110 << 12), FABS(0b01111 << 12), FCVTZS(0b11011 << 12), /* UBit 1, size xx */ REV32(UBit | 0b00000 << 12), CMGE_ZERO(UBit | 0b01000 << 12), CMLE_ZERO(UBit | 0b01001 << 12), NEG(UBit | 0b01011 << 12), /* UBit 1, size 00 */ NOT(UBit | 0b00101 << 12), /* UBit 1, size 01 */ RBIT(UBit | 0b00101 << 12), /* UBit 1, size 1x */ FCMGE_ZERO(UBit | 0b01100 << 12), FCMLE_ZERO(UBit | 0b01101 << 12), FNEG(UBit | 0b01111 << 12), FSQRT(UBit | 0b11111 << 12), /* Advanced SIMD across lanes (C4-364). */ SADDLV(0b00011 << 12), ADDV(0b11011 << 12), UADDLV(UBit | 0b00011 << 12), UMAXV(UBit | 0b01010 << 12), UMINV(UBit | 0b11010 << 12), /* Advanced SIMD three different (C4-365). */ SSUBL(0b0010 << 12), SMLAL(0b1000 << 12), SMLSL(0b1010 << 12), PMULL(0b1110 << 12), USUBL(UBit | 0b0010 << 12), UMLAL(UBit | 0b1000 << 12), UMLSL(UBit | 0b1010 << 12), /* * Advanced SIMD three same (C4-366) & Advanced SIMD scalar three same (C4-349). */ /* size xx */ CMGT(0b00110 << 11), CMGE(0b00111 << 11), SSHL(0b01000 << 11), SMAX(0b01100 << 11), SMIN(0b01101 << 11), SMINP(0b10101 << 11), ADD(0b10000 << 11), CMTST(0b10001 << 11), MLA(0b10010 << 11), MUL(0b10011 << 11), ADDP(0b10111 << 11), /* size 0x */ FMLA(0b11001 << 11), FADD(0b11010 << 11), FCMEQ(0b11100 << 11), FMAX(0b11110 << 11), /* size 00 */ AND(0b00011 << 11), /* size 01 */ BIC(0b00011 << 11), /* size 1x */ FMLS(0b11001 << 11), FSUB(0b11010 << 11), FMIN(0b11110 << 11), /* size 10 */ ORR(0b00011 << 11), /* size 11 */ ORN(0b00011 << 11), /* UBit 1, size xx */ CMHI(UBit | 0b00110 << 11), CMHS(UBit | 0b00111 << 11), USHL(UBit | 0b01000 << 11), UMAX(UBit | 0b01100 << 11), UMAXP(UBit | 0b10100 << 11), UMIN(UBit | 0b01101 << 11), UMINP(UBit | 0b10101 << 11), SUB(UBit | 0b10000 << 11), CMEQ(UBit | 0b10001 << 11), MLS(UBit | 0b10010 << 11), /* UBit 1, size 0x */ FMUL(UBit | 0b11011 << 11), FCMGE(UBit | 0b11100 << 11), FACGE(UBit | 0b11101 << 11), FDIV(UBit | 0b11111 << 11), /* UBit 1, size 00 */ EOR(UBit | 0b00011 << 11), /* UBit 1, size 01 */ BSL(UBit | 0b00011 << 11), /* UBit 1, size 10 */ BIT(UBit | 0b00011 << 11), /* UBit 1, size 11 */ BIF(UBit | 0b00011 << 11), /* UBit 1, size 1x */ FCMGT(UBit | 0b11100 << 11), FACGT(UBit | 0b11101 << 11), /* Advanced SIMD shift by immediate (C4-371). */ SSHR(0b00000 << 11), SHL(0b01010 << 11), SHRN(0b10000 << 11), SSHLL(0b10100 << 11), USHR(UBit | 0b00000 << 11), USRA(UBit | 0b00010 << 11), USHLL(UBit | 0b10100 << 11); public final int encoding; ASIMDInstruction(int encoding) { this.encoding = encoding; } } private final AArch64Assembler asm; protected AArch64ASIMDAssembler(AArch64Assembler asm) { this.asm = asm; } protected void emitInt(int x) { asm.emitInt(x); } /** * Returns whether the operation is utilizing multiple vector lanes. The only scenario when this * isn't true is when performing an operation using a 64-bit register and an element of size 64 * bits. */ private static boolean usesMultipleLanes(ASIMDSize size, ElementSize eSize) { return !(size == ASIMDSize.HalfReg && eSize == ElementSize.DoubleWord); } /** * Checks whether all registers follow one another (modulo 32 - the number of SIMD registers). */ private static boolean assertConsecutiveSIMDRegisters(Register... 
regs) { int numRegs = AArch64.simdRegisters.size(); assert regs[0].getRegisterCategory().equals(SIMD) : regs; for (int i = 1; i < regs.length; i++) { assert regs[i].getRegisterCategory().equals(SIMD) : regs + " " + i; assert (regs[i - 1].encoding + 1) % numRegs == regs[i].encoding : "registers must be consecutive"; } return true; } /* Helper values/methods for encoding instructions */ private static final int ASIMDSizeOffset = 22; private static final int elemSize00 = 0b00 << ASIMDSizeOffset; private static final int elemSize01 = 0b01 << ASIMDSizeOffset; private static final int elemSize10 = 0b10 << ASIMDSizeOffset; private static final int elemSize11 = 0b11 << ASIMDSizeOffset; private static int elemSizeXX(ElementSize eSize) { return eSize.encoding << ASIMDSizeOffset; } private static int elemSize1X(ElementSize eSize) { assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; return (0b10 | (eSize == ElementSize.DoubleWord ? 1 : 0)) << ASIMDSizeOffset; } private static int elemSize0X(ElementSize eSize) { assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; return (eSize == ElementSize.DoubleWord ? 1 : 0) << ASIMDSizeOffset; } /** * Sets the Q-bit if requested. */ private static int qBit(boolean isSet) { return (isSet ? 1 : 0) << 30; } /** * Sets the Q-bit if using all 128-bits. */ private static int qBit(ASIMDSize size) { return (size == ASIMDSize.FullReg ? 1 : 0) << 30; } /** * Single structures encode sizes at a different spot than the traditional eSize offset * ({@link #ASIMDSizeOffset}). */ private static int singleStructureElemSizeEncoding(ASIMDInstruction instr, ElementSize eSize) { int encoding; switch (instr) { case LD1R: case LD4R: encoding = eSize.encoding; break; default: throw GraalError.shouldNotReachHereUnexpectedValue(instr); // ExcludeFromJacocoGeneratedReport } return encoding << 10; } private static int encodeStructureAddress(ASIMDInstruction instr, ASIMDSize size, ElementSize eSize, AArch64Address address) { int postIndexEncoding; int offsetEncoding; Register offset; switch (address.getAddressingMode()) { case BASE_REGISTER_ONLY: postIndexEncoding = 0; offsetEncoding = 0; break; case REGISTER_STRUCTURE_POST_INDEXED: postIndexEncoding = 0b1 << 23; offset = address.getOffset(); assert !offset.equals(zr) : offset; offsetEncoding = rs2(offset); break; case IMMEDIATE_STRUCTURE_POST_INDEXED: postIndexEncoding = 0b1 << 23; offset = address.getOffset(); assert offset.equals(zr) : offset; assert address.getImmediateRaw() == AArch64Address.determineStructureImmediateValue(instr, size, eSize) : address + " " + instr + " " + size + " " + eSize; offsetEncoding = rs2(offset); break; default: throw GraalError.shouldNotReachHereUnexpectedValue(address.getAddressingMode()); // ExcludeFromJacocoGeneratedReport } return postIndexEncoding | offsetEncoding | rn(address.getBase()); } private void loadStoreMultipleStructures(ASIMDInstruction instr, ASIMDSize size, ElementSize eSize, Register value, AArch64Address address) { int baseEncoding = 0b0_0_001100_0_0_0_00000_0000_00_00000_00000; int eSizeEncoding = eSize.encoding << 10; int addressEncoding = encodeStructureAddress(instr, size, eSize, address); emitInt(instr.encoding | baseEncoding | qBit(size) | eSizeEncoding | addressEncoding | rd(value)); } private void loadStoreSingleStructure(ASIMDInstruction instr, ASIMDSize size, ElementSize eSize, Register value, AArch64Address address) { int baseEncoding = 0b0_0_001101_0_0_0_00000_000_0_00_00000_00000; int eSizeEncoding = 
singleStructureElemSizeEncoding(instr, eSize); int addressEncoding = encodeStructureAddress(instr, size, eSize, address); emitInt(instr.encoding | baseEncoding | qBit(size) | eSizeEncoding | addressEncoding | rd(value)); } private void cryptographicAES(ASIMDInstruction instr, Register dst, Register src) { int baseEncoding = 0b01001110_00_10100_00000_10_00000_00000; emitInt(instr.encoding | baseEncoding | elemSize00 | rd(dst) | rn(src)); } private void cryptographicThreeSHA(ASIMDInstruction instr, Register dst, Register src1, Register src2) { int baseEncoding = 0b01011110_00_0_00000_0_000_00_00000_00000; emitInt(instr.encoding | baseEncoding | elemSize00 | rd(dst) | rs1(src1) | rs2(src2)); } private void cryptographicTwoSHA(ASIMDInstruction instr, Register dst, Register src) { int baseEncoding = 0b01011110_00_10100_00000_10_00000_00000; emitInt(instr.encoding | baseEncoding | elemSize00 | rd(dst) | rn(src)); } private void cryptographicThreeSHA512(ASIMDInstruction instr, Register dst, Register src1, Register src2) { int baseEncoding = 0b11001110011_00000_1_0_00_00_00000_00000; emitInt(instr.encoding | baseEncoding | rd(dst) | rs1(src1) | rs2(src2)); } private void cryptographicTwoSHA512(ASIMDInstruction instr, Register dst, Register src) { int baseEncoding = 0b11001110110000001000_00_00000_00000; emitInt(instr.encoding | baseEncoding | rd(dst) | rn(src)); } private void cryptographicFour(ASIMDInstruction instr, Register dst, Register src1, Register src2, Register src3) { int baseEncoding = 0b110011100_00_00000_0_00000_00000_00000; emitInt(instr.encoding | baseEncoding | rd(dst) | rs1(src1) | rs2(src2) | rs3(src3)); } private void scalarThreeSameEncoding(ASIMDInstruction instr, int eSizeEncoding, Register dst, Register src1, Register src2) { int baseEncoding = 0b01_0_11110_00_1_00000_00000_1_00000_00000; emitInt(instr.encoding | baseEncoding | eSizeEncoding | rd(dst) | rs1(src1) | rs2(src2)); } private void scalarShiftByImmEncoding(ASIMDInstruction instr, int imm7, Register dst, Register src) { assert (imm7 & 0b1111_111) == imm7 : imm7; assert (imm7 & 0b1111_111) != 0 : imm7; assert (imm7 & 0b0000_111) != imm7 : imm7; int baseEncoding = 0b01_0_111110_0000_000_00000_1_00000_00000; emitInt(instr.encoding | baseEncoding | imm7 << 16 | rd(dst) | rs1(src)); } private void tableLookupEncoding(ASIMDInstruction instr, ASIMDSize size, int numTableRegs, Register dst, Register src1, Register src2) { int baseEncoding = 0b0_0_001110_00_0_00000_0_000_00_00000_00000; assert numTableRegs >= 1 && numTableRegs <= 4 : numTableRegs; int numTableRegsEncoding = (numTableRegs - 1) << 13; emitInt(instr.encoding | baseEncoding | qBit(size) | numTableRegsEncoding | rd(dst) | rs1(src1) | rs2(src2)); } private void permuteEncoding(ASIMDInstruction instr, ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { int baseEncoding = 0b0_0_001110_00_0_00000_0_000_10_00000_00000; emitInt(instr.encoding | baseEncoding | qBit(size) | elemSizeXX(eSize) | rd(dst) | rs1(src1) | rs2(src2)); } private void copyEncoding(ASIMDInstruction instr, boolean setQBit, ElementSize eSize, Register dst, Register src, int index) { copyEncoding(instr, 0, setQBit, eSize, dst, src, index); } private void copyEncoding(ASIMDInstruction instr, int extraEncoding, boolean setQBit, ElementSize eSize, Register dst, Register src, int index) { assert index >= 0 : index; assert index < ASIMDSize.FullReg.bytes() / eSize.bytes() : "index=" + index + " " + eSize; int baseEncoding = 0b0_0_0_01110000_00000_0_0000_1_00000_00000; int 
imm5Encoding = (index * 2 * eSize.bytes() | eSize.bytes()) << 16; emitInt(instr.encoding | extraEncoding | baseEncoding | qBit(setQBit) | imm5Encoding | rd(dst) | rs1(src)); } private void twoRegMiscEncoding(ASIMDInstruction instr, ASIMDSize size, int eSizeEncoding, Register dst, Register src) { twoRegMiscEncoding(instr, size == ASIMDSize.FullReg, eSizeEncoding, dst, src); } private void twoRegMiscEncoding(ASIMDInstruction instr, boolean setQBit, int eSizeEncoding, Register dst, Register src) { int baseEncoding = 0b0_0_0_01110_00_10000_00000_10_00000_00000; emitInt(instr.encoding | baseEncoding | qBit(setQBit) | eSizeEncoding | rd(dst) | rs1(src)); } private void acrossLanesEncoding(ASIMDInstruction instr, ASIMDSize size, int eSizeEncoding, Register dst, Register src) { int baseEncoding = 0b0_0_0_01110_00_11000_00000_10_00000_00000; emitInt(instr.encoding | baseEncoding | qBit(size) | eSizeEncoding | rd(dst) | rs1(src)); } private void threeDifferentEncoding(ASIMDInstruction instr, boolean setQBit, int eSizeEncoding, Register dst, Register src1, Register src2) { int baseEncoding = 0b0_0_0_01110_00_1_00000_0000_00_00000_00000; emitInt(instr.encoding | baseEncoding | qBit(setQBit) | eSizeEncoding | rd(dst) | rs1(src1) | rs2(src2)); } private void threeSameEncoding(ASIMDInstruction instr, ASIMDSize size, int eSizeEncoding, Register dst, Register src1, Register src2) { int baseEncoding = 0b0_0_0_01110_00_1_00000_00000_1_00000_00000; emitInt(instr.encoding | baseEncoding | qBit(size) | eSizeEncoding | rd(dst) | rs1(src1) | rs2(src2)); } private void modifiedImmEncoding(ImmediateOp op, ASIMDSize size, Register dst, long imm) { int baseEncoding = 0b0_0_0_0111100000_000_0000_0_1_00000_00000; int immEncoding = ASIMDImmediateTable.getEncoding(imm, op); emitInt(baseEncoding | qBit(size) | immEncoding | rd(dst)); } private void shiftByImmEncoding(ASIMDInstruction instr, ASIMDSize size, int imm7, Register dst, Register src) { shiftByImmEncoding(instr, size == ASIMDSize.FullReg, imm7, dst, src); } private void shiftByImmEncoding(ASIMDInstruction instr, boolean setQBit, int imm7, Register dst, Register src) { assert (imm7 & 0b1111_111) == imm7 : imm7; assert (imm7 & 0b1111_111) != 0 : imm7; assert (imm7 & 0b0000_111) != imm7 : imm7; int baseEncoding = 0b0_0_0_011110_0000_000_00000_1_00000_00000; emitInt(instr.encoding | baseEncoding | qBit(setQBit) | imm7 << 16 | rd(dst) | rs1(src)); } /** * C7.2.1 Integer absolute value.
* * for i in 0..n-1 do dst[i] = int_abs(src[i]) * * @param size register size. * @param eSize element size. * @param dst SIMD register. * @param src SIMD register. */ public void absVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; twoRegMiscEncoding(ASIMDInstruction.ABS, size, elemSizeXX(eSize), dst, src); } /** * C7.2.2 Integer add scalar.
* * dst[0] = int_add(src1[0], src2[0]) *

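* <p>
* Example (editor's sketch; register names assumed): {@code addSSS(ElementSize.DoubleWord, v0, v1, v2)}
* adds the low 64-bit lanes of {@code v1} and {@code v2} and writes the sum to the low lane of
* {@code v0}.
*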
* Note that only 64-bit (DoubleWord) operations are available. * * @param eSize element size. Must be of type ElementSize.DoubleWord * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void addSSS(ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert eSize == ElementSize.DoubleWord : eSize; // only size supported scalarThreeSameEncoding(ASIMDInstruction.ADD, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.2 Integer add vector.
* * for i in 0..n-1 do dst[i] = int_add(src1[i], src2[i]) * * @param size register size. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void addVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.ADD, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.5 Add pairwise vector.
*

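* <p>
* For illustration (editor's reading of the manual text quoted below): with
* {@code ASIMDSize.FullReg} and {@code ElementSize.Word}, the result is
* {@code dst = [src1[0]+src1[1], src1[2]+src1[3], src2[0]+src2[1], src2[2]+src2[3]]}, where index
* 0 denotes the lowest lane.
*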
* From the manual: "This instruction creates a vector by concatenating the vector elements of * the first source SIMD&FP register after the vector elements of the second source * SIMD&FP register, reads each pair of adjacent vector elements from the concatenated * vector, adds each pair of values together, places the result into a vector, and writes the * vector to the destination SIMD&FP register." * * @param size register size. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void addpVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; threeSameEncoding(ASIMDInstruction.ADDP, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.6 Add across vector.
* * dst = src[0] + ....+ src[n]. * * @param size register size. * @param elementSize width of each addition operand. * @param dst SIMD register. * @param src SIMD register. */ public void addvSV(ASIMDSize size, ElementSize elementSize, Register dst, Register src) { assert !(size == ASIMDSize.HalfReg && elementSize == ElementSize.Word) : "Invalid size and lane combination for addv"; assert elementSize != ElementSize.DoubleWord : "Invalid lane width for addv"; acrossLanesEncoding(ASIMDInstruction.ADDV, size, elemSizeXX(elementSize), dst, src); } /** * C7.2.7 AES single round decryption.
* * @param dst SIMD register. * @param src SIMD register. */ public void aesd(Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; cryptographicAES(ASIMDInstruction.AESD, dst, src); } /** * C7.2.8 AES single round encryption.
* * @param dst SIMD register. * @param src SIMD register. */ public void aese(Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; cryptographicAES(ASIMDInstruction.AESE, dst, src); } /** * C7.2.9 AES inverse mix columns.
* * @param dst SIMD register. * @param src SIMD register. */ public void aesimc(Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; cryptographicAES(ASIMDInstruction.AESIMC, dst, src); } /** * C7.2.10 AES mix columns.
* * @param dst SIMD register. * @param src SIMD register. */ public void aesmc(Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; cryptographicAES(ASIMDInstruction.AESMC, dst, src); } /** * C7.2.11 Bitwise and vector.
* * for i in 0..n-1 do dst[i] = src1[i] & src2[i] * * @param size register size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void andVVV(ASIMDSize size, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.AND, size, elemSize00, dst, src1, src2); } /** * C7.2.12 Bit Clear and exclusive-OR.
* * Bit Clear and exclusive-OR performs a bitwise AND of the 128-bit vector in a source * SIMD&FP register and the complement of the vector in another source SIMD&FP register, * then performs a bitwise exclusive-OR of the resulting vector and the vector in a third source * SIMD&FP register, and writes the result to the destination SIMD&FP register. * * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. * @param src3 SIMD register. */ public void bcaxVVVV(Register dst, Register src1, Register src2, Register src3) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert src3.getRegisterCategory().equals(SIMD) : src3; cryptographicFour(ASIMDInstruction.BCAX, dst, src1, src2, src3); } /** * C7.2.20 Bitwise bit clear.
* This instruction performs a bitwise and between the SIMD register and the complement of the * provided immediate value. * * dst = dst & ~(imm{1,2}) * * @param size register size. * @param dst SIMD register. * @param imm long value to move. If size is 128, then this value is copied twice */ public void bicVI(ASIMDSize size, Register dst, long imm) { modifiedImmEncoding(ImmediateOp.BIC, size, dst, imm); } /** * C7.2.21 Bitwise bit clear (vector, register).
* This instruction performs a bitwise and between the first source and the complement of the * second source. * * for i in 0..n-1 do dst[i] = src1[i] & ~src2[i] * * @param size register size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void bicVVV(ASIMDSize size, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.BIC, size, elemSize01, dst, src1, src2); } /** * C7.2.22 Bitwise insert if false.
* This instruction inserts each bit from the first source register into the destination * register if the corresponding bit of the second source register is 0, otherwise leave the bit * in the destination register unchanged. * * for i in 0..n-1 do dst[i] = src2[i] == 0 ? src1[i] : dst[i] * * @param size register size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void bifVVV(ASIMDSize size, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.BIF, size, elemSize11, dst, src1, src2); } /** * C7.2.23 Bitwise insert if true.
* This instruction inserts each bit from the first source register into the destination * register if the corresponding bit of the second source register is 1, otherwise leave the bit * in the destination register unchanged. * * for i in 0..n-1 do dst[i] = src2[i] == 1 ? src1[i] : dst[i] * * @param size register size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void bitVVV(ASIMDSize size, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.BIT, size, elemSize10, dst, src1, src2); } /** * C7.2.24 Bitwise select.
* This instruction sets each bit in the destination register to the corresponding bit from the * first source register when the original destination bit was 1, otherwise from the second * source register. * * for i in 0..n-1 do dst[i] = dst[i] == 1 ? src1[i] : src2[i] * * @param size register size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void bslVVV(ASIMDSize size, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.BSL, size, elemSize01, dst, src1, src2); } /** * C7.2.27 Compare bitwise equal.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = src1[i] == src2[i] ? -1 : 0 * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void cmeqVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; threeSameEncoding(ASIMDInstruction.CMEQ, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.28 Compare bitwise equal to zero.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = src[i] == 0 ? -1 : 0 * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src SIMD register. */ public void cmeqZeroVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; twoRegMiscEncoding(ASIMDInstruction.CMEQ_ZERO, size, elemSizeXX(eSize), dst, src); } /** * C7.2.29 Compare signed greater than or equal.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = src1[i] >= src2[i] ? -1 : 0 * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void cmgeVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; threeSameEncoding(ASIMDInstruction.CMGE, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.30 Compare signed greater than or equal to zero.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] >= src[i] == 0 ? -1 : 0 * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src SIMD register. */ public void cmgeZeroVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; twoRegMiscEncoding(ASIMDInstruction.CMGE_ZERO, size, elemSizeXX(eSize), dst, src); } /** * C7.2.31 Compare signed greater than.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = src1[i] > src2[i] ? -1 : 0 * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void cmgtVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; threeSameEncoding(ASIMDInstruction.CMGT, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.32 Compare signed greater than zero.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = src[i] > 0 ? -1 : 0 * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src SIMD register. */ public void cmgtZeroVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; twoRegMiscEncoding(ASIMDInstruction.CMGT_ZERO, size, elemSizeXX(eSize), dst, src); } /** * C7.2.33 Compare unsigned higher.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = unsigned(src1[i]) > unsigned(src2[i]) ? -1 : 0 * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void cmhiVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; threeSameEncoding(ASIMDInstruction.CMHI, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.34 Compare unsigned higher or same.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = unsigned(src1[i]) >= unsigned(src2[i]) ? -1 : 0 * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void cmhsVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; threeSameEncoding(ASIMDInstruction.CMHS, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.35 Compare signed less than or equal to zero.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = src[i] <= 0 ? -1 : 0 * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src SIMD register. */ public void cmleZeroVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; twoRegMiscEncoding(ASIMDInstruction.CMLE_ZERO, size, elemSizeXX(eSize), dst, src); } /** * C7.2.36 Compare signed less than zero.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = src[i] < 0 ? -1 : 0 * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src SIMD register. */ public void cmltZeroVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; twoRegMiscEncoding(ASIMDInstruction.CMLT_ZERO, size, elemSizeXX(eSize), dst, src); } /** * C7.2.37 Compare bitwise test bits nonzero.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = (src1[i] & src2[i]) == 0 ? 0 : -1 * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void cmtstVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; threeSameEncoding(ASIMDInstruction.CMTST, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.38 Population Count per byte.
* * dst[0...n] = countBitCountOfEachByte(src[0...n]), n = size/8. * * @param size register size. * @param dst SIMD register. Should not be null. * @param src SIMD register. Should not be null. */ public void cntVV(ASIMDSize size, Register dst, Register src) { twoRegMiscEncoding(ASIMDInstruction.CNT, size, elemSize00, dst, src); } /** * C7.2.39 Duplicate vector element to scalar.
* Note that, regardless of the source vector element's index, the value is always copied into * the beginning of the destination register (offset 0). * * dst[0] = src[index] * * @param eSize size of value to duplicate. * @param dst SIMD register * @param src SIMD register * @param index offset of value to duplicate */ public void dupSX(ElementSize eSize, Register dst, Register src, int index) { assert src.getRegisterCategory().equals(SIMD) : src; assert dst.getRegisterCategory().equals(SIMD) : dst; assert index >= 0 && index < ASIMDSize.FullReg.bytes() / eSize.bytes() : index + " " + eSize; /* * Technically, this is instruction's encoding format is "advanced simd scalar copy" * (C4-343). */ int baseEncoding = 0b01_0_11110000_00000_0_0000_1_00000_00000; int imm5Encoding = ((index * 2 * eSize.bytes()) | eSize.bytes()) << 16; emitInt(ASIMDInstruction.DUPELEM.encoding | baseEncoding | imm5Encoding | rd(dst) | rn(src)); } /** * C7.2.39 Duplicate vector element to vector.
* * dst[0..n-1] = src[index]{n} * * @param dstSize total size of all duplicates. * @param eSize size of value to duplicate. * @param dst SIMD register. * @param src SIMD register. * @param index offset of value to duplicate */ public void dupVX(ASIMDSize dstSize, ElementSize eSize, Register dst, Register src, int index) { assert src.getRegisterCategory().equals(SIMD) : src; assert dst.getRegisterCategory().equals(SIMD) : dst; copyEncoding(ASIMDInstruction.DUPELEM, dstSize == ASIMDSize.FullReg, eSize, dst, src, index); } /** * C7.2.40 Duplicate general-purpose register to vector.
* * dst(simd) = src(gp){n} * * @param dstSize total size of all duplicates. * @param eSize size of value to duplicate. * @param dst SIMD register. * @param src general-purpose register. */ public void dupVG(ASIMDSize dstSize, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(dstSize, eSize) : dstSize + " " + eSize; assert src.getRegisterCategory().equals(CPU) : src; assert dst.getRegisterCategory().equals(SIMD) : dst; copyEncoding(ASIMDInstruction.DUPGEN, dstSize == ASIMDSize.FullReg, eSize, dst, src, 0); } /** * C7.2.41 Bitwise exclusive or vector.
* * for i in 0..n-1 do dst[i] = src1[i] ^ src2[i] * * @param size register size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void eorVVV(ASIMDSize size, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.EOR, size, elemSize00, dst, src1, src2); } /** * C7.2.42 Bitwise three-way exclusive or vector.
* * for i in 0..127 do dst[i] = src1[i] ^ src2[i] ^ src3[i] * * Three-way Exclusive-OR performs a three-way exclusive-OR of the values in the three source * SIMD&FP registers, and writes the result to the destination SIMD&FP register. * * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. * @param src3 SIMD register. */ public void eor3VVVV(Register dst, Register src1, Register src2, Register src3) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert src3.getRegisterCategory().equals(SIMD) : src3; cryptographicFour(ASIMDInstruction.EOR3, dst, src1, src2, src3); } /** * C7.2.43 Extract from pair of vectors.
*

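* <p>
* For illustration (editor's reading of the description quoted below): with
* {@code ASIMDSize.FullReg} and {@code src1LowIdx == 3}, the destination receives bytes 3..15 of
* {@code src1} in its lowest lanes, followed by bytes 0..2 of {@code src2} in the highest lanes.
*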
* From the manual: "This instruction extracts the lowest vector elements from the second source * SIMD&FP register and the highest vector elements from the first source SIMD&FP * register, concatenates the results into a vector, and writes the vector to the destination * SIMD&FP register vector. The index value specifies the lowest vector element to extract * from the first source register, and consecutive elements are extracted from the first, then * second, source registers until the destination vector is filled." For this operation, vector * elements are always byte sized. * * @param size operation size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. * @param src1LowIdx The lowest index of the first source registers to extract */ public void extVVV(ASIMDSize size, Register dst, Register src1, Register src2, int src1LowIdx) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; /* Must include at least one byte from src1 */ assert src1LowIdx >= 0 && src1LowIdx < size.bytes() : src1LowIdx + " " + size; /* * Technically, this instruction's encoding format is "advanced simd extract" (C4-356) */ int baseEncoding = 0b0_0_101110_00_0_00000_0_0000_0_00000_00000; emitInt(ASIMDInstruction.EXT.encoding | baseEncoding | qBit(size) | src1LowIdx << 11 | rd(dst) | rs1(src1) | rs2(src2)); } /** * C7.2.45 Floating-point absolute value.
* * for i in 0..n-1 do dst[i] = fp_abs(src[i]) * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. Note * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src SIMD register. */ public void fabsVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; twoRegMiscEncoding(ASIMDInstruction.FABS, size, elemSize1X(eSize), dst, src); } /** * C7.2.47 Floating-point absolute compare greater than or equal.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = fp_abs(src1[i]) >= fp_abs(src2[i]) ? -1 : 0 * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void facgeVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; threeSameEncoding(ASIMDInstruction.FACGE, size, elemSize0X(eSize), dst, src1, src2); } /** * C7.2.48 Floating-point absolute compare greater than.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = fp_abs(src1[i]) > fp_abs(src2[i]) ? -1 : 0 * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void facgtVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; threeSameEncoding(ASIMDInstruction.FACGT, size, elemSize1X(eSize), dst, src1, src2); } /** * C7.2.48 Floating-point absolute compare greater than.
*

 * For elements which the comparison is true, all bits of the corresponding dst lane are set to
 * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared.
 *
 * dst = fp_abs(src1) > fp_abs(src2) ? -1 : 0
 *
 * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord.
 * @param dst SIMD register.
 * @param src1 SIMD register.
 * @param src2 SIMD register.
 */ public void facgtSSS(ElementSize eSize, Register dst, Register src1, Register src2) { assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; scalarThreeSameEncoding(ASIMDInstruction.FACGT, elemSize1X(eSize), dst, src1, src2); } /** * C7.2.49 floating point add vector.
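 *
 * Illustrative usage sketch (not part of the original source; v0-v2 denote arbitrary SIMD
 * registers): faddVVV(ASIMDSize.FullReg, ElementSize.Word, v0, v1, v2) adds the four
 * single-precision lanes of v1 and v2 and writes the sums to v0.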
* * for i in 0..n-1 do dst[i] = fp_add(src1[i], src2[i]) * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. Note * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void faddVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; threeSameEncoding(ASIMDInstruction.FADD, size, elemSize0X(eSize), dst, src1, src2); } /** * C7.2.56 Floating-point compare equal.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = src1[i] == src2[i] ? -1 : 0 * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void fcmeqVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; threeSameEncoding(ASIMDInstruction.FCMEQ, size, elemSize0X(eSize), dst, src1, src2); } /** * C7.2.57 Floating-point compare equal to zero.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = src[i] == 0 ? -1 : 0 * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src SIMD register. */ public void fcmeqZeroVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; twoRegMiscEncoding(ASIMDInstruction.FCMEQ_ZERO, size, elemSize1X(eSize), dst, src); } /** * C7.2.58 Floating-point compare greater than or equal.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = src1[i] >= src2[i] ? -1 : 0 * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void fcmgeVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; threeSameEncoding(ASIMDInstruction.FCMGE, size, elemSize0X(eSize), dst, src1, src2); } /** * C7.2.59 Floating-point compare greater than or equal to zero.
*

 * For elements which the comparison is true, all bits of the corresponding dst lane are set to
 * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared.
 *
 * for i in 0..n-1 do dst[i] = src[i] >= 0 ? -1 : 0
 *
 * @param size register size.
 * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord.
 * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is
 * performed on more than one element).
 * @param dst SIMD register.
 * @param src SIMD register.
 */ public void fcmgeZeroVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; twoRegMiscEncoding(ASIMDInstruction.FCMGE_ZERO, size, elemSize1X(eSize), dst, src); } /** * C7.2.60 Floating-point compare greater than.
*

* For elements which the comparison is true, all bits of the corresponding dst lane are set to * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared. * * for i in 0..n-1 do dst[i] = src1[i] > src2[i] ? -1 : 0 * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void fcmgtVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; threeSameEncoding(ASIMDInstruction.FCMGT, size, elemSize1X(eSize), dst, src1, src2); } /** * C7.2.61 Floating-point compare greater than zero.
*

 * For elements which the comparison is true, all bits of the corresponding dst lane are set to
 * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared.
 *
 * for i in 0..n-1 do dst[i] = src[i] > 0 ? -1 : 0
 *
 * @param size register size.
 * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord.
 * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is
 * performed on more than one element).
 * @param dst SIMD register.
 * @param src SIMD register.
 */ public void fcmgtZeroVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; twoRegMiscEncoding(ASIMDInstruction.FCMGT_ZERO, size, elemSize1X(eSize), dst, src); } /** * C7.2.64 Floating-point compare less than or equal to zero.
*

 * For elements which the comparison is true, all bits of the corresponding dst lane are set to
 * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared.
 *
 * for i in 0..n-1 do dst[i] = src[i] <= 0 ? -1 : 0
 *
 * @param size register size.
 * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord.
 * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is
 * performed on more than one element).
 * @param dst SIMD register.
 * @param src SIMD register.
 */ public void fcmleZeroVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; twoRegMiscEncoding(ASIMDInstruction.FCMLE_ZERO, size, elemSize1X(eSize), dst, src); } /** * C7.2.65 Floating-point compare less than zero.
*

 * For elements which the comparison is true, all bits of the corresponding dst lane are set to
 * 1. Otherwise, if the comparison is false, then the corresponding dst lane is cleared.
 *
 * for i in 0..n-1 do dst[i] = src[i] < 0 ? -1 : 0
 *
 * @param size register size.
 * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord.
 * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is
 * performed on more than one element).
 * @param dst SIMD register.
 * @param src SIMD register.
 */ public void fcmltZeroVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; twoRegMiscEncoding(ASIMDInstruction.FCMLT_ZERO, size, elemSize1X(eSize), dst, src); } /** * C7.2.74 Floating-point convert to higher precision long.
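 *
 * Illustrative usage sketch (not part of the original source; v0 and v1 denote arbitrary SIMD
 * registers): fcvtlVV(ElementSize.Word, v0, v1) converts the two single-precision values in
 * the lower half of v1 into two double-precision values in v0.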
* * @param srcESize source element size. Must be ElementSize.HalfWord or ElementSize.Word. The * destination element size will be double this width. * @param dst SIMD register. * @param src SIMD register. */ public void fcvtlVV(ElementSize srcESize, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert srcESize == ElementSize.HalfWord || srcESize == ElementSize.Word : srcESize; twoRegMiscEncoding(ASIMDInstruction.FCVTL, false, elemSize0X(srcESize), dst, src); } /** * C7.2.79 Floating-point convert to lower precision narrow.
 * * @param srcESize source element size. Must be ElementSize.Word or ElementSize.DoubleWord. The
 * destination element size will be half this width.
 * @param dst SIMD register.
 * @param src SIMD register.
 */ public void fcvtnVV(ElementSize srcESize, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert srcESize == ElementSize.Word || srcESize == ElementSize.DoubleWord : srcESize; twoRegMiscEncoding(ASIMDInstruction.FCVTN, false, elemSize0X(srcESize), dst, src); } /** * C7.2.90 Floating-point convert to signed integer, rounding toward zero.
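 *
 * Illustrative usage sketch (not part of the original source; v0 and v1 denote arbitrary SIMD
 * registers): fcvtzsVV(ASIMDSize.FullReg, ElementSize.Word, v0, v1) converts each of the four
 * single-precision lanes of v1 to a 32-bit signed integer, truncating toward zero.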
* * @param size register size. * @param eSize source element size. Must be ElementSize.Word or ElementSize.DoubleWord. * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src SIMD register. */ public void fcvtzsVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; twoRegMiscEncoding(ASIMDInstruction.FCVTZS, size, elemSize1X(eSize), dst, src); } /** * C7.2.97 floating point divide vector.
* * for i in 0..n-1 do dst[i] = fp_div(src1[i], src2[i]) * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. Note * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void fdivVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.FDIV, size, elemSize0X(eSize), dst, src1, src2); } /** * C7.2.101 floating-point maximum.
* * for i in 0..n-1 do dst[i] = fp_max(src1[i], src2[i]) * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. Note * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void fmaxVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.FMAX, size, elemSize0X(eSize), dst, src1, src2); } /** * C7.2.111 floating-point minimum.
* * for i in 0..n-1 do dst[i] = fp_min(src1[i], src2[i]) * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. Note * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void fminVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.FMIN, size, elemSize1X(eSize), dst, src1, src2); } /** * C7.2.122 Floating-point fused multiply-add to accumulator.
* * for i in 0..n-1 do dst[i] += fp_multiply(src1[i], src2[i]) * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. Note * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void fmlaVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; threeSameEncoding(ASIMDInstruction.FMLA, size, elemSize0X(eSize), dst, src1, src2); } /** * C7.2.126 Floating-point fused multiply-subtract from accumulator.
* * for i in 0..n-1 do dst[i] -= fp_multiply(src1[i], src2[i]) * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. Note * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void fmlsVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; threeSameEncoding(ASIMDInstruction.FMLS, size, elemSize1X(eSize), dst, src1, src2); } /** * C7.2.132 Floating-point move immediate.
 * * dst = imm64{1,2}
 *
 * @param size register size.
 * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord.
 * @param dst SIMD register.
 * @param imm64 64-bit value to move. Is copied twice if register size is 128.
 */ public void fmovVI(ASIMDSize size, ElementSize eSize, Register dst, long imm64) { assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; ImmediateOp op = eSize == ElementSize.DoubleWord ? ImmediateOp.FMOVDP : ImmediateOp.FMOVSP; modifiedImmEncoding(op, size, dst, imm64); } /** * C7.2.135 floating point multiply vector.
* * for i in 0..n-1 do dst[i] = fp_mul(src1[i], src2[i]) * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. Note * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void fmulVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.FMUL, size, elemSize0X(eSize), dst, src1, src2); } /** * C7.2.139 Floating-point negate.
* * for i in 0..n-1 do dst[i] = -src[i] * * @param size register size. * @param eSize source element size. Must be ElementSize.Word or ElementSize.DoubleWord. * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src SIMD register. */ public void fnegVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; twoRegMiscEncoding(ASIMDInstruction.FNEG, size, elemSize1X(eSize), dst, src); } /** * C7.2.171 Floating-point square root.
* * for i in 0..n-1 do dst[i] = fp_sqrt(src[i]) * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. Note * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src SIMD register. */ public void fsqrtVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; twoRegMiscEncoding(ASIMDInstruction.FSQRT, size, elemSize1X(eSize), dst, src); } /** * C7.2.173 floating point subtract vector.
* * for i in 0..n-1 do dst[i] = fp_sub(src1[i], src2[i]) * * @param size register size. * @param eSize element size. Must be ElementSize.Word or ElementSize.DoubleWord. Note * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void fsubVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.FSUB, size, elemSize1X(eSize), dst, src1, src2); } /** * C7.2.175 Insert vector element from another vector element.
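 *
 * Illustrative usage sketch (not part of the original source; v0 and v1 denote arbitrary SIMD
 * registers): insXX(ElementSize.Word, v0, 0, v1, 3) copies 32-bit element 3 of v1 into
 * element 0 of v0 while leaving the remaining lanes of v0 unchanged.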
* * This instruction copies the vector element of the source register to the specified vector * element of the destination register. * * @param eSize size of value to duplicate. * @param dst SIMD register. * @param dstIdx offset of value to store. * @param src SIMD register. * @param srcIdx offset of value to duplicate. */ public void insXX(ElementSize eSize, Register dst, int dstIdx, Register src, int srcIdx) { assert dstIdx >= 0 && dstIdx < ASIMDSize.FullReg.bytes() / eSize.bytes() : dstIdx + " " + eSize; assert srcIdx >= 0 && srcIdx < ASIMDSize.FullReg.bytes() / eSize.bytes() : srcIdx + " " + eSize; int srcIdxEncoding = (srcIdx * eSize.bytes()) << 11; copyEncoding(ASIMDInstruction.INSELEM, srcIdxEncoding, true, eSize, dst, src, dstIdx); } /** * C7.2.176 Insert vector element from general-purpose register.
 * * dst[index] = src
 *
 * Note the rest of the dst register is unaltered.
 *
 * @param eSize size of the element to insert.
 * @param dst SIMD register.
 * @param index index of the destination element.
 * @param src general-purpose register.
 */ public void insXG(ElementSize eSize, Register dst, int index, Register src) { copyEncoding(ASIMDInstruction.INSGEN, true, eSize, dst, src, index); } /** * C7.2.177 Load multiple single-element structures to one register.
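 *
 * Illustrative usage sketch (not part of the original source; v0 denotes an arbitrary SIMD
 * register and addr an AArch64Address built elsewhere):
 * ld1MultipleV(ASIMDSize.FullReg, ElementSize.Byte, v0, addr) loads 16 consecutive bytes from
 * addr into v0.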
* * This instruction loads multiple single-element structures from memory and writes the result * to one register. * * @param size register size. * @param eSize element size. * @param dst destination of first structure's value * @param addr address of first structure. */ public void ld1MultipleV(ASIMDSize size, ElementSize eSize, Register dst, AArch64Address addr) { assert dst.getRegisterCategory().equals(SIMD) : dst; loadStoreMultipleStructures(ASIMDInstruction.LD1_MULTIPLE_1R, size, eSize, dst, addr); } /** * C7.2.177 Load multiple single-element structures to two registers.
* * This instruction loads multiple single-element structures from memory and writes the result * to two registers. Note the two registers must be consecutive (modulo the number of SIMD * registers). * * @param size register size. * @param eSize element size. * @param dst1 destination of first structure's value. * @param dst2 destination of second structure's value. Must be register after dst1. * @param addr address of first structure. */ public void ld1MultipleVV(ASIMDSize size, ElementSize eSize, Register dst1, Register dst2, AArch64Address addr) { assert assertConsecutiveSIMDRegisters(dst1, dst2); loadStoreMultipleStructures(ASIMDInstruction.LD1_MULTIPLE_2R, size, eSize, dst1, addr); } /** * C7.2.177 Load multiple single-element structures to three registers.
* * This instruction loads multiple single-element structures from memory and writes the result * to three registers. Note the three registers must be consecutive (modulo the number of SIMD * registers). * * @param size register size. * @param eSize element size. * @param dst1 destination of first structure's value. * @param dst2 destination of second structure's value. Must be register after dst1. * @param dst3 destination of third structure's value. Must be register after dst2. * @param addr address of first structure. */ public void ld1MultipleVVV(ASIMDSize size, ElementSize eSize, Register dst1, Register dst2, Register dst3, AArch64Address addr) { assert assertConsecutiveSIMDRegisters(dst1, dst2, dst3); loadStoreMultipleStructures(ASIMDInstruction.LD1_MULTIPLE_3R, size, eSize, dst1, addr); } /** * C7.2.177 Load multiple single-element structures to four registers.
* * This instruction loads multiple single-element structures from memory and writes the result * to four registers. Note the four registers must be consecutive (modulo the number of SIMD * registers). * * @param size register size. * @param eSize element size. * @param dst1 destination of first structure's value. * @param dst2 destination of second structure's value. Must be register after dst1. * @param dst3 destination of third structure's value. Must be register after dst2. * @param dst4 destination of fourth structure's value. Must be register after dst3. * @param addr address of first structure. */ public void ld1MultipleVVVV(ASIMDSize size, ElementSize eSize, Register dst1, Register dst2, Register dst3, Register dst4, AArch64Address addr) { assert assertConsecutiveSIMDRegisters(dst1, dst2, dst3, dst4); loadStoreMultipleStructures(ASIMDInstruction.LD1_MULTIPLE_4R, size, eSize, dst1, addr); } /** * C7.2.179 Load one single-element structure and replicate to all lanes (of one register).
* * This instruction loads a single-element structure from memory and replicates the structure to * all lanes of the register. * * @param size register size. * @param eSize element size of value to replicate. * @param dst SIMD register. * @param addr address of structure. */ public void ld1rV(ASIMDSize size, ElementSize eSize, Register dst, AArch64Address addr) { assert dst.getRegisterCategory().equals(SIMD) : dst; loadStoreSingleStructure(ASIMDInstruction.LD1R, size, eSize, dst, addr); } /** * C7.2.180 Load multiple 2-element structures to two registers, with de-interleaving.
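 *
 * Illustrative usage sketch (not part of the original source; v0 and v1 denote consecutive
 * SIMD registers and addr an AArch64Address built elsewhere):
 * ld2MultipleVV(ASIMDSize.FullReg, ElementSize.Byte, v0, v1, addr) loads 32 bytes from addr,
 * placing the even-indexed bytes in v0 and the odd-indexed bytes in v1.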
* * This instruction loads multiple 2-element structures from memory and writes the result to two * registers. Note the two registers must be consecutive (modulo the number of SIMD * registers).
* * * memory at addr: b0 b1 b2 b3 b4 ...
* result in dst1: b0 b2 b4 ...
* result in dst2: b1 b3 b5 ...
*
* * @param size register size. * @param eSize element size. * @param dst1 destination of structure's first value. * @param dst2 destination of structure's second value. Must be register after dst1. * @param addr address of first structure. */ public void ld2MultipleVV(ASIMDSize size, ElementSize eSize, Register dst1, Register dst2, AArch64Address addr) { assert assertConsecutiveSIMDRegisters(dst1, dst2); assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; loadStoreMultipleStructures(ASIMDInstruction.LD2_MULTIPLE_2R, size, eSize, dst1, addr); } /** * C7.2.186 Load multiple 4-element structures to four registers, with de-interleaving.
* * This instruction loads multiple 4-element structures from memory and writes the result to * four registers. Note the four registers must be consecutive (modulo the number of SIMD * registers).
* * * memory at addr: b0 b1 b2 b3 b4 ...
* result in dst1: b0 b4 b8 ...
* result in dst2: b1 b5 b9 ...
* result in dst3: b2 b6 b10 ...
* result in dst4: b3 b7 b11 ...
*
* * @param size register size. * @param eSize element size. * @param dst1 destination of structure's first value. * @param dst2 destination of structure's second value. Must be register after dst1. * @param dst3 destination of structure's third value. Must be register after dst2. * @param dst4 destination of structure's fourth value. Must be register after dst3. * @param addr address of first structure. */ public void ld4MultipleVVVV(ASIMDSize size, ElementSize eSize, Register dst1, Register dst2, Register dst3, Register dst4, AArch64Address addr) { assert assertConsecutiveSIMDRegisters(dst1, dst2, dst3, dst4); assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; loadStoreMultipleStructures(ASIMDInstruction.LD4_MULTIPLE_4R, size, eSize, dst1, addr); } /** * C7.2.188 Load single 4-element structure and Replicate to all lanes of four registers.
* * This instruction loads a 4-element structure from memory and replicates the structure to all * lanes of the four registers. * * @param size register size. * @param eSize element size of value to replicate. * @param dst1 destination of first structure's value. * @param dst2 destination of second structure's value. Must be register after dst1. * @param dst3 destination of third structure's value. Must be register after dst2. * @param dst4 destination of fourth structure's value. Must be register after dst3. * @param addr address of first structure. */ public void ld4rVVVV(ASIMDSize size, ElementSize eSize, Register dst1, Register dst2, Register dst3, Register dst4, AArch64Address addr) { assert assertConsecutiveSIMDRegisters(dst1, dst2, dst3, dst4); loadStoreSingleStructure(ASIMDInstruction.LD4R, size, eSize, dst1, addr); } /** * C7.2.196 Multiply-add to accumulator.
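 *
 * Illustrative usage sketch (not part of the original source; v0-v2 denote arbitrary SIMD
 * registers): mlaVVV(ASIMDSize.FullReg, ElementSize.Word, v0, v1, v2) multiplies the 32-bit
 * integer lanes of v1 and v2 and adds each product to the corresponding lane of v0.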
* * for i in 0..n-1 do dst[i] += int_multiply(src1[i], src2[i]) * * @param size register size. * @param eSize element size. Cannot be ElementSize.DoubleWord. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void mlaVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert eSize != ElementSize.DoubleWord : eSize; threeSameEncoding(ASIMDInstruction.MLA, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.198 Multiply-subtract from accumulator.
* * for i in 0..n-1 do dst[i] -= int_multiply(src1[i], src2[i]) * * @param size register size. * @param eSize element size. Cannot be ElementSize.DoubleWord. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void mlsVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert eSize != ElementSize.DoubleWord : eSize; threeSameEncoding(ASIMDInstruction.MLS, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.204 Move immediate.
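 *
 * Illustrative usage sketch (not part of the original source; v0 denotes an arbitrary SIMD
 * register): moviVI(ASIMDSize.FullReg, v0, 0L) clears all 128 bits of v0, since 0 is an
 * encodable ASIMD immediate.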
* * dst = imm{1,2} * * @param size register size. * @param dst SIMD register. * @param imm long value to move. If size is 128, then this value is copied twice */ public void moviVI(ASIMDSize size, Register dst, long imm) { assert dst.getRegisterCategory().equals(SIMD) : dst; modifiedImmEncoding(ImmediateOp.MOVI, size, dst, imm); } /** * C7.2.206 Integer multiply vector.
* * for i in 0..n-1 do dst[i] = int_mul(src1[i], src2[i]) * * @param size register size. * @param eSize element size. Cannot be ElementSize.DoubleWord. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void mulVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert eSize != ElementSize.DoubleWord : eSize; threeSameEncoding(ASIMDInstruction.MUL, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.208 Move inverted immediate.
* * dst = ~(imm{1,2}) * * @param size register size. * @param dst SIMD register. * @param imm long value to move. If size is 128, then this value is copied twice */ public void mvniVI(ASIMDSize size, Register dst, long imm) { modifiedImmEncoding(ImmediateOp.MVNI, size, dst, imm); } /** * C7.2.209 Negate.
* * for i in 0..n-1 do dst[i] = -src[i] * * @param size register size. * @param eSize source element size. ElementSize.DoubleWord is only applicable when size is 128 * (i.e. the operation is performed on more than one element). * @param dst SIMD register. * @param src SIMD register. */ public void negVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; twoRegMiscEncoding(ASIMDInstruction.NEG, size, elemSizeXX(eSize), dst, src); } /** * C7.2.210 Bitwise not vector.
* * for i in 0..n-1 do dst[i] = ~src[i] * * @param size register size. * @param dst SIMD register. * @param src SIMD register. */ public void notVV(ASIMDSize size, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; twoRegMiscEncoding(ASIMDInstruction.NOT, size, elemSize00, dst, src); } /** * C7.2.211 Bitwise inclusive or not vector.
* * for i in 0..n-1 do dst[i] = src1[i] | ~src2[i] * * @param size register size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void ornVVV(ASIMDSize size, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.ORN, size, elemSize11, dst, src1, src2); } /** * C7.2.212 Bitwise inclusive or.
* * dst = dst | imm{1,2} * * @param size register size. * @param dst SIMD register. * @param imm long value to move. If size is 128, then this value is copied twice */ public void orrVI(ASIMDSize size, Register dst, long imm) { modifiedImmEncoding(ImmediateOp.ORR, size, dst, imm); } /** * C7.2.213 Bitwise inclusive or vector.
* * for i in 0..n-1 do dst[i] = src1[i] | src2[i] * * @param size register size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void orrVVV(ASIMDSize size, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.ORR, size, elemSize10, dst, src1, src2); } /** * C7.2.215 Polynomial Multiply Long (lower half).
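 *
 * Illustrative usage sketch (not part of the original source; v0-v2 denote arbitrary SIMD
 * registers): pmullVVV(ElementSize.DoubleWord, v0, v1, v2) carry-lessly multiplies the low
 * 64-bit elements of v1 and v2 into a 128-bit polynomial product in v0; this DoubleWord form
 * is only available on processors implementing the PMULL extension.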
* * This instruction multiplies corresponding elements in the lower half of the vectors. * * @param srcESize source element size. Must be ElementSize.Byte or ElementSize.DoubleWord. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void pmullVVV(ElementSize srcESize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert srcESize == ElementSize.Byte || srcESize == ElementSize.DoubleWord : srcESize; threeDifferentEncoding(ASIMDInstruction.PMULL, false, elemSizeXX(srcESize), dst, src1, src2); } /** * C7.2.215 Polynomial Multiply Long (upper half).
* * This instruction multiplies corresponding elements in the upper half of the vectors. * * @param srcESize source element size. Must be ElementSize.Byte or ElementSize.DoubleWord. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void pmull2VVV(ElementSize srcESize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert srcESize == ElementSize.Byte || srcESize == ElementSize.DoubleWord : srcESize; threeDifferentEncoding(ASIMDInstruction.PMULL, true, elemSizeXX(srcESize), dst, src1, src2); } /** * C7.2.217 Rotate and Exclusive-OR.
* * Rotate and Exclusive-OR rotates each 64-bit element of the 128-bit vector in a source * SIMD&FP register left by 1, performs a bitwise exclusive-OR of the resulting 128-bit * vector and the vector in another source SIMD&FP register, and writes the result to the * destination SIMD&FP register. * * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void rax1VVV(Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; cryptographicThreeSHA512(ASIMDInstruction.RAX1, dst, src1, src2); } /** * C7.2.218 Reverse Bit order.
* This instruction reverses the bits in each byte. * * @param size register size. * @param dst SIMD register. * @param src SIMD register. */ public void rbitVV(ASIMDSize size, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; twoRegMiscEncoding(ASIMDInstruction.RBIT, size, elemSize01, dst, src); } /** * C7.2.219 Reverse elements in 16-bit halfwords.
* This instruction reverses the order of 8-bit elements in each halfword. * * @param size register size. * @param dst SIMD register. * @param src SIMD register. */ public void rev16VV(ASIMDSize size, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; twoRegMiscEncoding(ASIMDInstruction.REV16, size, elemSize00, dst, src); } /** * C7.2.220 Reverse elements in 32-bit words.
 * This instruction reverses the order of elements of size revGranularity in each 32-bit word.
 *
 * @param size register size.
 * @param revGranularity granularity at which the elements within each 32-bit word are reversed.
 * Can be ElementSize.Byte or ElementSize.HalfWord.
 * @param dst SIMD register.
 * @param src SIMD register.
 */ public void rev32VV(ASIMDSize size, ElementSize revGranularity, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert revGranularity == ElementSize.Byte || revGranularity == ElementSize.HalfWord : revGranularity; twoRegMiscEncoding(ASIMDInstruction.REV32, size, elemSizeXX(revGranularity), dst, src); } /** * C7.2.221 Reverse elements in 64-bit words.
* This instruction reverses the order of elements of size revGranularity in each 64-bit word. * * @param size register size. * @param revGranularity within each element at what granularity the bits should be reversed. * DoubleWord is not allowed. * @param dst SIMD register. * @param src SIMD register. */ public void rev64VV(ASIMDSize size, ElementSize revGranularity, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert revGranularity != ElementSize.DoubleWord : revGranularity; twoRegMiscEncoding(ASIMDInstruction.REV64, size, elemSizeXX(revGranularity), dst, src); } /** * C7.2.231 Signed add across long vector.
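 *
 * Illustrative usage sketch (not part of the original source; v0 and v1 denote arbitrary SIMD
 * registers): saddlvSV(ASIMDSize.FullReg, ElementSize.Byte, v0, v1) adds all 16 signed byte
 * lanes of v1 and writes the widened 16-bit sum to the scalar (lowest) element of v0.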
 * dst = src[0] + ... + src[n-1].
*

* Dst is twice the width of the vector elements, so overflow is not possible. * * @param size register size. * @param elementSize Unexpanded width of each addition operand. * @param dst SIMD register. Should not be null. * @param src SIMD register. Should not be null. */ public void saddlvSV(ASIMDSize size, ElementSize elementSize, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert !(size == ASIMDSize.HalfReg && elementSize == ElementSize.Word) : "Invalid size and lane combination for saddlv"; assert elementSize != ElementSize.DoubleWord : "Invalid lane width for saddlv"; acrossLanesEncoding(ASIMDInstruction.SADDLV, size, elemSizeXX(elementSize), dst, src); } /** * C7.2.234 Signed integer convert to floating-point.
* * @param size register size. * @param eSize source element size. Must be ElementSize.Word or ElementSize.DoubleWord. * ElementSize.DoubleWord is only applicable when size is 128 (i.e. the operation is * performed on more than one element). * @param dst SIMD register. * @param src SIMD register. */ public void scvtfVV(ASIMDSize size, ElementSize eSize, Register dst, Register src) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert eSize == ElementSize.Word || eSize == ElementSize.DoubleWord : eSize; twoRegMiscEncoding(ASIMDInstruction.SCVTF, size, elemSize0X(eSize), dst, src); } /** * C7.2.239 SHA1 hash update.
* * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sha1c(Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; cryptographicThreeSHA(ASIMDInstruction.SHA1C, dst, src1, src2); } /** * C7.2.240 SHA1 fixed rotate.
* * @param dst SIMD register. * @param src SIMD register. */ public void sha1h(Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; cryptographicTwoSHA(ASIMDInstruction.SHA1H, dst, src); } /** * C7.2.241 SHA1 hash update (majority).
* * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sha1m(Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; cryptographicThreeSHA(ASIMDInstruction.SHA1M, dst, src1, src2); } /** * C7.2.242 SHA1 hash update (parity).
* * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sha1p(Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; cryptographicThreeSHA(ASIMDInstruction.SHA1P, dst, src1, src2); } /** * C7.2.243 SHA1 schedule update 0.
* * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sha1su0(Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; cryptographicThreeSHA(ASIMDInstruction.SHA1SU0, dst, src1, src2); } /** * C7.2.244 SHA1 schedule update 1.
* * @param dst SIMD register. * @param src SIMD register. */ public void sha1su1(Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; cryptographicTwoSHA(ASIMDInstruction.SHA1SU1, dst, src); } /** * C7.2.245 SHA256 hash update (part 2).
* * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sha256h2(Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; cryptographicThreeSHA(ASIMDInstruction.SHA256H2, dst, src1, src2); } /** * C7.2.246 SHA256 hash update (part 1).
* * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sha256h(Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; cryptographicThreeSHA(ASIMDInstruction.SHA256H, dst, src1, src2); } /** * C7.2.247 SHA256 schedule update 0.
* * @param dst SIMD register. * @param src SIMD register. */ public void sha256su0(Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; cryptographicTwoSHA(ASIMDInstruction.SHA256SU0, dst, src); } /** * C7.2.248 SHA256 schedule update 1.
* * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sha256su1(Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; cryptographicThreeSHA(ASIMDInstruction.SHA256SU1, dst, src1, src2); } /** * C7.2.249 SHA512 Hash update part 1.
* * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sha512h(Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; cryptographicThreeSHA512(ASIMDInstruction.SHA512H, dst, src1, src2); } /** * C7.2.250 SHA512 Hash update part 2.
* * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sha512h2(Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; cryptographicThreeSHA512(ASIMDInstruction.SHA512H2, dst, src1, src2); } /** * C7.2.251 SHA512 Schedule Update 0.
* * @param dst SIMD register. * @param src SIMD register. */ public void sha512su0(Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; cryptographicTwoSHA512(ASIMDInstruction.SHA512SU0, dst, src); } /** * C7.2.252 SHA512 Schedule Update 1.
* * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sha512su1(Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; cryptographicThreeSHA512(ASIMDInstruction.SHA512SU1, dst, src1, src2); } /** * C7.2.254 shift left (immediate).
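 *
 * Illustrative usage sketch (not part of the original source; v0 and v1 denote arbitrary SIMD
 * registers): shlVVI(ASIMDSize.FullReg, ElementSize.Word, v0, v1, 3) shifts each 32-bit lane
 * of v1 left by 3 bits and writes the results to v0.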
* * for i in 0..n-1 do dst[i] = src[i] << imm * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src SIMD register. * @param shiftAmt shift amount. */ public void shlVVI(ASIMDSize size, ElementSize eSize, Register dst, Register src, int shiftAmt) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; /* Accepted shift range */ assert shiftAmt >= 0 && shiftAmt < eSize.nbits : shiftAmt + " " + eSize; /* shift = imm7 - eSize.nbits */ int imm7 = eSize.nbits + shiftAmt; shiftByImmEncoding(ASIMDInstruction.SHL, size, imm7, dst, src); } /** * C7.2.258 shift right narrow *
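 *
 * Illustrative usage sketch (not part of the original source; v0 and v1 denote arbitrary SIMD
 * registers): shrnVV(ElementSize.HalfWord, v0, v1, 8) shifts each 32-bit lane of v1 right by
 * 8, truncates it to 16 bits, and writes the narrowed results to the lower half of v0.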

* From the manual: "This instruction reads each unsigned integer value from the source * SIMD&FP register, right shifts each result by an immediate value, put the final result * into a vector, and writes the vector to the lower or upper half of the destination * SIMD&FP register. The destination vector elements are half as long as the source vector * elements. The results are truncated..." * * * for i in 0..(n/2)-1 do dst_bits[i * size, (i+1) * size] = truncate(src_bits[i * 2 * size, (i+1) * 2 * size] >>> shift) * for i in n/2..n-1 do dst[i] = 0 * * * @param dstESize destination element size. * @param dst SIMD register. * @param src SIMD register. * @param shift the shift amount. */ public void shrnVV(ElementSize dstESize, Register dst, Register src, int shift) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert dstESize != ElementSize.DoubleWord : "Invalid lane width for shrn"; assert shift > 0 && shift <= dstESize.nbits : shift + " " + dstESize; // shift = dstESize.nbits * 2 - imm7 int imm7 = dstESize.nbits * 2 - shift; shiftByImmEncoding(ASIMDInstruction.SHRN, false, imm7, dst, src); } /** * C7.2.268 Signed maximum.
* * for i in 0..n-1 do dst[i] = int_max(src1[i], src2[i]) * * @param size register size. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void smaxVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert eSize != ElementSize.DoubleWord : "Invalid lane width for smax"; threeSameEncoding(ASIMDInstruction.SMAX, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.271 Signed minimum.
* * for i in 0..n-1 do dst[i] = int_min(src1[i], src2[i]) * * @param size register size. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sminVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert eSize != ElementSize.DoubleWord : "Invalid lane width for smin"; threeSameEncoding(ASIMDInstruction.SMIN, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.272 Signed minimum pairwise.
* * * concat = src2:src1 * for i in 0..n-1 do dst[i] = int_min(concat[2 * i], concat[2 * i + 1]) * * * @param size register size. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sminpVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert eSize != ElementSize.DoubleWord : "Invalid lane width for sminp"; threeSameEncoding(ASIMDInstruction.SMINP, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.275 Signed Multiply-Add Long.
* * for i in 0..n-1 do dst[i] += int_multiply(src1[i], src2[i]) * * @param srcESize source element size. Cannot be ElementSize.DoubleWord. The destination * element size will be double this width. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void smlalVVV(ElementSize srcESize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert srcESize != ElementSize.DoubleWord : srcESize; threeDifferentEncoding(ASIMDInstruction.SMLAL, false, elemSizeXX(srcESize), dst, src1, src2); } /** * C7.2.277 Signed Multiply-Subtract Long.
* * for i in 0..n-1 do dst[i] -= int_multiply(src1[i], src2[i]) * * @param srcESize source element size. Cannot be ElementSize.DoubleWord. The destination * element size will be double this width. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void smlslVVV(ElementSize srcESize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert srcESize != ElementSize.DoubleWord : srcESize; threeDifferentEncoding(ASIMDInstruction.SMLSL, false, elemSizeXX(srcESize), dst, src1, src2); } /** * C7.2.279 Signed move vector element to general-purpose register.
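 *
 * Illustrative usage sketch (not part of the original source; r0 denotes an arbitrary
 * general-purpose register and v1 an arbitrary SIMD register):
 * smovGX(ElementSize.Word, ElementSize.Byte, r0, v1, 3) sign-extends byte element 3 of v1
 * into the 32-bit general-purpose register r0.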
* * dst (gp) = sign-extend(src[index]) (simd). *

 * Note that the target register size (dst) must be greater than the source element size.
 *
 * @param dstESize width of the sign-extended result. Must be ElementSize.Word or ElementSize.DoubleWord.
 * @param srcESize width of element to move.
 * @param dst general-purpose register.
 * @param src SIMD register.
 * @param index offset of value to move.
 */ public void smovGX(ElementSize dstESize, ElementSize srcESize, Register dst, Register src, int index) { assert srcESize != ElementSize.DoubleWord : srcESize; assert dstESize == ElementSize.Word || dstESize == ElementSize.DoubleWord : dstESize; assert srcESize.nbits < dstESize.nbits : "the target size must be larger than the source size"; assert dst.getRegisterCategory().equals(CPU) : dst; assert src.getRegisterCategory().equals(SIMD) : src; copyEncoding(ASIMDInstruction.SMOV, dstESize == ElementSize.DoubleWord, srcESize, dst, src, index); } /** * C7.2.315 signed shift left (register).
* * for i in 0..n-1 do
 * if (byte(src2[i]) > 0)
 * dst[i] = (src1[i] << byte(src2[i]))
 * else
 * dst[i] = (src1[i] >> -byte(src2[i]))
* * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void sshlVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.SSHL, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.316 Signed shift left long (immediate).
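 *
 * Illustrative usage sketch (not part of the original source; v0 and v1 denote arbitrary SIMD
 * registers): sshllVVI(ElementSize.Byte, v0, v1, 0) sign-extends the eight bytes in the lower
 * half of v1 into eight 16-bit elements of v0; a shift amount of 0 makes this a plain
 * sign-extending widen.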
*

* From the manual: "This instruction reads each vector element from the source SIMD&FP * register, left shifts each vector element by the specified shift amount ... The destination * vector elements are twice as long as the source vector elements. All the values in this * instruction are signed integer values." * * Extracts vector elements from the lower half of the source register. * * @param srcESize source element size. Cannot be ElementSize.DoubleWord. The destination * element size will be double this width. * @param dst SIMD register. * @param src SIMD register. * @param shiftAmt shift left amount. */ public void sshllVVI(ElementSize srcESize, Register dst, Register src, int shiftAmt) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert srcESize != ElementSize.DoubleWord : srcESize; /* Accepted shift range */ assert shiftAmt >= 0 && shiftAmt < srcESize.nbits : shiftAmt + " " + srcESize; /* shift = imm7 - srcESize.nbits */ int imm7 = srcESize.nbits + shiftAmt; shiftByImmEncoding(ASIMDInstruction.SSHLL, false, imm7, dst, src); } /** * C7.2.316 Signed shift left long (immediate).
* *

* From the manual: "This instruction reads each vector element from the source SIMD&FP * register, left shifts each vector element by the specified shift amount ... The destination * vector elements are twice as long as the source vector elements. All the values in this * instruction are signed integer values." * * Extracts vector elements from the upper half of the source register. * * @param srcESize source element size. Cannot be ElementSize.DoubleWord. The destination * element size will be double this width. * @param dst SIMD register. * @param src SIMD register. * @param shiftAmt shift left amount. */ public void sshll2VVI(ElementSize srcESize, Register dst, Register src, int shiftAmt) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert srcESize != ElementSize.DoubleWord : srcESize; /* Accepted shift range */ assert shiftAmt >= 0 && shiftAmt < srcESize.nbits : shiftAmt + " " + srcESize; /* shift = imm7 - srcESize.nbits */ int imm7 = srcESize.nbits + shiftAmt; shiftByImmEncoding(ASIMDInstruction.SSHLL, true, imm7, dst, src); } /** * C7.2.317 signed shift right (immediate).
* * for i in 0..n-1 do dst[i] = src[i] >> imm * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src SIMD register. * @param shiftAmt shift right amount. */ public void sshrVVI(ASIMDSize size, ElementSize eSize, Register dst, Register src, int shiftAmt) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; /* Accepted shift range */ assert shiftAmt > 0 && shiftAmt <= eSize.nbits : shiftAmt + " " + eSize; /* shift = eSize.nbits * 2 - imm7 */ int imm7 = eSize.nbits * 2 - shiftAmt; shiftByImmEncoding(ASIMDInstruction.SSHR, size, imm7, dst, src); } /** * C7.2.319 Integer subtract vector Long.
* The destination vector elements are twice as long as the source vector elements.
* * for i in 0..(n/2)-1 do dst[i] = int_sub(src1[i], src2[i]) * * @param srcESize source element size. Cannot be ElementSize.DoubleWord. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void ssublVVV(ElementSize srcESize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert srcESize != ElementSize.DoubleWord : srcESize; threeDifferentEncoding(ASIMDInstruction.SSUBL, false, elemSizeXX(srcESize), dst, src1, src2); } /** * C7.2.319 Integer subtract vector Long upper half.
* The destination vector elements are twice as long as the source vector elements.
* * for i in (n/2)..n-1 do dst[i] = int_sub(src1[i], src2[i]) * * @param srcESize source element size. Cannot be ElementSize.DoubleWord. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void ssubl2VVV(ElementSize srcESize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert srcESize != ElementSize.DoubleWord : srcESize; threeDifferentEncoding(ASIMDInstruction.SSUBL, true, elemSizeXX(srcESize), dst, src1, src2); } /** * C7.2.321 Store multiple single-element structures from one register.
* * This instruction stores elements to memory from one register. * * @param size register size. * @param eSize element size. * @param src value to store in structure. * @param addr address of first structure. */ public void st1MultipleV(ASIMDSize size, ElementSize eSize, Register src, AArch64Address addr) { assert src.getRegisterCategory().equals(SIMD) : src; loadStoreMultipleStructures(ASIMDInstruction.ST1_MULTIPLE_1R, size, eSize, src, addr); } /** * C7.2.321 Store multiple single-element structures from two registers.
* * This instruction stores elements to memory from two registers. Note the two registers must be * consecutive (modulo the number of SIMD registers). * * @param size register size. * @param eSize element size. * @param src1 value to store in first structure. * @param src2 value to store in second structure. Must be register after src1. * @param addr address of first structure. */ public void st1MultipleVV(ASIMDSize size, ElementSize eSize, Register src1, Register src2, AArch64Address addr) { assert assertConsecutiveSIMDRegisters(src1, src2); loadStoreMultipleStructures(ASIMDInstruction.ST1_MULTIPLE_2R, size, eSize, src1, addr); } /** * C7.2.321 Store multiple single-element structures from three registers.
* * This instruction stores elements to memory from three registers. Note the three registers * must be consecutive (modulo the number of SIMD registers). * * @param size register size. * @param eSize element size. * @param src1 value to store in first structure. * @param src2 value to store in second structure. Must be register after src1. * @param src3 value to store in third structure. Must be register after src2. * @param addr address of first structure. */ public void st1MultipleVVV(ASIMDSize size, ElementSize eSize, Register src1, Register src2, Register src3, AArch64Address addr) { assert assertConsecutiveSIMDRegisters(src1, src2, src3); loadStoreMultipleStructures(ASIMDInstruction.ST1_MULTIPLE_3R, size, eSize, src1, addr); } /** * C7.2.321 Store multiple single-element structures from four registers.
* * This instruction stores elements to memory from four registers. Note the four registers must * be consecutive (modulo the number of SIMD registers). * * @param size register size. * @param eSize element size. * @param src1 value to store in first structure. * @param src2 value to store in second structure. Must be register after src1. * @param src3 value to store in third structure. Must be register after src2. * @param src4 value to store in fourth structure. Must be register after src3. * @param addr address of first structure. */ public void st1MultipleVVVV(ASIMDSize size, ElementSize eSize, Register src1, Register src2, Register src3, Register src4, AArch64Address addr) { assert assertConsecutiveSIMDRegisters(src1, src2, src3, src4); loadStoreMultipleStructures(ASIMDInstruction.ST1_MULTIPLE_4R, size, eSize, src1, addr); } /** * C7.2.323 Store multiple 2-element structures to memory, with interleaving.
* * Note the registers must be consecutive (modulo the number of SIMD registers).
* * * src1: b0 b2 b4 ...
* src2: b1 b3 b5 ...
* result in memory at addr: b0 b1 b2 b3 b4 ...
*
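* A usage sketch (illustrative; v0 and v1 must be consecutive SIMD registers, and addr stands in for an AArch64Address in a structure addressing mode, assumed to be prepared by the caller):
*
*     st2MultipleVV(ASIMDSize.FullReg, ElementSize.Word, v0, v1, addr)
*
* interleaves the 32-bit lanes of v0 and v1 and stores the resulting 2-element structures to memory starting at addr.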
* * @param size register size. * @param eSize element size. * @param src1 structure's first value. * @param src2 structure's second value. Must be register after src1. * @param addr destination address of first structure. */ public void st2MultipleVV(ASIMDSize size, ElementSize eSize, Register src1, Register src2, AArch64Address addr) { assert assertConsecutiveSIMDRegisters(src1, src2); assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; loadStoreMultipleStructures(ASIMDInstruction.ST2_MULTIPLE_2R, size, eSize, src1, addr); } /** * C7.2.327 Store multiple 4-element structures to memory, with interleaving.
* * Note the registers must be consecutive (modulo the number of SIMD registers).
* * * src1: b0 b4 b8 ...
* src2: b1 b5 b9 ...
* src3: b2 b6 b10 ...
* src4: b3 b7 b11 ...
* result in memory at addr: b0 b1 b2 b3 b4 ...
*
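* A usage sketch (illustrative; v0 through v3 must be consecutive SIMD registers, and addr stands in for an AArch64Address in a structure addressing mode, assumed to be prepared by the caller):
*
*     st4MultipleVVVV(ASIMDSize.FullReg, ElementSize.Word, v0, v1, v2, v3, addr)
*
* interleaves the 32-bit lanes of v0 through v3 and stores the resulting 4-element structures to memory starting at addr.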
* * @param size register size. * @param eSize element size. * @param src1 structure's first value. * @param src2 structure's second value. Must be register after src1. * @param src3 structure's third value. Must be register after src2. * @param src4 structure's fourth value. Must be register after src3. * @param addr destination address of first structure. */ public void st4MultipleVVVV(ASIMDSize size, ElementSize eSize, Register src1, Register src2, Register src3, Register src4, AArch64Address addr) { assert assertConsecutiveSIMDRegisters(src1, src2, src3, src4); assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; loadStoreMultipleStructures(ASIMDInstruction.ST4_MULTIPLE_4R, size, eSize, src1, addr); } /** * C7.2.334 Integer subtract scalar.
* * dst[0] = int_sub(src1[0], src2[0]) *

* Note that only 64-bit (DoubleWord) operations are available. * * @param eSize element size. Must be of type ElementSize.DoubleWord * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void subSSS(ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert eSize == ElementSize.DoubleWord : eSize; // only size supported scalarThreeSameEncoding(ASIMDInstruction.SUB, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.334 Integer subtract vector.
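* A usage sketch (illustrative; v0, v1 and v2 stand in for SIMD registers):
*
*     subVVV(ASIMDSize.FullReg, ElementSize.Word, v0, v1, v2)
*
* computes v1[i] - v2[i] for each of the four 32-bit lanes and writes the results to v0.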
* * for i in 0..n-1 do dst[i] = int_sub(src1[i], src2[i]) * * @param size register size. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void subVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.SUB, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.339 Table vector lookup (single register table variant).
* * This instruction is used to perform permutations at a byte granularity. Within the * destination, each byte is determined by using the index register to pick either a value * within the table register, or if the index exceeds the table's boundary, then the dst * register is set to zero. * *

     * tbl[0..n-1] = table[0..n-1]
     * for i in 0..n-1 {
     *     idx = index[i]
     *     if (idx < n)
     *      dst[i] = tbl[idx]
     *     else
     *      dst[i] = 0
     * }
     * 
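* A usage sketch (illustrative; v0, v1 and v2 stand in for SIMD registers):
*
*     tblVVV(ASIMDSize.FullReg, v0, v1, v2)
*
* uses each byte of v2 as an index into the 16 bytes of v1; the selected bytes are written to v0, and any out-of-range index produces 0 in the corresponding byte of v0.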
* * @param size register size. * @param dst SIMD register. * @param table SIMD register. * @param index SIMD register. */ public void tblVVV(ASIMDSize size, Register dst, Register table, Register index) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert table.getRegisterCategory().equals(SIMD) : table; assert index.getRegisterCategory().equals(SIMD) : index; tableLookupEncoding(ASIMDInstruction.TBL, size, 1, dst, table, index); } /** * C7.2.339 Table vector lookup (two register table variant).
* * This instruction is used to perform permutations at a byte granularity. A table is formed by * combining the two table registers. Within the destination, each byte is determined by using * the index register to pick either a value within the table, or if the index exceeds the * table's boundary, then the dst register is set to 0. * *
     * tbl[0..n-1] = table1[0..n-1]
     * tbl[n..2n-1] = table2[0..n-1]
     * for i in 0..n-1 {
     *     idx = index[i]
     *     if (idx < 2n)
     *      dst[i] = tbl[idx]
     *     else
     *      dst[i] = 0
     * }
     * 
* * @param size register size. * @param dst SIMD register. * @param table1 SIMD register. * @param table2 SIMD register. * @param index SIMD register. */ public void tblVVVV(ASIMDSize size, Register dst, Register table1, Register table2, Register index) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert assertConsecutiveSIMDRegisters(table1, table2); assert index.getRegisterCategory().equals(SIMD) : index; tableLookupEncoding(ASIMDInstruction.TBL, size, 2, dst, table1, index); } /** * C7.2.440 Table vector lookup extension (single register table variant).
* * This instruction is used to perform permutations at a byte granularity. Within the * destination, each byte is determined by using the index register to pick either a value * within the table register, or if the index exceeds the table's boundary, then the dst * register is unchanged. * *
     * tbl[0..n-1] = table[0..n-1]
     * for i in 0..n-1 {
     *     idx = index[i]
     *     if (idx < n)
     *      dst[i] = tbl[idx]
     * }
     * 
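* A usage sketch (illustrative; v0, v1 and v2 stand in for SIMD registers):
*
*     tbxVVV(ASIMDSize.FullReg, v0, v1, v2)
*
* behaves like tblVVV, except that destination bytes whose index in v2 is out of range are left unchanged in v0 rather than being zeroed.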
* * @param size register size. * @param dst SIMD register. * @param table SIMD register. * @param index SIMD register. */ public void tbxVVV(ASIMDSize size, Register dst, Register table, Register index) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert table.getRegisterCategory().equals(SIMD) : table; assert index.getRegisterCategory().equals(SIMD) : index; tableLookupEncoding(ASIMDInstruction.TBX, size, 1, dst, table, index); } /** * C7.2.341 Transpose vectors (primary). *

* From the manual: "This instruction reads corresponding even-numbered vector elements from * the two registers, starting at zero, [and] places each result into consecutive elements of a * vector." * * @param dstSize register size of destination register. Note only half of this size will be * used within the source registers. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void trn1VVV(ASIMDSize dstSize, ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert usesMultipleLanes(dstSize, eSize) : dstSize + " " + eSize; permuteEncoding(ASIMDInstruction.TRN1, dstSize, eSize, dst, src1, src2); } /** * C7.2.342 Transpose vectors (secondary). *

* From the manual: "This instruction reads corresponding odd-numbered vector elements from the * two registers, starting at zero, [and] places each result into consecutive elements of a * vector." * * @param dstSize register size of destination register. Note only half of this size will be * used within the source registers. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void trn2VVV(ASIMDSize dstSize, ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert usesMultipleLanes(dstSize, eSize) : dstSize + " " + eSize; permuteEncoding(ASIMDInstruction.TRN2, dstSize, eSize, dst, src1, src2); } /** * C7.2.350 Unsigned add across long vector.
* dst = src[0] + ... + src[n].
*
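* A usage sketch (illustrative; v0 and v1 stand in for SIMD registers):
*
*     uaddlvSV(ASIMDSize.FullReg, ElementSize.Word, v0, v1)
*
* adds the four unsigned 32-bit lanes of v1 and writes the 64-bit sum to the scalar (lowest) element of v0.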

* Dst is twice the width of the vector elements, so overflow is not possible. * * @param size register size. * @param elementSize Unexpanded width of each addition operand. * @param dst SIMD register. Should not be null. * @param src SIMD register. Should not be null. */ public void uaddlvSV(ASIMDSize size, ElementSize elementSize, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert !(size == ASIMDSize.HalfReg && elementSize == ElementSize.Word) : "Invalid size and lane combination for uaddlv"; assert elementSize != ElementSize.DoubleWord : "Invalid lane width for uaddlv " + elementSize; acrossLanesEncoding(ASIMDInstruction.UADDLV, size, elemSizeXX(elementSize), dst, src); } /** * C7.2.360 Unsigned maximum.
* * for i in 0..n-1 do dst[i] = uint_max(src1[i], src2[i]) * * @param size register size. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void umaxVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert eSize != ElementSize.DoubleWord : "Invalid lane width for umax " + eSize; threeSameEncoding(ASIMDInstruction.UMAX, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.361 Unsigned maximum pairwise.
* * * concat = src2:src1 * for i in 0..n-1 do dst[i] = uint_max(concat[2 * i], concat[2 * i + 1]) * * * @param size register size. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void umaxpVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert eSize != ElementSize.DoubleWord : "Invalid lane width for umaxp"; threeSameEncoding(ASIMDInstruction.UMAXP, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.362 Unsigned maximum across vector.
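* A usage sketch (illustrative; v0 and v1 stand in for SIMD registers):
*
*     umaxvSV(ASIMDSize.FullReg, ElementSize.Word, v0, v1)
*
* writes the largest of the four unsigned 32-bit lanes of v1 to the scalar (lowest) element of v0.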
* * dst = uint_max(src[0], ..., src[n]). * * @param size register size. * @param elementSize width of each operand. * @param dst SIMD register. * @param src SIMD register. */ public void umaxvSV(ASIMDSize size, ElementSize elementSize, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert !(size == ASIMDSize.HalfReg && elementSize == ElementSize.Word) : "Invalid size and lane combination for umaxv"; assert elementSize != ElementSize.DoubleWord : "Invalid lane width for umaxv"; acrossLanesEncoding(ASIMDInstruction.UMAXV, size, elemSizeXX(elementSize), dst, src); } /** * C7.2.363 Unsigned minimum.
* * for i in 0..n-1 do dst[i] = uint_min(src1[i], src2[i]) * * @param size register size. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void uminVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert eSize != ElementSize.DoubleWord : "Invalid lane width for umin"; threeSameEncoding(ASIMDInstruction.UMIN, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.364 Unsigned minimum pairwise.
* * * concat = src2:src1 * for i in 0..n-1 do dst[i] = uint_min(concat[2 * i], concat[2 * i + 1]) * * * @param size register size. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void uminpVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert eSize != ElementSize.DoubleWord : "Invalid lane width for uminp"; threeSameEncoding(ASIMDInstruction.UMINP, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.365 Unsigned minimum across vector.
* * dst = uint_min(src[0], ..., src[n]). * * @param size register size. * @param elementSize width of each operand. * @param dst SIMD register. * @param src SIMD register. */ public void uminvSV(ASIMDSize size, ElementSize elementSize, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert !(size == ASIMDSize.HalfReg && elementSize == ElementSize.Word) : "Invalid size and lane combination for uminv"; assert elementSize != ElementSize.DoubleWord : "Invalid lane width for uminv"; acrossLanesEncoding(ASIMDInstruction.UMINV, size, elemSizeXX(elementSize), dst, src); } /** * C7.2.367 Unsigned Multiply-Add Long.
* * for i in 0..n-1 do dst[i] += uint_multiply(src1[i], src2[i]) * * @param srcESize source element size. Cannot be ElementSize.DoubleWord. The destination * element size will be double this width. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void umlalVVV(ElementSize srcESize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert srcESize != ElementSize.DoubleWord : srcESize; threeDifferentEncoding(ASIMDInstruction.UMLAL, false, elemSizeXX(srcESize), dst, src1, src2); } /** * C7.2.369 Unsigned Multiply-Subtract Long.
* * for i in 0..n-1 do dst[i] -= uint_multiply(src1[i], src2[i]) * * @param srcESize source element size. Cannot be ElementSize.DoubleWord. The destination * element size will be double this width. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void umlslVVV(ElementSize srcESize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert srcESize != ElementSize.DoubleWord : srcESize; threeDifferentEncoding(ASIMDInstruction.UMLSL, false, elemSizeXX(srcESize), dst, src1, src2); } /** * C7.2.371 Unsigned move vector element to general-purpose register.
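* A usage sketch (illustrative; r0 stands in for a general-purpose register and v1 for a SIMD register):
*
*     umovGX(ElementSize.Word, r0, v1, 2)
*
* copies the unsigned 32-bit lane at index 2 of v1 into r0.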
* * dst (gp) = src[index] (simd). * * @param eSize width of element to move. * @param dst general-purpose register. * @param src SIMD register. * @param index offset of value to move. */ public void umovGX(ElementSize eSize, Register dst, Register src, int index) { assert dst.getRegisterCategory().equals(CPU) : dst; assert src.getRegisterCategory().equals(SIMD) : src; copyEncoding(ASIMDInstruction.UMOV, eSize == ElementSize.DoubleWord, eSize, dst, src, index); } /** * C7.2.390 unsigned shift left (register).
* * for i in 0..n-1 do
* if (byte(src2[i]) > 0)
* dst[i] = src1[i] << byte(src2[i])
* else
* dst[i] = src1[i] >>> -byte(src2[i])
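* A usage sketch (illustrative; v0, v1 and v2 stand in for SIMD registers):
*
*     ushlVVV(ASIMDSize.FullReg, ElementSize.Word, v0, v1, v2)
*
* shifts each 32-bit lane of v1 by the signed per-lane amount held in the low byte of the corresponding lane of v2 (left for positive amounts, logical right for negative ones) and writes the results to v0.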
* * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void ushlVVV(ASIMDSize size, ElementSize eSize, Register dst, Register src1, Register src2) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; threeSameEncoding(ASIMDInstruction.USHL, size, elemSizeXX(eSize), dst, src1, src2); } /** * C7.2.391 Unsigned shift left long (immediate).
*
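* A usage sketch (illustrative; v0 and v1 stand in for SIMD registers):
*
*     ushllVVI(ElementSize.Word, v0, v1, 0)
*
* zero-extends the two 32-bit lanes in the lower half of v1 into two 64-bit lanes of v0; a non-zero shift amount would additionally shift each widened value left.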

* From the manual: "This instruction reads each vector element in the lower half of the source * SIMD&FP register, shifts the unsigned integer value left by the specified number of bits * ... The destination vector elements are twice as long as the source vector elements." * * @param srcESize source element size. Cannot be ElementSize.DoubleWord. The destination * element size will be double this width. * @param dst SIMD register. * @param src SIMD register. * @param shiftAmt shift left amount. */ public void ushllVVI(ElementSize srcESize, Register dst, Register src, int shiftAmt) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert srcESize != ElementSize.DoubleWord : srcESize; /* Accepted shift range */ assert shiftAmt >= 0 && shiftAmt < srcESize.nbits : shiftAmt + " " + srcESize; /* shift = imm7 - srcESize.nbits */ int imm7 = srcESize.nbits + shiftAmt; shiftByImmEncoding(ASIMDInstruction.USHLL, false, imm7, dst, src); } /** * C7.2.391 Unsigned shift left long (immediate).
*

* From the manual: "This instruction reads each vector element in the upper half of the source * SIMD&FP register, shifts the unsigned integer value left by the specified number of bits * ... The destination vector elements are twice as long as the source vector elements." * * @param srcESize source element size. Cannot be ElementSize.DoubleWord. The destination * element size will be twice this width. * @param dst SIMD register. * @param src SIMD register. * @param shiftAmt shift left amount. */ public void ushll2VVI(ElementSize srcESize, Register dst, Register src, int shiftAmt) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert srcESize != ElementSize.DoubleWord : srcESize; /* Accepted shift range */ assert shiftAmt >= 0 && shiftAmt < srcESize.nbits : shiftAmt + " " + srcESize; /* shift = imm7 - srcESize.nbits */ int imm7 = srcESize.nbits + shiftAmt; shiftByImmEncoding(ASIMDInstruction.USHLL, true, imm7, dst, src); } /** * C7.2.392 unsigned shift right (immediate) scalar.
* * dst = src >>> imm * * @param eSize element size. Must be ElementSize.DoubleWord. * @param dst SIMD register. * @param src SIMD register. * @param shiftAmt shift right amount. */ public void ushrSSI(ElementSize eSize, Register dst, Register src, int shiftAmt) { assert eSize == ElementSize.DoubleWord : eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; /* Accepted shift range */ assert shiftAmt > 0 && shiftAmt <= eSize.nbits : shiftAmt + " " + eSize; /* shift = eSize.nbits * 2 - imm7 */ int imm7 = eSize.nbits * 2 - shiftAmt; scalarShiftByImmEncoding(ASIMDInstruction.USHR, imm7, dst, src); } /** * C7.2.392 unsigned shift right (immediate) vector.
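* A usage sketch (illustrative; v0 and v1 stand in for SIMD registers):
*
*     ushrVVI(ASIMDSize.FullReg, ElementSize.Word, v0, v1, 1)
*
* logically shifts each of the four 32-bit lanes of v1 right by one and writes the results to v0.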
* * for i in 0..n-1 do dst[i] = src[i] >>> imm * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src SIMD register. * @param shiftAmt shift right amount. */ public void ushrVVI(ASIMDSize size, ElementSize eSize, Register dst, Register src, int shiftAmt) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; /* Accepted shift range */ assert shiftAmt > 0 && shiftAmt <= eSize.nbits : shiftAmt + " " + eSize; /* shift = eSize.nbits * 2 - imm7 */ int imm7 = eSize.nbits * 2 - shiftAmt; shiftByImmEncoding(ASIMDInstruction.USHR, size, imm7, dst, src); } /** * C7.2.395 Unsigned shift right (immediate) and accumulate vector.
* * for i in 0..n-1 do dst[i] += src[i] >>> imm * * @param size register size. * @param eSize element size. ElementSize.DoubleWord is only applicable when size is 128 (i.e. * the operation is performed on more than one element). * @param dst SIMD register. * @param src SIMD register. * @param shiftAmt shift right amount. */ public void usraVVI(ASIMDSize size, ElementSize eSize, Register dst, Register src, int shiftAmt) { assert usesMultipleLanes(size, eSize) : "Must use multiple lanes " + size + " " + eSize; assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; /* Accepted shift range */ assert shiftAmt > 0 && shiftAmt <= eSize.nbits : shiftAmt + " " + eSize; /* shift = eSize.nbits * 2 - imm7 */ int imm7 = eSize.nbits * 2 - shiftAmt; shiftByImmEncoding(ASIMDInstruction.USRA, size, imm7, dst, src); } /** * C7.2.396 Unsigned integer subtract vector Long.
* The destination vector elements are twice as long as the source vector elements.
* * for i in 0..(n/2)-1 do dst[i] = uint_sub(src1[i], src2[i]) * * @param srcESize source element size. Cannot be ElementSize.DoubleWord. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void usublVVV(ElementSize srcESize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert srcESize != ElementSize.DoubleWord : srcESize; threeDifferentEncoding(ASIMDInstruction.USUBL, false, elemSizeXX(srcESize), dst, src1, src2); } /** * C7.2.396 Unsigned integer subtract vector Long upper half.
* The destination vector elements are twice as long as the source vector elements.
* * for i in (n/2)..n-1 do dst[i] = uint_sub(src1[i], src2[i]) * * @param srcESize source element size. Cannot be ElementSize.DoubleWord. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void usubl2VVV(ElementSize srcESize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert srcESize != ElementSize.DoubleWord : srcESize; threeDifferentEncoding(ASIMDInstruction.USUBL, true, elemSizeXX(srcESize), dst, src1, src2); } /** * C7.2.399 Unzip vectors (primary). *

* From the manual: "This instruction reads corresponding even-numbered vector elements from * the two source registers, starting at zero, places the result from the first source register * into consecutive elements in the lower half of a vector, and the result from the second * source register into consecutive elements in the upper half of a vector." * * @param dstSize register size of destination register. Note only half of this size will be * used within the source registers. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void uzp1VVV(ASIMDSize dstSize, ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert usesMultipleLanes(dstSize, eSize) : dstSize + " " + eSize; permuteEncoding(ASIMDInstruction.UZP1, dstSize, eSize, dst, src1, src2); } /** * C7.2.400 Unzip vectors (secondary). *

* From the manual: "This instruction reads corresponding odd-numbered vector elements from the * two source registers, starting at zero, places the result from the first source register into * consecutive elements in the lower half of a vector, and the result from the second source * register into consecutive elements in the upper half of a vector." * * @param dstSize register size of destination register. Note only half of this size will be * used within the source registers. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void uzp2VVV(ASIMDSize dstSize, ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert usesMultipleLanes(dstSize, eSize) : dstSize + " " + eSize; permuteEncoding(ASIMDInstruction.UZP2, dstSize, eSize, dst, src1, src2); } /** * C7.2.401 Exclusive-OR and Rotate.
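* A usage sketch (illustrative; v0, v1 and v2 stand in for SIMD registers):
*
*     xarVVVI(v0, v1, v2, 32)
*
* XORs the 128-bit contents of v1 and v2, rotates each 64-bit half of the result right by 32 bits, and writes it to v0.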
* * Exclusive-OR and Rotate performs a bitwise exclusive-OR of the 128-bit vectors in the two * source SIMD&FP registers, rotates each 64-bit element of the resulting 128-bit vector * right by the value specified by a 6-bit immediate value, and writes the result to the * destination SIMD&FP register. * * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. * @param imm6 6-bit immediate */ public void xarVVVI(Register dst, Register src1, Register src2, int imm6) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert NumUtil.isUnsignedNbit(6, imm6) : imm6; int baseEncoding = 0b110011101_00_00000_000000_00000_00000; emitInt(baseEncoding | rd(dst) | rs1(src1) | rs2(src2) | imm6 << 10); } /** * C7.2.402 Extract narrow.
*
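* A usage sketch (illustrative; v0 and v1 stand in for SIMD registers):
*
*     xtnVV(ElementSize.Word, v0, v1)
*
* truncates the two 64-bit lanes of v1 to 32 bits each and writes them to the lower half of v0.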

* From the manual: "This instruction reads each vector element from the source SIMD&FP * register, narrows each value to half the original width, and writes into the lower half of * the destination register..." * * @param dstESize destination element size. Cannot be ElementSize.DoubleWord. The source * element size is twice this width. * @param dst SIMD register. * @param src SIMD register. */ public void xtnVV(ElementSize dstESize, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert dstESize != ElementSize.DoubleWord : dstESize; twoRegMiscEncoding(ASIMDInstruction.XTN, false, elemSizeXX(dstESize), dst, src); } /** * C7.2.402 Extract narrow.
*

* From the manual: "This instruction reads each vector element from the source SIMD&FP * register, narrows each value to half the original width, and writes into the upper half of * the destination register..." * * @param dstESize destination element size. Cannot be ElementSize.DoubleWord. The source * element size is twice this width. * @param dst SIMD register. * @param src SIMD register. */ public void xtn2VV(ElementSize dstESize, Register dst, Register src) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src.getRegisterCategory().equals(SIMD) : src; assert dstESize != ElementSize.DoubleWord : dstESize; twoRegMiscEncoding(ASIMDInstruction.XTN, true, elemSizeXX(dstESize), dst, src); } /** * C7.2.403 Zip vectors (primary). *
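* A usage sketch (illustrative; v0, v1 and v2 stand in for SIMD registers):
*
*     zip1VVV(ASIMDSize.FullReg, ElementSize.Word, v0, v1, v2)
*
* interleaves the 32-bit lanes from the lower halves of v1 and v2, producing v1[0], v2[0], v1[1], v2[1] in v0.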

* From the manual: "This instruction reads adjacent vector elements from the lower half of two * source registers as pairs, interleaves the pairs ... and writes the vector to the destination * register." * * @param dstSize register size of destination register. Note only half of this size will be * used within the source registers. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void zip1VVV(ASIMDSize dstSize, ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert usesMultipleLanes(dstSize, eSize) : dstSize + " " + eSize; permuteEncoding(ASIMDInstruction.ZIP1, dstSize, eSize, dst, src1, src2); } /** * C7.2.404 Zip vectors (secondary). *

* From the manual: "This instruction reads adjacent vector elements from the upper half of two * source registers as pairs, interleaves the pairs ... and writes the vector to the destination * register." * * @param dstSize register size of destination register. Note only half of this size will be * used within the source registers. * @param eSize element size. * @param dst SIMD register. * @param src1 SIMD register. * @param src2 SIMD register. */ public void zip2VVV(ASIMDSize dstSize, ElementSize eSize, Register dst, Register src1, Register src2) { assert dst.getRegisterCategory().equals(SIMD) : dst; assert src1.getRegisterCategory().equals(SIMD) : src1; assert src2.getRegisterCategory().equals(SIMD) : src2; assert usesMultipleLanes(dstSize, eSize) : dstSize + " " + eSize; permuteEncoding(ASIMDInstruction.ZIP2, dstSize, eSize, dst, src1, src2); } }




