/*
* Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2019, Arm Limited and affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.graalvm.compiler.lir.aarch64;

import static jdk.vm.ci.aarch64.AArch64.zr;
import static jdk.vm.ci.code.ValueUtil.asRegister;
import static org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler.PREFERRED_LOOP_ALIGNMENT;
import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG;
import org.graalvm.compiler.asm.Label;
import org.graalvm.compiler.asm.aarch64.AArch64Address;
import org.graalvm.compiler.asm.aarch64.AArch64Assembler;
import org.graalvm.compiler.asm.aarch64.AArch64Assembler.ConditionFlag;
import org.graalvm.compiler.asm.aarch64.AArch64MacroAssembler;
import org.graalvm.compiler.lir.LIRInstructionClass;
import org.graalvm.compiler.lir.Opcode;
import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
import jdk.vm.ci.code.CodeUtil;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.meta.Value;

/**
* Zero a chunk of memory on AArch64.
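* <p>
* The emitted code chooses between two strategies: when DC ZVA is usable and the ZVA block
* length is known ({@code useDcZva} with a positive {@code zvaLength}), it aligns the base
* address to the ZVA block size and zeroes whole blocks with {@code DC ZVA}; otherwise it
* falls back to an STP loop that stores a pair of zero registers (16 bytes) per iteration.
* Unaligned chunks additionally get a byte/halfword/word alignment prologue and a per-byte
* tail.
* <p>
* A minimal construction sketch, purely illustrative: it assumes an {@code LIRGeneratorTool}
* named {@code gen} and register-backed {@code addr}/{@code len} values are already in
* scope, and the ZVA length of 64 bytes is an arbitrary example value.
*
* <pre>
* gen.append(new AArch64ZeroMemoryOp(addr, len, false, true, 64));
* </pre>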
*/
@Opcode("ZERO_MEMORY")
public final class AArch64ZeroMemoryOp extends AArch64LIRInstruction {
public static final LIRInstructionClass<AArch64ZeroMemoryOp> TYPE = LIRInstructionClass.create(AArch64ZeroMemoryOp.class);
@Use({REG}) protected Value addressValue;
@Use({REG}) protected Value lengthValue;
@Temp({REG}) protected Value addressValueTemp;
@Temp({REG}) protected Value lengthValueTemp;
private final boolean isAligned;
private final boolean useDcZva;
private final int zvaLength;
/**
* Constructor of AArch64ZeroMemoryOp.
*
* @param address starting address of the memory chunk to be zeroed.
* @param length size of the memory chunk to be zeroed, in bytes.
* @param isAligned whether both address and size are aligned to 8 bytes.
* @param useDcZva whether the DC ZVA instruction can be used for bulk zeroing.
* @param zvaLength the ZVA length of the current AArch64 CPU; a negative value indicates that
* the length is unknown at compile time.
*/
public AArch64ZeroMemoryOp(Value address, Value length, boolean isAligned, boolean useDcZva, int zvaLength) {
super(TYPE);
this.addressValue = address;
this.lengthValue = length;
this.addressValueTemp = address;
this.lengthValueTemp = length;
this.useDcZva = useDcZva;
this.zvaLength = zvaLength;
this.isAligned = isAligned;
}
@Override
protected void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
Register base = asRegister(addressValue);
Register size = asRegister(lengthValue);
try (AArch64MacroAssembler.ScratchRegister scratchRegister = masm.getScratchRegister()) {
Register alignmentBits = scratchRegister.getRegister();
Label tail = new Label();
Label done = new Label();
// Jump to DONE if size is zero.
masm.cbz(64, size, done);
if (!isAligned) {
Label baseAlignedTo2Bytes = new Label();
Label baseAlignedTo4Bytes = new Label();
Label baseAlignedTo8Bytes = new Label();
// Jump to the per-byte zeroing loop if the zeroing size is less than 8 bytes
masm.compare(64, size, 8);
masm.branchConditionally(ConditionFlag.LT, tail);
// Make base 8-byte aligned
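// alignmentBits = (-base) & 7 is the number of bytes needed to reach the next 8-byte
// boundary (0 if base is already aligned). Its low three bits drive the prologue below:
// bit 0 -> store 1 byte, bit 1 -> store 2 bytes, bit 2 -> store 4 bytes.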
masm.neg(64, alignmentBits, base);
masm.and(64, alignmentBits, alignmentBits, 7);
masm.tbz(alignmentBits, 0, baseAlignedTo2Bytes);
masm.sub(64, size, size, 1);
masm.str(8, zr, AArch64Address.createImmediateAddress(8, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, base, 1));
masm.bind(baseAlignedTo2Bytes);
masm.tbz(alignmentBits, 1, baseAlignedTo4Bytes);
masm.sub(64, size, size, 2);
masm.str(16, zr, AArch64Address.createImmediateAddress(16, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, base, 2));
masm.bind(baseAlignedTo4Bytes);
masm.tbz(alignmentBits, 2, baseAlignedTo8Bytes);
masm.sub(64, size, size, 4);
masm.str(32, zr, AArch64Address.createImmediateAddress(32, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, base, 4));
masm.bind(baseAlignedTo8Bytes);
// At this point base is 8-byte aligned.
}
if (useDcZva && zvaLength > 0) {
// From ARMv8-A architecture reference manual D12.2.35 Data Cache Zero ID register:
// A valid ZVA length should be a power-of-2 value in [4, 2048]
assert (CodeUtil.isPowerOf2(zvaLength) && 4 <= zvaLength && zvaLength <= 2048);
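// The DC ZVA path has three phases: a pre loop that stores 8 bytes at a time until base
// is aligned to the ZVA block size, a main loop that zeroes one zvaLength-sized block per
// DC ZVA instruction, and a post loop that stores the remaining bytes 8 at a time.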
Label preCheck = new Label();
Label preLoop = new Label();
Label mainCheck = new Label();
Label mainLoop = new Label();
Label postCheck = new Label();
Label postLoop = new Label();
masm.neg(64, alignmentBits, base);
masm.and(64, alignmentBits, alignmentBits, zvaLength - 1);
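// alignmentBits = (-base) & (zvaLength - 1): the number of bytes needed to reach the next
// zvaLength-aligned address (0 if base is already aligned); the pre loop zeroes exactly
// these bytes.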
// If size is less than or equal to the number of bytes to be pre-zeroed, jump to the post check.
masm.cmp(64, size, alignmentBits);
masm.branchConditionally(AArch64Assembler.ConditionFlag.LE, postCheck);
masm.sub(64, size, size, alignmentBits);
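// size is reduced up front by the pre-zeroed byte count; the pre loop itself only counts
// down alignmentBits.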
// Pre loop: align base according to the supported bulk zeroing stride.
masm.jmp(preCheck);
masm.align(PREFERRED_LOOP_ALIGNMENT);
masm.bind(preLoop);
masm.str(64, zr, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, base, 8));
masm.bind(preCheck);
masm.subs(64, alignmentBits, alignmentBits, 8);
masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, preLoop);
// Main loop: bulk zeroing
masm.jmp(mainCheck);
masm.align(PREFERRED_LOOP_ALIGNMENT);
masm.bind(mainLoop);
masm.dc(AArch64Assembler.DataCacheOperationType.ZVA, base);
masm.add(64, base, base, zvaLength);
masm.bind(mainCheck);
masm.subs(64, size, size, zvaLength);
masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, mainLoop);
masm.add(64, size, size, zvaLength);
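// The subs in the main loop drove size below zero; adding zvaLength back yields the
// remaining byte count (< zvaLength), which the post loop (and, for unaligned chunks,
// the per-byte tail) finishes off.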
// Post loop: handle bytes after the main loop
masm.jmp(postCheck);
masm.align(PREFERRED_LOOP_ALIGNMENT);
masm.bind(postLoop);
masm.str(64, zr, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, base, 8));
masm.bind(postCheck);
masm.subs(64, size, size, 8);
masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, postLoop);
if (!isAligned) {
// Restore size for tail zeroing
masm.add(64, size, size, 8);
}
} else {
Label mainCheck = new Label();
Label mainLoop = new Label();
if (!isAligned) {
// After aligning base, size may have dropped below 8, so check again.
masm.compare(64, size, 8);
masm.branchConditionally(ConditionFlag.LT, tail);
}
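// If bit 3 of base is set, the address is only 8-byte aligned; emit a single 8-byte store
// so that the STP loop below operates on a 16-byte aligned base.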
masm.tbz(base, 3, mainCheck);
masm.sub(64, size, size, 8);
masm.str(64, zr, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, base, 8));
masm.jmp(mainCheck);
// The STP loop that zeros 16 bytes in each iteration.
masm.align(PREFERRED_LOOP_ALIGNMENT);
masm.bind(mainLoop);
masm.stp(64, zr, zr, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, base, 2 * 8));
masm.bind(mainCheck);
masm.subs(64, size, size, 16);
masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, mainLoop);
// We may need to zero the tail 8 bytes of the memory chunk.
masm.add(64, size, size, 16);
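// size is now the remaining byte count in [0, 16); bit 3 set means at least 8 bytes are
// left, which are cleared with one more 8-byte store.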
masm.tbz(size, 3, tail);
masm.str(64, zr, AArch64Address.createImmediateAddress(64, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, base, 8));
if (!isAligned) {
// Adjust size for tail zeroing
masm.sub(64, size, size, 8);
}
}
masm.bind(tail);
if (!isAligned) {
Label perByteZeroingLoop = new Label();
masm.cbz(64, size, done);
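// At this point 0 < size < 8, so the per-byte loop below runs at most 7 times.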
// We have to ensure size > 0 when entering the following loop
masm.align(PREFERRED_LOOP_ALIGNMENT);
masm.bind(perByteZeroingLoop);
masm.str(8, zr, AArch64Address.createImmediateAddress(8, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, base, 1));
masm.subs(64, size, size, 1);
masm.branchConditionally(AArch64Assembler.ConditionFlag.NE, perByteZeroingLoop);
}
masm.bind(done);
}
}
}