Commit 14abd91

[GR-70003] Merge vector stack moves of the same type in StackMoveOptimizationPhase
PullRequest: graal/22221
2 parents: 2258930 + 0fbb782

13 files changed: +184 -77 lines

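Note: every op touched below implements the same pattern, a stack-to-stack move routed through a scratch register whose previous contents must survive, normally by parking them in a backup slot. A minimal standalone model of that pattern (plain arrays stand in for stack slots and the register; StackMoveSketch and all names in it are illustrative, not Graal code):

import java.util.Arrays;

public class StackMoveSketch {
    static void stackMove(long[] stack, int src, int dst, long[] scratch,
                    int backupSlot, boolean scratchAlwaysZero) {
        if (!scratchAlwaysZero) {
            stack[backupSlot] = scratch[0]; // back up the scratch register's old value
        }
        scratch[0] = stack[src];            // stack -> scratch
        stack[dst] = scratch[0];            // scratch -> stack
        if (scratchAlwaysZero) {
            scratch[0] = 0;                 // restore with xor scratch,scratch
        } else {
            scratch[0] = stack[backupSlot]; // restore from the backup slot
        }
    }

    public static void main(String[] args) {
        long[] stack = {7, 0, 0, 0};
        long[] scratch = {42};
        stackMove(stack, 0, 2, scratch, 3, false);
        System.out.println(Arrays.toString(stack) + " scratch=" + scratch[0]);
        // -> [7, 0, 7, 42] scratch=42
    }
}

The scratchAlwaysZero path models the HotSpot special case introduced below: a register that is guaranteed to hold zero needs no backup and is restored with a single xor.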

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/core/amd64/AMD64MoveFactory.java

Lines changed: 2 additions & 3 deletions

@@ -25,15 +25,15 @@
 
 package jdk.graal.compiler.core.amd64;
 
-import static jdk.vm.ci.code.ValueUtil.isRegister;
 import static jdk.graal.compiler.lir.LIRValueUtil.asConstant;
 import static jdk.graal.compiler.lir.LIRValueUtil.isConstantValue;
 import static jdk.graal.compiler.lir.LIRValueUtil.isStackSlotValue;
+import static jdk.vm.ci.code.ValueUtil.isRegister;
 
 import jdk.graal.compiler.asm.amd64.AMD64Assembler;
+import jdk.graal.compiler.core.common.NumUtil;
 import jdk.graal.compiler.core.common.type.DataPointerConstant;
 import jdk.graal.compiler.debug.GraalError;
-import jdk.graal.compiler.core.common.NumUtil;
 import jdk.graal.compiler.lir.LIRInstruction;
 import jdk.graal.compiler.lir.amd64.AMD64AddressValue;
 import jdk.graal.compiler.lir.amd64.AMD64LIRInstruction;
@@ -44,7 +44,6 @@
 import jdk.graal.compiler.lir.amd64.AMD64Move.MoveFromConstOp;
 import jdk.graal.compiler.lir.amd64.AMD64Move.MoveFromRegOp;
 import jdk.graal.compiler.lir.amd64.AMD64Move.MoveToRegOp;
-
 import jdk.vm.ci.amd64.AMD64Kind;
 import jdk.vm.ci.code.Register;
 import jdk.vm.ci.meta.AllocatableValue;

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/core/amd64/AMD64MoveFactoryBase.java

Lines changed: 16 additions & 1 deletion

@@ -28,6 +28,7 @@
 import jdk.graal.compiler.lir.VirtualStackSlot;
 import jdk.graal.compiler.lir.amd64.AMD64LIRInstruction;
 import jdk.graal.compiler.lir.gen.MoveFactory;
+import jdk.vm.ci.amd64.AMD64Kind;
 import jdk.vm.ci.code.Register;
 import jdk.vm.ci.meta.AllocatableValue;
 
@@ -41,11 +42,25 @@ public AMD64MoveFactoryBase(BackupSlotProvider backupSlotProvider) {
 
     @Override
     public final AMD64LIRInstruction createStackMove(AllocatableValue result, AllocatableValue input) {
-        RegisterBackupPair backup = backupSlotProvider.getScratchRegister(input.getPlatformKind());
+        AMD64Kind backupKind = (AMD64Kind) input.getPlatformKind();
+        if (backupKind.isXMM() && backupKind.getSizeInBytes() <= Long.BYTES) {
+            // backup using a gp register
+            backupKind = AMD64Kind.QWORD;
+        }
+
+        RegisterBackupPair backup = backupSlotProvider.getScratchRegister(backupKind, backupKind.isInteger() ? getPreferredGeneralPurposeScratchRegister() : null);
         Register scratchRegister = backup.register;
         VirtualStackSlot backupSlot = backup.backupSlot;
         return createStackMove(result, input, scratchRegister, backupSlot);
     }
 
     public abstract AMD64LIRInstruction createStackMove(AllocatableValue result, AllocatableValue input, Register scratchRegister, AllocatableValue backupSlot);
+
+    /**
+     * @return a concrete register as the preferred scratch register for integer AMD64Kind; or null
+     *         if there is no preference
+     */
+    public Register getPreferredGeneralPurposeScratchRegister() {
+        return null;
+    }
 }
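The new branch above picks the kind used for backing up the scratch register. A hedged standalone sketch of that rule (Kind is a simplified stand-in enum for jdk.vm.ci.amd64.AMD64Kind; the sizes are illustrative): XMM values no larger than a long can be parked in a 64-bit general-purpose register, while wider vectors keep an XMM-sized backup.

enum Kind {
    DWORD(4, false), QWORD(8, false),
    SINGLE(4, true), DOUBLE(8, true), V256_SINGLE(32, true);

    final int sizeInBytes;
    final boolean xmm;

    Kind(int sizeInBytes, boolean xmm) {
        this.sizeInBytes = sizeInBytes;
        this.xmm = xmm;
    }
}

public class BackupKindSketch {
    // Mirrors the rule in the diff: XMM values that fit in a long are backed
    // up through a 64-bit general-purpose register instead of an XMM register.
    static Kind backupKind(Kind inputKind) {
        if (inputKind.xmm && inputKind.sizeInBytes <= Long.BYTES) {
            return Kind.QWORD;
        }
        return inputKind;
    }

    public static void main(String[] args) {
        System.out.println(backupKind(Kind.DOUBLE));      // QWORD
        System.out.println(backupKind(Kind.V256_SINGLE)); // V256_SINGLE
    }
}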

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/amd64/AMD64HotSpotLIRGenerator.java

Lines changed: 2 additions & 1 deletion

@@ -128,7 +128,8 @@ protected static BarrierSetLIRGeneratorTool getBarrierSet(GraalHotSpotVMConfig c
     }
 
     private AMD64HotSpotLIRGenerator(HotSpotProviders providers, GraalHotSpotVMConfig config, LIRGenerationResult lirGenRes, BackupSlotProvider backupSlotProvider) {
-        this(new AMD64HotSpotLIRKindTool(), new AMD64ArithmeticLIRGenerator(null), getBarrierSet(config, providers), new AMD64HotSpotMoveFactory(backupSlotProvider), providers, config, lirGenRes);
+        this(new AMD64HotSpotLIRKindTool(), new AMD64ArithmeticLIRGenerator(null), getBarrierSet(config, providers),
+                        new AMD64HotSpotMoveFactory(backupSlotProvider, providers.getRegisters().getZeroValueRegister(config)), providers, config, lirGenRes);
     }
 
     protected AMD64HotSpotLIRGenerator(LIRKindTool lirKindTool, AMD64ArithmeticLIRGenerator arithmeticLIRGen, BarrierSetLIRGeneratorTool barrierSetLIRGen, MoveFactory moveFactory,

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/hotspot/amd64/AMD64HotSpotMoveFactory.java

Lines changed: 19 additions & 2 deletions

@@ -27,7 +27,8 @@
 import jdk.graal.compiler.core.amd64.AMD64MoveFactory;
 import jdk.graal.compiler.lir.LIRInstruction;
 import jdk.graal.compiler.lir.amd64.AMD64LIRInstruction;
-
+import jdk.graal.compiler.lir.amd64.AMD64Move;
+import jdk.vm.ci.code.Register;
 import jdk.vm.ci.hotspot.HotSpotCompressedNullConstant;
 import jdk.vm.ci.hotspot.HotSpotConstant;
 import jdk.vm.ci.hotspot.HotSpotMetaspaceConstant;
@@ -38,8 +39,11 @@
 
 public class AMD64HotSpotMoveFactory extends AMD64MoveFactory {
 
-    public AMD64HotSpotMoveFactory(BackupSlotProvider backupSlotProvider) {
+    private final Register zeroValueRegister;
+
+    public AMD64HotSpotMoveFactory(BackupSlotProvider backupSlotProvider, Register zeroValueRegister) {
         super(backupSlotProvider);
+        this.zeroValueRegister = zeroValueRegister;
     }
 
     @Override
@@ -90,4 +94,17 @@ public LIRInstruction createStackLoad(AllocatableValue dst, Constant src) {
             return super.createStackLoad(dst, src);
         }
     }
+
+    @Override
+    public AMD64LIRInstruction createStackMove(AllocatableValue result, AllocatableValue input, Register scratchRegister, AllocatableValue backupSlot) {
+        if (scratchRegister.equals(zeroValueRegister)) {
+            return new AMD64Move.AMD64StackMove(result, input, scratchRegister, backupSlot, true);
+        }
+        return super.createStackMove(result, input, scratchRegister, backupSlot);
+    }
+
+    @Override
+    public Register getPreferredGeneralPurposeScratchRegister() {
+        return zeroValueRegister;
+    }
}
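This override special-cases a scratch register that the runtime guarantees always holds zero (obtained above via providers.getRegisters().getZeroValueRegister(config)). A sketch of the emitted-sequence difference; the Reg type, plan method, and concrete slot names are hypothetical illustrations, not emitted Graal output:

public class ZeroScratchSketch {
    record Reg(String name) {}

    static final Reg ZERO = new Reg("zero"); // stand-in for the guaranteed-zero register

    static String[] plan(Reg scratch, String src, String dst, String backup) {
        if (scratch.equals(ZERO)) {
            // no backup store/load needed; restore by re-zeroing
            return new String[]{
                    "mov " + scratch.name() + ", [" + src + "]",
                    "mov [" + dst + "], " + scratch.name(),
                    "xor " + scratch.name() + ", " + scratch.name()};
        }
        return new String[]{
                "mov [" + backup + "], " + scratch.name(), // back up scratch
                "mov " + scratch.name() + ", [" + src + "]",
                "mov [" + dst + "], " + scratch.name(),
                "mov " + scratch.name() + ", [" + backup + "]"}; // restore scratch
    }

    public static void main(String[] args) {
        for (String s : plan(ZERO, "rsp+16", "rsp+32", "rsp+8")) {
            System.out.println(s);
        }
    }
}

The zero-register plan saves two memory accesses per stack move, which is why it is also reported as the preferred general-purpose scratch register.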

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/alloc/lsra/LinearScanEliminateSpillMovePhase.java

Lines changed: 4 additions & 16 deletions

@@ -39,23 +39,11 @@
 import jdk.graal.compiler.lir.alloc.lsra.LinearScan.IntervalPredicate;
 import jdk.graal.compiler.lir.gen.LIRGenerationResult;
 import jdk.graal.compiler.lir.phases.AllocationPhase;
-import jdk.graal.compiler.lir.phases.LIRPhase;
-import jdk.graal.compiler.options.NestedBooleanOptionKey;
-import jdk.graal.compiler.options.Option;
-import jdk.graal.compiler.options.OptionKey;
-import jdk.graal.compiler.options.OptionType;
 import jdk.vm.ci.code.TargetDescription;
 import jdk.vm.ci.meta.AllocatableValue;
 
 public class LinearScanEliminateSpillMovePhase extends LinearScanAllocationPhase {
 
-    public static class Options {
-        // @formatter:off
-        @Option(help = "Enable spill move elimination.", type = OptionType.Debug)
-        public static final OptionKey<Boolean> LIROptLSRAEliminateSpillMoves = new NestedBooleanOptionKey(LIRPhase.Options.LIROptimization, true);
-        // @formatter:on
-    }
-
     private static final IntervalPredicate mustStoreAtDefinition = new LinearScan.IntervalPredicate() {
 
         @Override
@@ -115,11 +103,11 @@ void eliminateSpillMoves(LIRGenerationResult res) {
                     if (opId == -1) {
                         StandardOp.MoveOp move = StandardOp.MoveOp.asMoveOp(op);
                         /*
-                         * Remove move from register to stack if the stack slot is guaranteed to
-                         * be correct. Only moves that have been inserted by LinearScan can be
-                         * removed.
+                         * Remove move from register/stack to stack if the stack slot is
+                         * guaranteed to be correct. Only moves that have been inserted by
+                         * LinearScan can be removed.
                         */
-                        if (Options.LIROptLSRAEliminateSpillMoves.getValue(allocator.getOptions()) && canEliminateSpillMove(block, move)) {
+                        if (canEliminateSpillMove(block, move)) {
                            /*
                             * Move target is a stack slot that is always correct, so eliminate
                             * instruction.

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/amd64/AMD64Move.java

Lines changed: 79 additions & 20 deletions

@@ -41,6 +41,7 @@
 import jdk.graal.compiler.asm.Label;
 import jdk.graal.compiler.asm.amd64.AMD64Address;
 import jdk.graal.compiler.asm.amd64.AMD64Assembler.AMD64MIOp;
+import jdk.graal.compiler.asm.amd64.AMD64Assembler.AMD64SIMDInstructionEncoding;
 import jdk.graal.compiler.asm.amd64.AMD64BaseAssembler.OperandSize;
 import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
 import jdk.graal.compiler.core.common.CompressEncoding;
@@ -56,6 +57,7 @@
 import jdk.graal.compiler.lir.Opcode;
 import jdk.graal.compiler.lir.StandardOp;
 import jdk.graal.compiler.lir.VirtualStackSlot;
+import jdk.graal.compiler.lir.amd64.vector.AMD64VectorMove;
 import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
 import jdk.vm.ci.amd64.AMD64;
 import jdk.vm.ci.amd64.AMD64Kind;
@@ -179,22 +181,43 @@ public boolean canRematerializeToStack() {
         }
     }
 
+    /**
+     * Represents a LIR operation that moves data from one stack location to another, using a
+     * scratch register and a backup stack location to temporarily store the contents of the scratch
+     * register.
+     */
+    public interface StackMoveOp extends StandardOp.ValueMoveOp {
+
+        Register getScratchRegister();
+
+        /**
+         * The backup slot must be distinct from both the input and output stack slots.
+         */
+        AllocatableValue getBackupSlot();
+    }
+
     @Opcode("STACKMOVE")
-    public static final class AMD64StackMove extends AMD64LIRInstruction implements StandardOp.ValueMoveOp {
+    public static final class AMD64StackMove extends AMD64LIRInstruction implements StackMoveOp {
         public static final LIRInstructionClass<AMD64StackMove> TYPE = LIRInstructionClass.create(AMD64StackMove.class);
 
         @Def({STACK}) protected AllocatableValue result;
         @Use({STACK, HINT}) protected AllocatableValue input;
         @Alive({STACK, UNINITIALIZED}) private AllocatableValue backupSlot;
 
         private Register scratch;
+        private final boolean isScratchAlwaysZero;
 
         public AMD64StackMove(AllocatableValue result, AllocatableValue input, Register scratch, AllocatableValue backupSlot) {
+            this(result, input, scratch, backupSlot, false);
+        }
+
+        public AMD64StackMove(AllocatableValue result, AllocatableValue input, Register scratch, AllocatableValue backupSlot, boolean isScratchAlwaysZero) {
             super(TYPE);
             this.result = result;
             this.input = input;
             this.backupSlot = backupSlot;
             this.scratch = scratch;
+            this.isScratchAlwaysZero = isScratchAlwaysZero;
             assert result.getPlatformKind().getSizeInBytes() <= input.getPlatformKind().getSizeInBytes() : "cannot move " + input + " into a larger Value " + result;
         }
 
@@ -208,32 +231,58 @@ public AllocatableValue getResult() {
             return result;
         }
 
+        @Override
         public Register getScratchRegister() {
             return scratch;
         }
 
+        @Override
         public AllocatableValue getBackupSlot() {
             return backupSlot;
         }
 
         @Override
         public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
             AMD64Kind backupKind = (AMD64Kind) backupSlot.getPlatformKind();
-            if (backupKind.isXMM()) {
-                // graal doesn't use vector values, so it's safe to backup using DOUBLE
-                backupKind = AMD64Kind.DOUBLE;
+            // In SVM CEntryPoint stubs, each callee-saved register from the native calling
+            // convention is backed up to a separate interval, which is then spilled before the Java
+            // method invocation, since the Java calling convention treats all caller-saved
+            // registers as volatile. On Windows, some xmm registers are callee-saved in the native
+            // convention. As a result, we may insert StackMove operations for xmm register values
+            // larger than 64 bits prior to the Java invocation. These StackMoves are removed by
+            // LinearScanEliminateSpillMovePhase.
+
+            // back up scratch register
+            if (isScratchAlwaysZero) {
+                // no need to back up
+            } else {
+                reg2stack(backupKind, crb, masm, backupSlot, scratch);
             }
-
-            // backup scratch register
-            reg2stack(backupKind, crb, masm, backupSlot, scratch);
             // move stack slot
-            stack2reg((AMD64Kind) getInput().getPlatformKind(), crb, masm, scratch, getInput());
-            reg2stack((AMD64Kind) getResult().getPlatformKind(), crb, masm, getResult(), scratch);
+            stack2reg(getCompatibleKind((AMD64Kind) getInput().getPlatformKind(), backupKind), crb, masm, scratch, getInput());
+            reg2stack(getCompatibleKind((AMD64Kind) getResult().getPlatformKind(), backupKind), crb, masm, getResult(), scratch);
             // restore scratch register
-            stack2reg(backupKind, crb, masm, scratch, backupSlot);
+            if (isScratchAlwaysZero) {
+                masm.xorl(scratch, scratch);
+            } else {
+                stack2reg(backupKind, crb, masm, scratch, backupSlot);
+            }
         }
     }
 
+    private static AMD64Kind getCompatibleKind(AMD64Kind resultType, AMD64Kind backupKind) {
+        if (backupKind.isInteger() && resultType.isXMM()) {
+            return switch (resultType.getSizeInBytes()) {
+                case 1 -> AMD64Kind.BYTE;
+                case 2 -> AMD64Kind.WORD;
+                case 4 -> AMD64Kind.DWORD;
+                case 8 -> AMD64Kind.QWORD;
+                default -> throw GraalError.shouldNotReachHere(resultType + " cannot fit in " + backupKind);
+            };
+        }
+        return resultType;
+    }
+
     @Opcode("MULTISTACKMOVE")
     public static final class AMD64MultiStackMove extends AMD64LIRInstruction {
         public static final LIRInstructionClass<AMD64MultiStackMove> TYPE = LIRInstructionClass.create(AMD64MultiStackMove.class);
@@ -244,26 +293,25 @@ public static final class AMD64MultiStackMove extends AMD64LIRInstruction {
         @Alive({STACK, UNINITIALIZED}) private AllocatableValue backupSlot;
 
         private Register scratch;
+        private AMD64SIMDInstructionEncoding encoding;
 
-        public AMD64MultiStackMove(AllocatableValue[] results, Value[] inputs, Value[] tmps, Register scratch, AllocatableValue backupSlot) {
+        public AMD64MultiStackMove(AllocatableValue[] results, Value[] inputs, Value[] tmps, Register scratch,
+                        AllocatableValue backupSlot, AMD64SIMDInstructionEncoding encoding) {
             super(TYPE);
             this.results = results;
             this.inputs = inputs;
             this.tmps = tmps;
             this.backupSlot = backupSlot;
             this.scratch = scratch;
+            this.encoding = encoding;
         }
 
         @Override
         public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
             AMD64Kind backupKind = (AMD64Kind) backupSlot.getPlatformKind();
-            if (backupKind.isXMM()) {
-                // graal doesn't use vector values, so it's safe to backup using DOUBLE
-                backupKind = AMD64Kind.DOUBLE;
-            }
-
             // backup scratch register
-            move(backupKind, crb, masm, backupSlot, scratch.asValue(backupSlot.getValueKind()));
+            AMD64SIMDInstructionEncoding backupEnc = encoding != null ? AMD64VectorMove.maybeOverrideEvex(masm, encoding, backupSlot) : null;
+            move(backupKind, crb, masm, backupSlot, scratch.asValue(backupSlot.getValueKind()), backupEnc);
             for (int i = 0; i < results.length; i++) {
                 Value input = inputs[i];
                 if (Value.ILLEGAL.equals(input)) {
@@ -274,11 +322,22 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
                 }
                 AllocatableValue result = results[i];
                 // move stack slot
-                move((AMD64Kind) input.getPlatformKind(), crb, masm, scratch.asValue(input.getValueKind()), input);
-                move((AMD64Kind) result.getPlatformKind(), crb, masm, result, scratch.asValue(result.getValueKind()));
+                AMD64Kind inputKind = getCompatibleKind((AMD64Kind) input.getPlatformKind(), backupKind);
+                move(inputKind, crb, masm, scratch.asValue(LIRKind.value(inputKind)), input, encoding);
+                AMD64Kind resultKind = getCompatibleKind((AMD64Kind) result.getPlatformKind(), backupKind);
+                move(resultKind, crb, masm, result, scratch.asValue(LIRKind.value(resultKind)), encoding);
             }
             // restore scratch register
-            move(backupKind, crb, masm, scratch.asValue(backupSlot.getValueKind()), backupSlot);
+            move(backupKind, crb, masm, scratch.asValue(backupSlot.getValueKind()), backupSlot, backupEnc);
+        }
+
+        private static void move(AMD64Kind moveKind, CompilationResultBuilder crb, AMD64MacroAssembler masm,
+                        AllocatableValue result, Value input, AMD64SIMDInstructionEncoding encoding) {
+            if (encoding != null && moveKind.getVectorLength() > 1) {
+                AMD64VectorMove.move(crb, masm, result, input, encoding);
+            } else {
+                AMD64Move.move(moveKind, crb, masm, result, input);
+            }
         }
     }
 
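getCompatibleKind is what lets a single QWORD-sized general-purpose scratch carry small XMM values: when the backup travels through an integer register, an XMM kind is remapped to the integer kind of equal byte size, so the register-to-stack and stack-to-register moves use general-purpose encodings. A standalone model of that mapping (MoveKind and its sizes are a simplified stand-in for jdk.vm.ci.amd64.AMD64Kind):

enum MoveKind {
    BYTE(1, false), WORD(2, false), DWORD(4, false), QWORD(8, false),
    SINGLE(4, true), DOUBLE(8, true);

    final int size;
    final boolean xmm;

    MoveKind(int size, boolean xmm) {
        this.size = size;
        this.xmm = xmm;
    }
}

public class CompatibleKindSketch {
    // !backupKind.xmm stands in for backupKind.isInteger() in the real code.
    static MoveKind getCompatibleKind(MoveKind resultType, MoveKind backupKind) {
        if (!backupKind.xmm && resultType.xmm) {
            return switch (resultType.size) {
                case 1 -> MoveKind.BYTE;
                case 2 -> MoveKind.WORD;
                case 4 -> MoveKind.DWORD;
                case 8 -> MoveKind.QWORD;
                default -> throw new IllegalStateException(resultType + " cannot fit in " + backupKind);
            };
        }
        return resultType; // XMM backup, or already an integer kind
    }

    public static void main(String[] args) {
        System.out.println(getCompatibleKind(MoveKind.DOUBLE, MoveKind.QWORD));  // QWORD
        System.out.println(getCompatibleKind(MoveKind.SINGLE, MoveKind.DOUBLE)); // SINGLE
    }
}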
