@@ -41,6 +41,7 @@
 import jdk.graal.compiler.asm.Label;
 import jdk.graal.compiler.asm.amd64.AMD64Address;
 import jdk.graal.compiler.asm.amd64.AMD64Assembler.AMD64MIOp;
+import jdk.graal.compiler.asm.amd64.AMD64Assembler.AMD64SIMDInstructionEncoding;
 import jdk.graal.compiler.asm.amd64.AMD64BaseAssembler.OperandSize;
 import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
 import jdk.graal.compiler.core.common.CompressEncoding;
@@ -56,6 +57,7 @@
 import jdk.graal.compiler.lir.Opcode;
 import jdk.graal.compiler.lir.StandardOp;
 import jdk.graal.compiler.lir.VirtualStackSlot;
+import jdk.graal.compiler.lir.amd64.vector.AMD64VectorMove;
 import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
 import jdk.vm.ci.amd64.AMD64;
 import jdk.vm.ci.amd64.AMD64Kind;
@@ -179,22 +181,43 @@ public boolean canRematerializeToStack() {
         }
     }

+    /**
+     * Represents a LIR operation that moves data from one stack location to another, using a
+     * scratch register and a backup stack location to temporarily store the contents of the
+     * scratch register.
+     */
+    public interface StackMoveOp extends StandardOp.ValueMoveOp {
+
+        Register getScratchRegister();
+
+        /**
+         * The backup slot must be distinct from both the input and output stack slots.
+         */
+        AllocatableValue getBackupSlot();
+    }
+
     @Opcode("STACKMOVE")
-    public static final class AMD64StackMove extends AMD64LIRInstruction implements StandardOp.ValueMoveOp {
+    public static final class AMD64StackMove extends AMD64LIRInstruction implements StackMoveOp {
         public static final LIRInstructionClass<AMD64StackMove> TYPE = LIRInstructionClass.create(AMD64StackMove.class);

         @Def({STACK}) protected AllocatableValue result;
         @Use({STACK, HINT}) protected AllocatableValue input;
         @Alive({STACK, UNINITIALIZED}) private AllocatableValue backupSlot;

         private Register scratch;
+        private final boolean isScratchAlwaysZero;

         public AMD64StackMove(AllocatableValue result, AllocatableValue input, Register scratch, AllocatableValue backupSlot) {
+            this(result, input, scratch, backupSlot, false);
+        }
+
+        public AMD64StackMove(AllocatableValue result, AllocatableValue input, Register scratch, AllocatableValue backupSlot, boolean isScratchAlwaysZero) {
             super(TYPE);
             this.result = result;
             this.input = input;
             this.backupSlot = backupSlot;
             this.scratch = scratch;
+            this.isScratchAlwaysZero = isScratchAlwaysZero;
             assert result.getPlatformKind().getSizeInBytes() <= input.getPlatformKind().getSizeInBytes() : "cannot move " + input + " into a larger Value " + result;
         }
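Since x86 has no memory-to-memory mov, a stack-to-stack move has to stage the value in a register, and when the register allocator cannot hand out a free one, the scratch register's own contents must be preserved across the move. A rough sketch of what AMD64StackMove.emitCode produces for a QWORD payload; the offsets and the direct movq calls are hypothetical stand-ins for what the frame map and the reg2stack/stack2reg helpers actually resolve:

    masm.movq(new AMD64Address(rsp, backupOffset), scratch);  // back up scratch (skipped if isScratchAlwaysZero)
    masm.movq(scratch, new AMD64Address(rsp, inputOffset));   // load the source stack slot
    masm.movq(new AMD64Address(rsp, resultOffset), scratch);  // store to the destination stack slot
    masm.movq(scratch, new AMD64Address(rsp, backupOffset));  // restore, or masm.xorl(scratch, scratch)
                                                              // when scratch is known to hold zero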
@@ -208,32 +231,58 @@ public AllocatableValue getResult() {
             return result;
         }

+        @Override
         public Register getScratchRegister() {
             return scratch;
         }

+        @Override
         public AllocatableValue getBackupSlot() {
             return backupSlot;
         }

         @Override
         public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
             AMD64Kind backupKind = (AMD64Kind) backupSlot.getPlatformKind();
-            if (backupKind.isXMM()) {
-                // graal doesn't use vector values, so it's safe to backup using DOUBLE
-                backupKind = AMD64Kind.DOUBLE;
+            // In SVM CEntryPoint stubs, each callee-saved register from the native calling
+            // convention is backed up to a separate interval, which is then spilled before the Java
+            // method invocation, since the Java calling convention treats all caller-saved
+            // registers as volatile. On Windows, some xmm registers are callee-saved in the native
+            // convention. As a result, we may insert StackMove operations for xmm register values
+            // larger than 64 bits prior to the Java invocation. These StackMoves are removed by
+            // LinearScanEliminateSpillMovePhase.
+
+            // back up scratch register
+            if (isScratchAlwaysZero) {
+                // no need to back up
+            } else {
+                reg2stack(backupKind, crb, masm, backupSlot, scratch);
             }
-
-            // backup scratch register
-            reg2stack(backupKind, crb, masm, backupSlot, scratch);
             // move stack slot
-            stack2reg((AMD64Kind) getInput().getPlatformKind(), crb, masm, scratch, getInput());
-            reg2stack((AMD64Kind) getResult().getPlatformKind(), crb, masm, getResult(), scratch);
+            stack2reg(getCompatibleKind((AMD64Kind) getInput().getPlatformKind(), backupKind), crb, masm, scratch, getInput());
+            reg2stack(getCompatibleKind((AMD64Kind) getResult().getPlatformKind(), backupKind), crb, masm, getResult(), scratch);
             // restore scratch register
-            stack2reg(backupKind, crb, masm, scratch, backupSlot);
+            if (isScratchAlwaysZero) {
+                masm.xorl(scratch, scratch);
+            } else {
+                stack2reg(backupKind, crb, masm, scratch, backupSlot);
+            }
         }
     }

+    private static AMD64Kind getCompatibleKind(AMD64Kind resultType, AMD64Kind backupKind) {
+        if (backupKind.isInteger() && resultType.isXMM()) {
+            return switch (resultType.getSizeInBytes()) {
+                case 1 -> AMD64Kind.BYTE;
+                case 2 -> AMD64Kind.WORD;
+                case 4 -> AMD64Kind.DWORD;
+                case 8 -> AMD64Kind.QWORD;
+                default -> throw GraalError.shouldNotReachHere(resultType + " cannot fit in " + backupKind);
+            };
+        }
+        return resultType;
+    }
+
     @Opcode("MULTISTACKMOVE")
     public static final class AMD64MultiStackMove extends AMD64LIRInstruction {
         public static final LIRInstructionClass<AMD64MultiStackMove> TYPE = LIRInstructionClass.create(AMD64MultiStackMove.class);
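The new getCompatibleKind helper is what lets an integer scratch register carry a floating-point stack value: the move is narrowed to the integer kind of the same byte size, so plain general-purpose moves are emitted instead of xmm moves. Assuming the standard JVMCI AMD64Kind constants, the mapping works out as:

    getCompatibleKind(AMD64Kind.SINGLE, AMD64Kind.QWORD)       -> AMD64Kind.DWORD   (4-byte xmm payload)
    getCompatibleKind(AMD64Kind.DOUBLE, AMD64Kind.QWORD)       -> AMD64Kind.QWORD   (8-byte xmm payload)
    getCompatibleKind(AMD64Kind.DWORD, AMD64Kind.QWORD)        -> AMD64Kind.DWORD   (not xmm, unchanged)
    getCompatibleKind(AMD64Kind.V128_SINGLE, AMD64Kind.QWORD)  -> throws GraalError (16 bytes cannot fit)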
@@ -244,26 +293,25 @@ public static final class AMD64MultiStackMove extends AMD64LIRInstruction {
         @Alive({STACK, UNINITIALIZED}) private AllocatableValue backupSlot;

         private Register scratch;
+        private AMD64SIMDInstructionEncoding encoding;

-        public AMD64MultiStackMove(AllocatableValue[] results, Value[] inputs, Value[] tmps, Register scratch, AllocatableValue backupSlot) {
+        public AMD64MultiStackMove(AllocatableValue[] results, Value[] inputs, Value[] tmps, Register scratch,
+                        AllocatableValue backupSlot, AMD64SIMDInstructionEncoding encoding) {
             super(TYPE);
             this.results = results;
             this.inputs = inputs;
             this.tmps = tmps;
             this.backupSlot = backupSlot;
             this.scratch = scratch;
+            this.encoding = encoding;
         }

         @Override
         public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
             AMD64Kind backupKind = (AMD64Kind) backupSlot.getPlatformKind();
-            if (backupKind.isXMM()) {
-                // graal doesn't use vector values, so it's safe to backup using DOUBLE
-                backupKind = AMD64Kind.DOUBLE;
-            }
-
             // backup scratch register
-            move(backupKind, crb, masm, backupSlot, scratch.asValue(backupSlot.getValueKind()));
+            AMD64SIMDInstructionEncoding backupEnc = encoding != null ? AMD64VectorMove.maybeOverrideEvex(masm, encoding, backupSlot) : null;
+            move(backupKind, crb, masm, backupSlot, scratch.asValue(backupSlot.getValueKind()), backupEnc);
             for (int i = 0; i < results.length; i++) {
                 Value input = inputs[i];
                 if (Value.ILLEGAL.equals(input)) {
@@ -274,11 +322,22 @@ public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
                 }
                 AllocatableValue result = results[i];
                 // move stack slot
-                move((AMD64Kind) input.getPlatformKind(), crb, masm, scratch.asValue(input.getValueKind()), input);
-                move((AMD64Kind) result.getPlatformKind(), crb, masm, result, scratch.asValue(result.getValueKind()));
+                AMD64Kind inputKind = getCompatibleKind((AMD64Kind) input.getPlatformKind(), backupKind);
+                move(inputKind, crb, masm, scratch.asValue(LIRKind.value(inputKind)), input, encoding);
+                AMD64Kind resultKind = getCompatibleKind((AMD64Kind) result.getPlatformKind(), backupKind);
+                move(resultKind, crb, masm, result, scratch.asValue(LIRKind.value(resultKind)), encoding);
             }
             // restore scratch register
-            move(backupKind, crb, masm, scratch.asValue(backupSlot.getValueKind()), backupSlot);
+            move(backupKind, crb, masm, scratch.asValue(backupSlot.getValueKind()), backupSlot, backupEnc);
+        }
+
+        private static void move(AMD64Kind moveKind, CompilationResultBuilder crb, AMD64MacroAssembler masm,
+                        AllocatableValue result, Value input, AMD64SIMDInstructionEncoding encoding) {
+            if (encoding != null && moveKind.getVectorLength() > 1) {
+                AMD64VectorMove.move(crb, masm, result, input, encoding);
+            } else {
+                AMD64Move.move(moveKind, crb, masm, result, input);
+            }
         }
     }

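The private move overload keeps the scalar path intact: with a null encoding, or for kinds whose vector length is one, it falls through to AMD64Move.move exactly as before, and only genuine vector kinds combined with a non-null encoding are routed through AMD64VectorMove. A hypothetical call site showing both modes (enc assumed to be obtained elsewhere from the target's SIMD feature set):

    // scalar-only caller, previous behavior
    new AMD64MultiStackMove(results, inputs, tmps, scratch, backupSlot, null);
    // caller that may move xmm/ymm/zmm values
    new AMD64MultiStackMove(results, inputs, tmps, scratch, backupSlot, enc);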