@@ -1211,16 +1211,81 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
   llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
 }
 
-static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
-  for (const auto &CI : IA->ParseConstraints()) {
+/// Compute the minimum number of AGPRs required to allocate the inline asm.
+static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
+                                             const CallBase &Call) {
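+  // Track defs (outputs) and uses (inputs) of virtual AGPRs separately, plus
+  // the highest physical AGPR referenced; the totals are combined at the end.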
+  unsigned ArgNo = 0;
+  unsigned ResNo = 0;
+  unsigned AGPRDefCount = 0;
+  unsigned AGPRUseCount = 0;
+  unsigned MaxPhysReg = 0;
+  const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
+
+  // TODO: Overestimates due to not accounting for tied operands
+  for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
+    Type *Ty = nullptr;
+    switch (CI.Type) {
+    case InlineAsm::isOutput: {
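+      // An asm with multiple outputs returns a struct; each output constraint
+      // corresponds to one element of the struct.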
+      Ty = Call.getType();
+      if (auto *STy = dyn_cast<StructType>(Ty))
+        Ty = STy->getElementType(ResNo);
+      ++ResNo;
+      break;
+    }
+    case InlineAsm::isInput: {
+      Ty = Call.getArgOperand(ArgNo++)->getType();
+      break;
+    }
+    case InlineAsm::isLabel:
+      continue;
+    case InlineAsm::isClobber:
+      // Parse the physical register reference.
+      break;
+    }
+
     for (StringRef Code : CI.Codes) {
-      Code.consume_front("{");
-      if (Code.starts_with("a"))
-        return true;
+      unsigned RegCount = 0;
+      if (Code.starts_with("a")) {
+        // Virtual register, compute number of registers based on the type.
+        //
+        // We ought to be going through TargetLowering to get the number of
+        // registers, but we should avoid the dependence on CodeGen here.
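+        //
+        // An AGPR is 32 bits wide, so an N-bit value occupies ceil(N / 32)
+        // registers.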
+        RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
+      } else {
+        // Physical register reference
+        auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
+        if (Kind == 'a') {
+          RegCount = NumRegs;
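+          // Record one past the highest physical AGPR referenced, clamped to
+          // the architectural maximum of 256 AGPRs.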
+          MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
+        }
+
+        continue;
+      }
+
+      if (CI.Type == InlineAsm::isOutput) {
+        // Apply tuple alignment requirement
+        //
+        // TODO: This is more conservative than necessary.
+        AGPRDefCount = alignTo(AGPRDefCount, RegCount);
+
+        AGPRDefCount += RegCount;
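+        // An earlyclobber output cannot overlap any input, so it must also be
+        // counted against the use budget.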
+        if (CI.isEarlyClobber) {
+          AGPRUseCount = alignTo(AGPRUseCount, RegCount);
+          AGPRUseCount += RegCount;
+        }
+      } else {
+        AGPRUseCount = alignTo(AGPRUseCount, RegCount);
+        AGPRUseCount += RegCount;
+      }
     }
   }
 
-  return false;
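+  // Inputs die once the asm executes, so non-earlyclobber defs may reuse the
+  // registers assigned to uses; only the larger of the two counts matters.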
+  unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
+
+  // TODO: This is overly conservative. If there are any physical registers,
+  // allocate any virtual registers after them so we don't have to solve
+  // optimal packing.
+  return std::min(MaxVirtReg + MaxPhysReg, 256u);
 }
 
 // TODO: Migrate to range merge of amdgpu-agpr-alloc.
@@ -1259,7 +1324,7 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
       const Function *Callee = dyn_cast<Function>(CalleeOp);
       if (!Callee) {
         if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
-          return !inlineAsmUsesAGPRs(IA);
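+          // The asm blocks the no-AGPR attribute only if it actually requires
+          // AGPRs.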
+          return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0;
         return false;
       }
 