Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 93 additions & 6 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2244,17 +2244,49 @@ bool SIFrameLowering::allocateScavengingFrameIndexesNearIncomingSP(
return true;
}

static bool isLiveIntoMBB(MCRegister Reg, MachineBasicBlock &MBB,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't be here. I'm still unclear on what the rules are for when register aliases will appear in the live in list. This also should account for the LaneMask

const TargetRegisterInfo *TRI) {
for (MCRegAliasIterator R(Reg, TRI, true); R.isValid(); ++R) {
if (MBB.isLiveIn(*R)) {
return true;
}
}
return false;
}

bool SIFrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
if (!ST.useVGPRBlockOpsForCSR())
return false;
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *SITRI = static_cast<const SIRegisterInfo *>(TRI);

if (!ST.useVGPRBlockOpsForCSR()) {
for (const CalleeSavedInfo &CS : CSI) {
// Insert the spill to the stack frame.
unsigned Reg = CS.getReg();

if (CS.isSpilledToReg()) {
BuildMI(MBB, MI, DebugLoc(), TII->get(TargetOpcode::COPY),
CS.getDstReg())
.addReg(Reg, getKillRegState(true));
} else {
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(
Reg, Reg == SITRI->getReturnAddressReg(*MF) ? MVT::i64 : MVT::i32);
// If this value was already livein, we probably have a direct use of
// the incoming register value, so don't kill at the spill point. This
// happens since we pass some special inputs (workgroup IDs) in the
// callee saved range.
const bool IsLiveIn = isLiveIntoMBB(Reg, MBB, TRI);
TII->storeRegToStackSlotCFI(MBB, MI, Reg, !IsLiveIn, CS.getFrameIdx(),
RC, TRI);
}
}
return true;
}

MachineFrameInfo &FrameInfo = MF->getFrameInfo();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const SIInstrInfo *TII = ST.getInstrInfo();
SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();

const TargetRegisterClass *BlockRegClass =
Expand All @@ -2278,10 +2310,10 @@ bool SIFrameLowering::spillCalleeSavedRegisters(
FrameInfo.getObjectAlign(FrameIndex));

BuildMI(MBB, MI, MI->getDebugLoc(),
TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_SAVE))
TII->get(AMDGPU::SI_BLOCK_SPILL_V1024_CFI_SAVE))
.addReg(Reg, getKillRegState(false))
.addFrameIndex(FrameIndex)
.addReg(MFI->getStackPtrOffsetReg())
.addReg(FuncInfo->getStackPtrOffsetReg())
.addImm(0)
.addImm(Mask)
.addMemOperand(MMO);
Expand Down Expand Up @@ -2467,6 +2499,22 @@ MachineInstr *SIFrameLowering::buildCFI(MachineBasicBlock &MBB,
.setMIFlag(flag);
}

MachineInstr *SIFrameLowering::buildCFIForVRegToVRegSpill(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const Register Reg, const Register RegCopy) const {
  MachineFunction &MF = *MBB.getParent();
  const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  // getDwarfRegNum returns -1 for registers with no DWARF encoding; assert
  // validity here for consistency with buildCFIForVGPRToVMEMSpill.
  int DwarfReg = MCRI.getDwarfRegNum(Reg, /*isEH=*/false);
  int DwarfRegCopy = MCRI.getDwarfRegNum(RegCopy, /*isEH=*/false);
  assert(DwarfReg != -1 && DwarfRegCopy != -1 &&
         "registers must have DWARF encodings");

  // The CFI rule only applies to lanes enabled in the exec mask (EXEC_LO in
  // wave32 mode).
  unsigned MaskReg = MCRI.getDwarfRegNum(
      ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC, /*isEH=*/false);
  auto CFIInst = MCCFIInstruction::createLLVMVectorRegisterMask(
      nullptr, DwarfReg, DwarfRegCopy, VGPRLaneBitSize, MaskReg,
      ST.getWavefrontSize());
  return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
}

MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const Register SGPR, const Register VGPR,
Expand Down Expand Up @@ -2515,6 +2563,34 @@ MachineInstr *SIFrameLowering::buildCFIForSGPRToVGPRSpill(
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
}

MachineInstr *SIFrameLowering::buildCFIForSGPRToVMEMSpill(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, unsigned SGPR, int64_t Offset) const {
  // An SGPR saved to scratch memory is described with a plain DWARF
  // register-at-offset rule (unlike VGPR spills, which need a vector
  // offset rule masked by exec).
  const MCRegisterInfo &MCRI =
      *MBB.getParent()->getContext().getRegisterInfo();
  const int DwarfSGPR = MCRI.getDwarfRegNum(SGPR, /*isEH=*/false);
  return buildCFI(MBB, MBBI, DL,
                  MCCFIInstruction::createOffset(nullptr, DwarfSGPR, Offset));
}

MachineInstr *SIFrameLowering::buildCFIForVGPRToVMEMSpill(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, unsigned VGPR, int64_t Offset) const {
  const MachineFunction &MF = *MBB.getParent();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const MCRegisterInfo &MCRI = *MF.getContext().getRegisterInfo();

  const int DwarfVGPR = MCRI.getDwarfRegNum(VGPR, /*isEH=*/false);
  assert(DwarfVGPR != -1);

  // The CFI rule is masked by the exec register (EXEC_LO in wave32 mode):
  // it describes only the lanes that are enabled at the spill point.
  const MCRegister ExecReg = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
  const unsigned MaskReg = MCRI.getDwarfRegNum(ExecReg, /*isEH=*/false);
  return buildCFI(MBB, MBBI, DL,
                  MCCFIInstruction::createLLVMVectorOffset(
                      nullptr, DwarfVGPR, VGPRLaneBitSize, MaskReg,
                      ST.getWavefrontSize(), Offset));
}

MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, const Register Reg, const Register SGPRPair) const {
Expand All @@ -2535,3 +2611,14 @@ MachineInstr *SIFrameLowering::buildCFIForRegToSGPRPairSpill(
nullptr, DwarfReg, DwarfSGPR0, SGPRBitSize, DwarfSGPR1, SGPRBitSize);
return buildCFI(MBB, MBBI, DL, std::move(CFIInst));
}

MachineInstr *
SIFrameLowering::buildCFIForSameValue(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MBBI,
                                      const DebugLoc &DL, Register Reg) const {
  // Emit a DWARF same_value rule: \p Reg still holds the value it had in
  // the caller's frame.
  const MCRegisterInfo &MCRI =
      *MBB.getParent()->getContext().getRegisterInfo();
  const int DwarfReg = MCRI.getDwarfRegNum(Reg, /*isEH=*/false);
  return buildCFI(MBB, MBBI, DL,
                  MCCFIInstruction::createSameValue(nullptr, DwarfReg));
}
22 changes: 22 additions & 0 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,13 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
const DebugLoc &DL, const MCCFIInstruction &CFIInst,
MachineInstr::MIFlag flag = MachineInstr::FrameSetup) const;

/// Create a CFI index describing a spill of the VGPR/AGPR \p Reg to another
/// VGPR/AGPR \p RegCopy and build a MachineInstr around it.
MachineInstr *buildCFIForVRegToVRegSpill(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL,
const Register Reg,
const Register RegCopy) const;
/// Create a CFI index describing a spill of an SGPR to a single lane of
/// a VGPR and build a MachineInstr around it.
MachineInstr *buildCFIForSGPRToVGPRSpill(MachineBasicBlock &MBB,
Expand All @@ -134,10 +141,25 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register SGPR,
ArrayRef<SIRegisterInfo::SpilledReg> VGPRSpills) const;
/// Create a CFI index describing a spill of a SGPR to VMEM and
/// build a MachineInstr around it.
MachineInstr *buildCFIForSGPRToVMEMSpill(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, unsigned SGPR,
int64_t Offset) const;
/// Create a CFI index describing a spill of a VGPR to VMEM and
/// build a MachineInstr around it.
MachineInstr *buildCFIForVGPRToVMEMSpill(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, unsigned VGPR,
int64_t Offset) const;
/// Create a CFI index describing a spill of a register \p Reg to the SGPR
/// pair \p SGPRPair and build a MachineInstr around it.
MachineInstr *buildCFIForRegToSGPRPairSpill(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register Reg,
Register SGPRPair) const;
/// Create a CFI index recording that \p Reg still holds its value from the
/// caller's frame (DWARF same_value rule) and build a MachineInstr around
/// it.
MachineInstr *buildCFIForSameValue(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register Reg) const;
// Returns true if the function may need to reserve space on the stack for the
// CWSR trap handler.
bool mayReserveScratchForCWSR(const MachineFunction &MF) const;
Expand Down
Loading
Loading