Skip to content

Commit 8d667a9

Browse files
committed
SPU: SPURS oriented thread waiting
1 parent 16f619d commit 8d667a9

File tree

3 files changed

+246
-4
lines changed

3 files changed

+246
-4
lines changed

rpcs3/Emu/Cell/SPUThread.cpp

Lines changed: 236 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,11 @@ namespace vm
490490

491491
namespace spu
492492
{
493+
// Atomic-op tracking slots for raw SPUs, which have no lv2_spu_group to hold
// them; fetched through g_fxo (fixed function object storage). One slot per
// raw SPU — presumably indexed by spu_to_index(); TODO confirm raw lv2_id < 8.
struct raw_spu_atomic_info_t
{
	std::array<atomic_t<spu_atomic_op_info_for_group>, 8> raw_atomic_ops;
};
497+
493498
namespace scheduler
494499
{
495500
std::array<atomic_t<u8>, 65536> atomic_instruction_table = {};
@@ -4699,6 +4704,159 @@ u32 evaluate_spin_optimization(std::span<u8> stats, u64 evaluate_time, const cfg
46994704
return busy_waiting_switch;
47004705
}
47014706

4707+
// Compute the tracking-slot index for an SPU thread. Grouped threads carry
// the slot in the high byte of lv2_id; for raw SPUs lv2_id is used directly.
// NOTE(review): assumes the result fits the 8-entry slot arrays — confirm.
inline u8 spu_to_index(const spu_thread* spu) noexcept
{
	if (spu->group)
	{
		return static_cast<u8>(spu->lv2_id >> 24);
	}

	return static_cast<u8>(spu->lv2_id);
}
4711+
4712+
// Select the atomic-op slot array for this SPU: the owning group's table for
// grouped threads, otherwise the global raw-SPU table from g_fxo.
inline std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& get_spu_atomic_op_info(const spu_thread* spu) noexcept
{
	if (spu->group)
	{
		return spu->group->atomic_ops;
	}

	return g_fxo->get<spu::raw_spu_atomic_info_t>().raw_atomic_ops;
}
4716+
4717+
// To be used by GETLLAR
// Returns non-zero if the caller needs to wait:
//   2 = another SPU holds a full lock on the same reservation line
//   1 = another SPU holds a temporary lock and our per-SPU flag bit was consumed
// Slot encoding (see spu_atomic_op_info_for_group): addr's high bits hold the
// 128-byte reservation line; the low 7 bits are flags (0 = full lock,
// any low bit set = temporary lock, bit (1u << index) = per-SPU wait flag).
int test_and_update_atomic_op_info(std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& spu_info, u8 index, u32 raddr, u32 getllar_pc)
{
	auto info = spu_info[index].load();

	// Check our own slot: same 128-byte reservation line and same GETLLAR pc?
	if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
	{
		if (info.addr % 128)
		{
			// Temporary lock: upgrade to a full lock (clear the low flag bits)
			info.addr &= -128;
			spu_info[index].release(info);
			return 0;
		}

		// Repeated GETLLAR: disable entry
	}

	// Clear our own slot before scanning the other SPUs' entries
	info = {};

	spu_info[index].release(info);

	for (usz i = 0; i < spu_info.size(); i++)
	{
		info = spu_info[i].load();

		if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
		{
			int wait = 0;

			// Re-check under an atomic RMW: the entry may have changed since the
			// plain load above. The lambda returns false to abort the update.
			spu_info[i].fetch_op([&](spu_atomic_op_info_for_group& value)
			{
				wait = 0;

				if (value.addr / 128 == raddr / 128 && value.getllar == getllar_pc)
				{
					if (value.addr % 128 == 0)
					{
						// Full lock held by SPU i — caller must wait, no update
						wait = 2;
						return false;
					}

					if (value.addr & (1u << index))
					{
						// Temporary lock with our flag bit still set: consume it
						// so this SPU only waits once on this entry
						value.addr &= ~(1u << index);
						wait = 1;
						return true;
					}
				}

				return false;
			});

			if (wait)
			{
				return wait;
			}
		}
	}

	return 0;
}
4779+
4780+
// To be used when PUTLLC finishes to create a temporary barrier until the SPURS loop restarts
void downgrade_to_temporary_atomic_op_info(std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& spu_info, u8 index, u32 raddr, u32 getllar_pc)
{
	auto info = spu_info[index].load();

	// If our slot still describes this reservation/GETLLAR pair, downgrade the
	// full lock to a temporary one by setting all 7 low flag bits (one wave-through
	// per potential waiter; see test_and_update/init which consume these bits)
	if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
	{
		info.addr |= 127;
		spu_info[index].release(info);
		return;
	}

	// Slot describes a different operation: clear it entirely
	info = {};
	spu_info[index].release(info);
}
4795+
4796+
void release_atomic_op_info(std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& spu_info, u8 index)
4797+
{
4798+
spu_info[index].release(spu_atomic_op_info_for_group{});
4799+
}
4800+
4801+
// To be used by PUTLLC initiates
// Returns non-zero if the caller needs to wait:
//   2 = another SPU holds a full lock on the same reservation line
//   1 = another SPU holds a temporary lock and our per-SPU flag bit was consumed
int init_atomic_op_info(std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& spu_info, u8 index, u32 raddr, u32 getllar_pc)
{
	// Initially store locked entry with temporary lock (all 7 low flag bits set)
	spu_atomic_op_info_for_group info{};
	info.addr = raddr | 127;
	info.getllar = getllar_pc;

	spu_info[index].release(info);

	// Scan the other SPUs' slots for a conflicting entry on the same
	// reservation line and GETLLAR pc
	for (usz i = 0; i < spu_info.size(); i++)
	{
		if (i == index)
		{
			continue;
		}

		info = spu_info[i].load();

		if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
		{
			int wait = 0;

			// Re-check under an atomic RMW: the entry may have changed since the
			// plain load above. The lambda returns false to abort the update.
			spu_info[i].fetch_op([&](spu_atomic_op_info_for_group& value)
			{
				wait = 0;

				if (value.addr / 128 == raddr / 128 && value.getllar == getllar_pc)
				{
					if (value.addr % 128 == 0)
					{
						// Full lock held by SPU i — caller must wait, no update
						wait = 2;
						return false;
					}

					if (value.addr & (1u << index))
					{
						// Temporary lock with our flag bit still set: consume it
						value.addr &= ~(1u << index);
						wait = 1;
						return true;
					}
				}

				return false;
			});

			// NOTE(review): returns here even when wait == 0, leaving our own
			// slot with the temporary lock stored above (not upgraded) — confirm
			// this is the intended behavior when a matching entry races away
			return wait;
		}
	}

	// If exclusive, upgrade to full lock
	info.addr = raddr;
	info.getllar = getllar_pc;
	spu_info[index].store(info);

	return 0;
}
4859+
47024860
bool spu_thread::process_mfc_cmd()
47034861
{
47044862
// Stall infinitely if MFC queue is full
@@ -5015,11 +5173,50 @@ bool spu_thread::process_mfc_cmd()
50155173
last_getllar = pc;
50165174
last_gtsc = perf0.get();
50175175
}
5176+
else
5177+
{
5178+
last_getllar = pc;
5179+
}
50185180

50195181
last_getllar_addr = addr;
50205182
getllar_spin_count = 0;
50215183
getllar_busy_waiting_switch = umax;
50225184

5185+
if (ch_mfc_cmd.eal == spurs_addr)
5186+
{
5187+
u64 timeout = 0;
5188+
5189+
while (true)
5190+
{
5191+
const int wait = test_and_update_atomic_op_info(get_spu_atomic_op_info(this), spu_to_index(this), ch_mfc_cmd.eal, last_getllar);
5192+
5193+
if (!wait)
5194+
{
5195+
break;
5196+
}
5197+
5198+
const u64 current = get_system_time();
5199+
5200+
if (!timeout)
5201+
{
5202+
timeout = current + g_cfg.core.spu_delay_penalty * 1000;
5203+
}
5204+
else if (current >= timeout)
5205+
{
5206+
break;
5207+
}
5208+
5209+
if (wait == 2)
5210+
{
5211+
std::this_thread::yield();
5212+
}
5213+
else
5214+
{
5215+
busy_wait(50000);
5216+
}
5217+
}
5218+
}
5219+
50235220
u64 ntime = 0;
50245221
rsx::reservation_lock rsx_lock(addr, 128);
50255222

@@ -5232,6 +5429,41 @@ bool spu_thread::process_mfc_cmd()
52325429
}
52335430
}
52345431

5432+
if (ch_mfc_cmd.eal == spurs_addr)
5433+
{
5434+
u64 timeout = 0;
5435+
5436+
while (true)
5437+
{
5438+
const int wait = init_atomic_op_info(get_spu_atomic_op_info(this), spu_to_index(this), raddr, last_getllar);
5439+
5440+
if (!wait)
5441+
{
5442+
break;
5443+
}
5444+
5445+
const u64 current = get_system_time();
5446+
5447+
if (!timeout)
5448+
{
5449+
timeout = current + g_cfg.core.spu_delay_penalty * 1000;
5450+
}
5451+
else if (current >= timeout)
5452+
{
5453+
break;
5454+
}
5455+
5456+
if (wait == 2)
5457+
{
5458+
std::this_thread::yield();
5459+
}
5460+
else
5461+
{
5462+
busy_wait(50000);
5463+
}
5464+
}
5465+
}
5466+
52355467
if (do_putllc(ch_mfc_cmd))
52365468
{
52375469
ch_atomic_stat.set_value(MFC_PUTLLC_SUCCESS);
@@ -5299,6 +5531,7 @@ bool spu_thread::process_mfc_cmd()
52995531
std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
53005532
}
53015533

5534+
downgrade_to_temporary_atomic_op_info(get_spu_atomic_op_info(this), spu_to_index(this), raddr, last_getllar);
53025535
static_cast<void>(test_stopped());
53035536
return true;
53045537
}
@@ -6180,7 +6413,9 @@ s64 spu_thread::get_ch_value(u32 ch)
61806413

61816414
eventstat_busy_waiting_switch = value ? 1 : 0;
61826415
}
6183-
6416+
6417+
release_atomic_op_info(get_spu_atomic_op_info(this), spu_to_index(this));
6418+
61846419
for (bool is_first = true; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true), is_first = false)
61856420
{
61866421
const auto old = +state;

rpcs3/Emu/Cell/SPUThread.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,12 @@ struct spu_imm_table_t
497497

498498
extern const spu_imm_table_t g_spu_imm;
499499

500+
// Per-SPU record of an in-flight SPURS-style atomic operation (GETLLAR/PUTLLC),
// stored in an 8-slot array shared across a thread group (or globally for raw
// SPUs). An all-zero value means the slot is unused.
struct spu_atomic_op_info_for_group
{
	// Reservation address; only addr/128 identifies the 128-byte line, while the
	// low 7 bits are lock flags: 0 = full lock, nonzero = temporary lock, where
	// bit (1u << spu_index) is a per-SPU "wave-through" flag consumed by waiters
	u32 addr;
	// Program counter of the GETLLAR that opened the reservation, used to match
	// entries belonging to the same atomic loop
	u32 getllar;
};
505+
500506
enum FPSCR_EX
501507
{
502508
//Single-precision exceptions

rpcs3/Emu/Cell/lv2/sys_spu.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -297,9 +297,10 @@ struct lv2_spu_group
297297
bool set_terminate = false;
298298

299299
std::array<shared_ptr<named_thread<spu_thread>>, 8> threads; // SPU Threads
300-
std::array<s8, 256> threads_map; // SPU Threads map based number
301-
std::array<std::pair<u32, std::vector<sys_spu_segment>>, 8> imgs; // Entry points, SPU image segments
302-
std::array<std::array<u64, 4>, 8> args; // SPU Thread Arguments
300+
std::array<s8, 256> threads_map{}; // SPU Threads map based number
301+
std::array<std::pair<u32, std::vector<sys_spu_segment>>, 8> imgs{}; // Entry points, SPU image segments
302+
std::array<std::array<u64, 4>, 8> args{}; // SPU Thread Arguments
303+
std::array<atomic_t<spu_atomic_op_info_for_group>, 8> atomic_ops{};
303304

304305
shared_ptr<lv2_event_queue> ep_run; // port for SYS_SPU_THREAD_GROUP_EVENT_RUN events
305306
shared_ptr<lv2_event_queue> ep_exception; // TODO: SYS_SPU_THREAD_GROUP_EVENT_EXCEPTION

0 commit comments

Comments
 (0)