@@ -490,6 +490,11 @@ namespace vm
 
 namespace spu
 {
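+	// One info slot per raw SPU; raw SPUs have no thread group to carry this state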
+	struct raw_spu_atomic_info_t
+	{
+		std::array<atomic_t<spu_atomic_op_info_for_group>, 8> raw_atomic_ops;
+	};
+
 	namespace scheduler
 	{
 		std::array<atomic_t<u8>, 65536> atomic_instruction_table = {};
@@ -4699,6 +4704,159 @@ u32 evaluate_spin_optimization(std::span<u8> stats, u64 evaluate_time, const cfg
 	return busy_waiting_switch;
 }
 
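+// Map an SPU thread to its slot in the atomic-op info array: threads in a group carry
+// the index in the top byte of lv2_id, raw SPUs use lv2_id directly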
+inline u8 spu_to_index(const spu_thread* spu) noexcept
+{
+	return spu->group ? (spu->lv2_id >> 24) : spu->lv2_id;
+}
+
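+// The info array lives in the thread group when there is one, otherwise in the global raw-SPU storage above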
+inline std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& get_spu_atomic_op_info(const spu_thread* spu) noexcept
+{
+	return spu->group ? spu->group->atomic_ops : g_fxo->get<spu::raw_spu_atomic_info_t>().raw_atomic_ops;
+}
+
+// To be used by GETLLAR
+// Returns non-zero if the caller needs to wait
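+// (1 = this SPU's wait bit was just consumed, caller should busy-wait briefly; 2 = a full lock is held, caller should yield)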
+int test_and_update_atomic_op_info(std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& spu_info, u8 index, u32 raddr, u32 getllar_pc)
+{
+	auto info = spu_info[index].load();
+
+	if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
+	{
+		if (info.addr % 128)
+		{
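+			// Own entry still holds a temporary lock: upgrade it to a full lock and proceed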
+			info.addr &= -128;
+			spu_info[index].release(info);
+			return 0;
+		}
+
+		// Repeated GETLLAR: disable entry
+	}
+
+	info = {};
+
+	spu_info[index].release(info);
+
+	for (usz i = 0; i < spu_info.size(); i++)
+	{
+		info = spu_info[i].load();
+
+		if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
+		{
+			int wait = 0;
+
+			spu_info[i].fetch_op([&](spu_atomic_op_info_for_group& value)
+			{
+				wait = 0;
+
+				if (value.addr / 128 == raddr / 128 && value.getllar == getllar_pc)
+				{
+					if (value.addr % 128 == 0)
+					{
+						wait = 2;
+						return false;
+					}
+
+					if (value.addr & (1u << index))
+					{
+						value.addr &= ~(1u << index);
+						wait = 1;
+						return true;
+					}
+				}
+
+				return false;
+			});
+
+			if (wait)
+			{
+				return wait;
+			}
+		}
+	}
+
+	return 0;
+}
+
+// To be used when PUTLLC finishes, creating a temporary barrier until the SPURS loop restarts
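+// The low 7 bits of addr encode the lock state: 0 = full (exclusive) lock, nonzero = a
+// temporary barrier from which each contending SPU clears its own bit once before passing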
+void downgrade_to_temporary_atomic_op_info(std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& spu_info, u8 index, u32 raddr, u32 getllar_pc)
+{
+	auto info = spu_info[index].load();
+
+	if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
+	{
+		info.addr |= 127;
+		spu_info[index].release(info);
+		return;
+	}
+
+	info = {};
+	spu_info[index].release(info);
+}
+
+void release_atomic_op_info(std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& spu_info, u8 index)
+{
+	spu_info[index].release(spu_atomic_op_info_for_group{});
+}
+
+// To be used when PUTLLC is initiated
+// Returns non-zero if the caller needs to wait
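+// (wait values follow the same convention as test_and_update_atomic_op_info)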
+int init_atomic_op_info(std::array<atomic_t<spu_atomic_op_info_for_group>, 8>& spu_info, u8 index, u32 raddr, u32 getllar_pc)
+{
+	// Initially store a locked entry with a temporary lock
+	spu_atomic_op_info_for_group info{};
+	info.addr = raddr | 127;
+	info.getllar = getllar_pc;
+
+	spu_info[index].release(info);
+
+	for (usz i = 0; i < spu_info.size(); i++)
+	{
+		if (i == index)
+		{
+			continue;
+		}
+
+		info = spu_info[i].load();
+
+		if (info.addr / 128 == raddr / 128 && info.getllar == getllar_pc)
+		{
+			int wait = 0;
+
+			spu_info[i].fetch_op([&](spu_atomic_op_info_for_group& value)
+			{
+				wait = 0;
+
+				if (value.addr / 128 == raddr / 128 && value.getllar == getllar_pc)
+				{
+					if (value.addr % 128 == 0)
+					{
+						wait = 2;
+						return false;
+					}
+
+					if (value.addr & (1u << index))
+					{
+						value.addr &= ~(1u << index);
+						wait = 1;
+						return true;
+					}
+				}
+
+				return false;
+			});
+
+			return wait;
+		}
+	}
+
+	// If exclusive, upgrade to full lock
+	info.addr = raddr;
+	info.getllar = getllar_pc;
+	spu_info[index].store(info);
+
+	return 0;
+}
+
 bool spu_thread::process_mfc_cmd()
 {
 	// Stall infinitely if MFC queue is full
@@ -5015,11 +5173,50 @@ bool spu_thread::process_mfc_cmd()
 			last_getllar = pc;
 			last_gtsc = perf0.get();
 		}
+		else
+		{
+			last_getllar = pc;
+		}
 
 		last_getllar_addr = addr;
 		getllar_spin_count = 0;
 		getllar_busy_waiting_switch = umax;
 
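+		// SPURS kernel lock area: give any conflicting atomic op on this line a bounded
+		// head start (up to spu_delay_penalty milliseconds) instead of spinning on it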
+		if (ch_mfc_cmd.eal == spurs_addr)
+		{
+			u64 timeout = 0;
+
+			while (true)
+			{
+				const int wait = test_and_update_atomic_op_info(get_spu_atomic_op_info(this), spu_to_index(this), ch_mfc_cmd.eal, last_getllar);
+
+				if (!wait)
+				{
+					break;
+				}
+
+				const u64 current = get_system_time();
+
+				if (!timeout)
+				{
+					timeout = current + g_cfg.core.spu_delay_penalty * 1000;
+				}
+				else if (current >= timeout)
+				{
+					break;
+				}
+
+				if (wait == 2)
+				{
+					std::this_thread::yield();
+				}
+				else
+				{
+					busy_wait(50000);
+				}
+			}
+		}
+
 		u64 ntime = 0;
 		rsx::reservation_lock rsx_lock(addr, 128);
50255222
@@ -5232,6 +5429,41 @@ bool spu_thread::process_mfc_cmd()
 			}
 		}
 
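+		// SPURS kernel lock area: announce this PUTLLC attempt and give a conflicting op a bounded head start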
+		if (ch_mfc_cmd.eal == spurs_addr)
+		{
+			u64 timeout = 0;
+
+			while (true)
+			{
+				const int wait = init_atomic_op_info(get_spu_atomic_op_info(this), spu_to_index(this), raddr, last_getllar);
+
+				if (!wait)
+				{
+					break;
+				}
+
+				const u64 current = get_system_time();
+
+				if (!timeout)
+				{
+					timeout = current + g_cfg.core.spu_delay_penalty * 1000;
+				}
+				else if (current >= timeout)
+				{
+					break;
+				}
+
+				if (wait == 2)
+				{
+					std::this_thread::yield();
+				}
+				else
+				{
+					busy_wait(50000);
+				}
+			}
+		}
+
5466+
52355467 if (do_putllc (ch_mfc_cmd))
52365468 {
52375469 ch_atomic_stat.set_value (MFC_PUTLLC_SUCCESS);
@@ -5299,6 +5531,7 @@ bool spu_thread::process_mfc_cmd()
 			std::memcpy(dump.data, _ptr<u8>(ch_mfc_cmd.lsa & 0x3ff80), 128);
 		}
 
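+		// PUTLLC finished: downgrade our entry to a temporary barrier until the SPURS loop restarts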
+		downgrade_to_temporary_atomic_op_info(get_spu_atomic_op_info(this), spu_to_index(this), raddr, last_getllar);
 		static_cast<void>(test_stopped());
 		return true;
 	}
@@ -6180,7 +6413,9 @@ s64 spu_thread::get_ch_value(u32 ch)
 
 			eventstat_busy_waiting_switch = value ? 1 : 0;
 		}
-
+
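+		// Waiting for events: release this SPU's atomic-op info slot so others are not stalled on it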
+		release_atomic_op_info(get_spu_atomic_op_info(this), spu_to_index(this));
+
 		for (bool is_first = true; !events.count; events = get_events(mask1 & ~SPU_EVENT_LR, true, true), is_first = false)
 		{
 			const auto old = +state;