@@ -46,21 +46,49 @@ struct inclusive_scan
4646
4747 // NBL_CONSTEXPR_STATIC_INLINE uint32_t ItemsPerInvocation = vector_traits<T>::Dimension;
4848
// Call operator of the scan functor (the hunk header names the enclosing struct `inclusive_scan`).
// This span is a diff rendering: lines tagged `N-` belong to the removed version, lines tagged
// `N+` to the added version, and lines carrying two fused numbers are unchanged context.
// NOTE(review): ItemsPerInvocation, binop_t, type_t and BroadcastLast are not declared in the
// visible part of the file; presumably they come from the enclosing struct / namespace.
//
// Removed signature (took `value` through NBL_CONST_REF_ARG):
49- type_t operator ()(NBL_CONST_REF_ARG (type_t) value)
// The previous implementation is kept below as commented-out reference. It ran a serial scan
// over the components of `value`, passed the last component through exclusive_scan_op_t, and
// then combined that scalar into every component.
49+ // type_t operator()(NBL_CONST_REF_ARG(type_t) value)
50+ // {
51+ // binop_t binop;
52+ // type_t retval;
53+ // retval[0] = value[0];
54+ // [unroll]
55+ // for (uint32_t i = 1; i < ItemsPerInvocation; i++)
56+ // retval[i] = binop(retval[i-1], value[i]);
57+
58+ // exclusive_scan_op_t op;
59+ // scalar_t exclusive = op(retval[ItemsPerInvocation-1]);
60+
61+ // [unroll]
62+ // for (uint32_t i = 0; i < ItemsPerInvocation; i++)
63+ // retval[i] = binop(retval[i], exclusive);
64+ // return retval;
65+ // }
66+
// Added signature: `value` is now a by-value parameter; the body below assigns to it.
67+ type_t operator ()(type_t value)
5068 {
5169 binop_t binop;
5270 type_t retval;
53- retval[0 ] = value[0 ];
71+
72+ // rhs = shuffleUp
// Shuffle the whole vector up by a delta of 1.
// NOTE(review): presumably this yields the value held by the previous invocation; confirm
// against glsl::subgroupShuffleUp.
73+ type_t rhs = glsl::subgroupShuffleUp<type_t>(value, 1u);
74+ // value = op(value, is 1st invoc ? op::identity : rhs)
// bool(gl_SubgroupInvocationID()) is false only for invocation 0, so that invocation is meant
// to combine with binop_t::identity instead of rhs.
// NOTE(review): assumes hlsl::mix(a, b, false) returns a, and that it accepts an identity
// operand alongside a type_t operand; TODO confirm.
75+ value = binop (value, hlsl::mix (binop_t::identity, rhs, bool (glsl::gl_SubgroupInvocationID ())));
76+
77+ // ex_scan = exclusive_scan(value)
// Serial exclusive scan over the components of this invocation's `value`, seeded with the
// identity in slot 0.
78+ type_t exclusive;
79+ exclusive[0 ] = binop_t::identity;
5480 [unroll]
5581 for (uint32_t i = 1 ; i < ItemsPerInvocation; i++)
56- retval[i] = binop (retval[i-1 ], value[i]);
82+ exclusive[i] = binop (value[i-1 ], exclusive[i-1 ]);
83+ // last_ex_scan = broadcast_last(ex_scan)
// Overwrite the local scan with whatever BroadcastLast returns.
// NOTE(review): BroadcastLast is defined outside this view; presumably it broadcasts the
// vector held by the last invocation; TODO confirm.
84+ exclusive = BroadcastLast<type_t>(exclusive);
5785
58- exclusive_scan_op_t op;
59- scalar_t exclusive = op (retval[ItemsPerInvocation- 1 ]);
86+ // for i in 0->N
87+ // retval[i] = op(value[i], last_ex_scan[i])
6088
// Final combine: in the added version each component of `value` is combined with the
// matching component of the broadcast `exclusive` vector (the removed version combined
// `retval[i]` with a single scalar).
6189 [unroll]
6290 for (uint32_t i = 0 ; i < ItemsPerInvocation; i++)
63- retval[i] = binop (retval [i], exclusive);
91+ retval[i] = binop (value [i], exclusive[i] );
6492 return retval;
6593 }
6694};
@@ -75,19 +103,35 @@ struct exclusive_scan
75103
76104 // NBL_CONSTEXPR_STATIC_INLINE uint32_t ItemsPerInvocation = vector_traits<T>::Dimension;
77105
// Call operator of the scan functor (the hunk header names the enclosing struct `exclusive_scan`).
// This span is a diff rendering: lines tagged `N-` belong to the removed version, lines tagged
// `N+` to the added version, and lines carrying two fused numbers are unchanged context.
// The commented-out copy directly below reproduces the removed (`N-`) implementation.
// NOTE(review): ItemsPerInvocation, config_t, binop_t, type_t and inclusive_scan_op_t are not
// declared in the visible part of the file; presumably they come from the enclosing struct.
106+ // type_t operator()(type_t value)
107+ // {
108+ // inclusive_scan_op_t op;
109+ // value = op(value);
110+
111+ // type_t left = glsl::subgroupShuffleUp<type_t>(value,1);
112+
113+ // type_t retval;
114+ // retval[0] = hlsl::mix(binop_t::identity, left[ItemsPerInvocation-1], bool(glsl::gl_SubgroupInvocationID()));
115+ // [unroll]
116+ // for (uint32_t i = 1; i < ItemsPerInvocation; i++)
117+ // retval[i] = value[i-1];
118+ // return retval;
119+ // }
120+
78121 type_t operator ()(type_t value)
79122 {
// Start from the result of inclusive_scan_op_t applied to the input.
80123 inclusive_scan_op_t op;
81124 value = op (value);
82125
83- type_t left = glsl::subgroupShuffleUp<type_t>(value,1 );
// Mask used to wrap the source invocation index.
// NOTE(review): `& (Size - 1)` only acts as a modulo if config_t::Size is a power of two; confirm.
126+ const uint32_t SubgroupSizeMinusOne = config_t::Size - 1u;
// With more than one item per invocation, read from invocation (ID + Size - 1) & (Size - 1),
// so invocation 0 reads from index Size - 1; with a single item a shuffleUp by 1 is used.
127+ type_t left = ItemsPerInvocation > 1u ? glsl::subgroupShuffle<type_t>(value,(glsl::gl_SubgroupInvocationID ()+SubgroupSizeMinusOne)&SubgroupSizeMinusOne) : glsl::subgroupShuffleUp<type_t>(value,1 );
84128
85- type_t retval ;
86- retval [0 ] = hlsl:: mix ( binop_t::identity, left[ItemsPerInvocation- 1 ], bool (glsl:: gl_SubgroupInvocationID ())) ;
// Candidate result for invocation 0: identity in slot 0, remaining slots are `left` shifted
// by one component.
129+ type_t newFirst ;
130+ newFirst [0 ] = binop_t::identity;
87131 [unroll]
88132 for (uint32_t i = 1 ; i < ItemsPerInvocation; i++)
89- retval [i] = value [i-1 ];
90- return retval ;
133+ newFirst [i] = left [i-1 ];
// bool(gl_SubgroupInvocationID()) is false only for invocation 0.
// NOTE(review): presumably hlsl::mix then selects newFirst for invocation 0 and `left` for all
// others; TODO confirm the argument order of hlsl::mix.
134+ return hlsl:: mix (newFirst, left, bool (glsl:: gl_SubgroupInvocationID ())) ;
91135 }
92136};
93137
0 commit comments