File tree Expand file tree Collapse file tree 1 file changed +12
-31
lines changed
Expand file tree Collapse file tree 1 file changed +12
-31
lines changed Original file line number Diff line number Diff line change @@ -41,41 +41,21 @@ function test_subgroup_kernel(results)
4141 return
4242end
4343
44- # Do NOT use this kernel as an example for your code.
45- # It was written assuming one workgroup of size 32 and
46- # is only valid for those
4744function shfl_down_test_kernel (a, b, :: Val{N} ) where N
48- # This is not valid
4945 idx = KI. get_sub_group_local_id ()
5046
51- temp = KI. localmemory (eltype (b), N)
52- temp[idx] = a[idx]
47+ val = a[idx]
5348
54- KI. barrier ()
55-
56- if idx == 1
57- value = temp[idx]
58-
59- if KI. get_sub_group_size () > 32
60- value = value + KI. shfl_down (value, 32 )
61- KI. sub_group_barrier ()
62- end
63- value = value + KI. shfl_down (value, 16 )
64- KI. sub_group_barrier ()
65-
66- value = value + KI. shfl_down (value, 8 )
67- KI. sub_group_barrier ()
68-
69- value = value + KI. shfl_down (value, 4 )
70- KI. sub_group_barrier ()
71-
72- value = value + KI. shfl_down (value, 2 )
73- KI. sub_group_barrier ()
49+ offset = 0x00000001
50+ while offset < N
51+ val += KI. shfl_down (val, offset)
52+ offset <<= 1
53+ end
7454
75- value = value + KI. shfl_down (value, 1 )
76- KI. sub_group_barrier ()
55+ KI. sub_group_barrier ()
7756
78- b[idx] = value
57+ if idx == 1
58+ b[idx] = val
7959 end
8060 return
8161end
@@ -215,8 +195,9 @@ function intrinsics_testsuite(backend, AT)
215195 end
216196 @testset " shfl_down(::$T )" for T in KI. shfl_down_types (backend ())
217197 N = KI. sub_group_size (backend ())
218- a = zeros (T, N)
219- rand! (a, (1 : 4 ))
198+ a = ones (T, N)
199+ # a = zeros(T, N)
200+ # rand!(a, (1:4))
220201
221202 dev_a = AT (a)
222203 dev_b = AT (zeros (T, N))
You can’t perform that action at this time.
0 commit comments