Skip to content

Commit 6852410

Browse files
committed
Fix shfl_down test
1 parent ec429a2 commit 6852410

File tree

1 file changed

+12
-31
lines changed

1 file changed

+12
-31
lines changed

test/intrinsics.jl

Lines changed: 12 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -41,41 +41,21 @@ function test_subgroup_kernel(results)
4141
return
4242
end
4343

44-
# Do NOT use this kernel as an example for your code.
45-
# It was written assuming one workgroup of size 32 and
46-
# is only valid for those
4744
function shfl_down_test_kernel(a, b, ::Val{N}) where N
48-
# This is not valid
4945
idx = KI.get_sub_group_local_id()
5046

51-
temp = KI.localmemory(eltype(b), N)
52-
temp[idx] = a[idx]
47+
val = a[idx]
5348

54-
KI.barrier()
55-
56-
if idx == 1
57-
value = temp[idx]
58-
59-
if KI.get_sub_group_size() > 32
60-
value = value + KI.shfl_down(value, 32)
61-
KI.sub_group_barrier()
62-
end
63-
value = value + KI.shfl_down(value, 16)
64-
KI.sub_group_barrier()
65-
66-
value = value + KI.shfl_down(value, 8)
67-
KI.sub_group_barrier()
68-
69-
value = value + KI.shfl_down(value, 4)
70-
KI.sub_group_barrier()
71-
72-
value = value + KI.shfl_down(value, 2)
73-
KI.sub_group_barrier()
49+
offset = 0x00000001
50+
while offset < N
51+
val += KI.shfl_down(val, offset)
52+
offset <<= 1
53+
end
7454

75-
value = value + KI.shfl_down(value, 1)
76-
KI.sub_group_barrier()
55+
KI.sub_group_barrier()
7756

78-
b[idx] = value
57+
if idx == 1
58+
b[idx] = val
7959
end
8060
return
8161
end
@@ -215,8 +195,9 @@ function intrinsics_testsuite(backend, AT)
215195
end
216196
@testset "shfl_down(::$T)" for T in KI.shfl_down_types(backend())
217197
N = KI.sub_group_size(backend())
218-
a = zeros(T, N)
219-
rand!(a, (1:4))
198+
a = ones(T, N)
199+
# a = zeros(T, N)
200+
# rand!(a, (1:4))
220201

221202
dev_a = AT(a)
222203
dev_b = AT(zeros(T, N))

0 commit comments

Comments
 (0)