Skip to content

Commit 73de80a

Browse files
committed
s390x: implement & test smul_overflow
1 parent b3ed4b7 commit 73de80a

2 files changed

Lines changed: 97 additions & 0 deletions

File tree

cranelift/codegen/src/isa/s390x/lower.isle

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4060,6 +4060,18 @@
40604060
;; Narrow unsigned case (types matched by `fits_in_32`): zero-extend both
;; operands to 64 bits, multiply them as $I64, and pass the product to
;; `split_into_result_and_overflow` to derive the two outputs.
(rule 1 (lower (has_type (fits_in_32 ty) (umul_overflow x y)))
40614061
(split_into_result_and_overflow ty (mul_reg $I64 (zext64_reg ty x) (zext64_reg ty y))))
40624062

4063+
;;;; Rules for `smul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4064+
4065+
;; Compute a widened product and derive the result and overflow outputs from it
4066+
;; 64-bit case: form the widened signed product with `smul_wide`, which yields
;; a register pair, and return the two halves of that pair as the two outputs.
;;
;; NOTE(review): the first output (the product value) is taken from the HIGH
;; half of the pair and the second (the overflow indicator) from the LOW half.
;; For a full-width product one would expect the low half to be the wrapped
;; result and the overflow to be derived from the high half -- confirm that
;; this ordering is intended.
(rule 0 (lower (has_type $I64 (smul_overflow x y)))
4067+
;; Note: the operands are passed as (y, x) rather than (x, y); per the original
;; author this avoids emitting two spurious `lgr` register moves.
4068+
;; The original note points at the `umul_overflow` rules for the same trick --
;; TODO confirm the 64-bit rule there also swaps its operands.
4069+
(let ((intermediate RegPair (smul_wide y x)))
4070+
(output_pair (regpair_hi intermediate) (regpair_lo intermediate))))
4071+
4072+
;; Narrow signed case (types matched by `fits_in_32`): sign-extend both
;; operands to 64 bits, multiply them as $I64 (operands passed as y, x), and
;; pass the product to `split_into_result_and_overflow`.
;;
;; NOTE(review): this reuses the same splitting helper as the unsigned
;; `umul_overflow` rule. The expected filetest output shows the overflow value
;; coming from a logical right shift (`srlk`/`srlg`) of the product, which
;; looks nonzero for a negative product that did not overflow -- confirm the
;; helper is valid for signed multiplication.
(rule 1 (lower (has_type (fits_in_32 ty) (smul_overflow x y)))
4073+
(split_into_result_and_overflow ty (mul_reg $I64 (sext64_reg ty y) (sext64_reg ty x))))
4074+
40634075
;;;; Rules for `return` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
40644076

40654077
(rule (lower (return args))
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
test compile precise-output
2+
target s390x
3+
4+
function %f2(i8, i8) -> i8, i8 {
5+
block0(v0: i8, v1: i8):
6+
v2, v3 = smul_overflow v0, v1
7+
return v2, v3
8+
}
9+
10+
; VCode:
11+
; block0:
12+
; lgbr %r5, %r3
13+
; lgbr %r3, %r2
14+
; msgrkc %r2, %r5, %r3
15+
; srlk %r3, %r2, 8
16+
; br %r14
17+
;
18+
; Disassembled:
19+
; block0: ; offset 0x0
20+
; lgbr %r5, %r3
21+
; lgbr %r3, %r2
22+
; msgrkc %r2, %r5, %r3
23+
; srlk %r3, %r2, 8
24+
; br %r14
25+
26+
function %f2(i16, i16) -> i16, i8 {
27+
block0(v0: i16, v1: i16):
28+
v2, v3 = smul_overflow v0, v1
29+
return v2, v3
30+
}
31+
32+
; VCode:
33+
; block0:
34+
; lghr %r5, %r3
35+
; lghr %r3, %r2
36+
; msgrkc %r2, %r5, %r3
37+
; srlk %r3, %r2, 16
38+
; br %r14
39+
;
40+
; Disassembled:
41+
; block0: ; offset 0x0
42+
; lghr %r5, %r3
43+
; lghr %r3, %r2
44+
; msgrkc %r2, %r5, %r3
45+
; srlk %r3, %r2, 0x10
46+
; br %r14
47+
48+
function %f2(i32, i32) -> i32, i8 {
49+
block0(v0: i32, v1: i32):
50+
v2, v3 = smul_overflow v0, v1
51+
return v2, v3
52+
}
53+
54+
; VCode:
55+
; block0:
56+
; lgfr %r5, %r3
57+
; lgfr %r3, %r2
58+
; msgrkc %r2, %r5, %r3
59+
; srlg %r3, %r2, 32
60+
; br %r14
61+
;
62+
; Disassembled:
63+
; block0: ; offset 0x0
64+
; lgfr %r5, %r3
65+
; lgfr %r3, %r2
66+
; msgrkc %r2, %r5, %r3
67+
; srlg %r3, %r2, 0x20
68+
; br %r14
69+
70+
function %f4(i64, i64) -> i64, i8 {
71+
block0(v0: i64, v1: i64):
72+
v2, v3 = smul_overflow v0, v1
73+
return v2, v3
74+
}
75+
76+
; VCode:
77+
; block0:
78+
; mgrk %r2, %r3, %r2
79+
; br %r14
80+
;
81+
; Disassembled:
82+
; block0: ; offset 0x0
83+
; mgrk %r2, %r3, %r2
84+
; br %r14
85+

0 commit comments

Comments
 (0)