Skip to content

Commit d06fd9e

Browse files
S390x: implement arith overflow (#12523)
* s390x: implement & test uadd_overflow for fits-in-16 & i128 integers * s390x: implement & test usub_overflow * s390x: implement & test sadd_overflow * s390x: implement & test ssub_overflow * s390x: implement & test umul_overflow * s390x: implement & test smul_overflow
1 parent f8d0aee commit d06fd9e

File tree

17 files changed

+922
-22
lines changed

17 files changed

+922
-22
lines changed

cranelift/codegen/src/isa/s390x/inst.isle

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1115,8 +1115,10 @@
11151115
(SubLogical64)
11161116
(SubLogical64Ext32)
11171117
(Mul32)
1118+
(Mul32CC)
11181119
(Mul32Ext16)
11191120
(Mul64)
1121+
(Mul64CC)
11201122
(Mul64Ext16)
11211123
(Mul64Ext32)
11221124
(And32)
@@ -1205,11 +1207,13 @@
12051207
(Add32x4)
12061208
(Add64x2)
12071209
(Add128)
1210+
(Add128Cout)
12081211
(Sub8x16)
12091212
(Sub16x8)
12101213
(Sub32x4)
12111214
(Sub64x2)
12121215
(Sub128)
1216+
(Sub128Cout)
12131217
;; Multiplication
12141218
(Mul8x16)
12151219
(Mul16x8)
@@ -3872,6 +3876,31 @@
38723876
(intcc_as_cond (IntCC.SignedLessThan)))))
38733877
(select_bool_reg $I64 l_cond l_bound ub)))
38743878

3879+
;; Helpers for generating integer arithmetic instructions with overflow outputs ;;;
3880+
3881+
;; For fit-in-16 bit integers, we shift them into the most significant positions of their
3882+
;; 32-bit registers, use the condition codes for the overflow, and shift back into the
3883+
;; expected least-significant position to generate the result
3884+
(decl type_shift_up (Type) u8)
3885+
(rule (type_shift_up $I8) 24)
3886+
(rule (type_shift_up $I16) 16)
3887+
3888+
(decl overflow_and_result_from_shifted (Type ALUOp Reg Reg Cond) InstOutput)
3889+
(rule (overflow_and_result_from_shifted (fits_in_16 ty) op x y cond)
3890+
(let ((x_shifted Reg (lshl_imm $I32 x (type_shift_up ty)))
3891+
(y_shifted Reg (lshl_imm $I32 y (type_shift_up ty)))
3892+
(producer ProducesFlags (alu_rrr_with_flags_paired ty op x_shifted y_shifted))
3893+
(overflow Reg (lower_bool $I8 (bool (produces_flags_ignore producer) cond)))
3894+
(out Reg (lshr_imm $I32 (produces_flags_get_reg producer) (type_shift_up ty))))
3895+
(output_pair out overflow)))
3896+
3897+
;; Generate the paired overflow result from the generated condition codes
3898+
(decl overflow_and_result_from_producer (ProducesFlags Cond) InstOutput)
3899+
(rule (overflow_and_result_from_producer producer cond)
3900+
(output_pair
3901+
(produces_flags_get_reg producer)
3902+
(lower_bool $I8 (bool (produces_flags_ignore producer) cond))))
3903+
38753904

38763905
;; Helpers for generating `add` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
38773906

@@ -3892,6 +3921,10 @@
38923921
(decl add_reg (Type Reg Reg) Reg)
38933922
(rule (add_reg ty x y) (alu_rrr ty (aluop_add ty) x y))
38943923

3924+
(decl add_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
3925+
(rule (add_reg_with_flags_paired ty x y)
3926+
(alu_rrr_with_flags_paired ty (aluop_add ty) x y))
3927+
38953928
(decl add_reg_sext32 (Type Reg Reg) Reg)
38963929
(rule (add_reg_sext32 ty x y) (alu_rr ty (aluop_add_sext32 ty) x y))
38973930

@@ -3965,6 +3998,11 @@
39653998
(rule (add_logical_mem_zext32_with_flags_paired ty x y)
39663999
(alu_rx_with_flags_paired ty (aluop_add_logical_zext32 ty) x y))
39674000

4001+
(decl vecop_add_logical_cout (Type) VecBinaryOp)
4002+
(rule (vecop_add_logical_cout $I128) (VecBinaryOp.Add128Cout))
4003+
4004+
(decl vec_add_logical_cout (Type Reg Reg) Reg)
4005+
(rule (vec_add_logical_cout ty x y) (vec_rrr ty (vecop_add_logical_cout ty) x y))
39684006

39694007
;; Helpers for generating `sub` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
39704008

@@ -3985,6 +4023,10 @@
39854023
(decl sub_reg (Type Reg Reg) Reg)
39864024
(rule (sub_reg ty x y) (alu_rrr ty (aluop_sub ty) x y))
39874025

4026+
(decl sub_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
4027+
(rule (sub_reg_with_flags_paired ty x y)
4028+
(alu_rrr_with_flags_paired ty (aluop_sub ty) x y))
4029+
39884030
(decl sub_reg_sext32 (Type Reg Reg) Reg)
39894031
(rule (sub_reg_sext32 ty x y) (alu_rr ty (aluop_sub_sext32 ty) x y))
39904032

@@ -4020,6 +4062,10 @@
40204062
(decl sub_logical_reg (Type Reg Reg) Reg)
40214063
(rule (sub_logical_reg ty x y) (alu_rrr ty (aluop_sub_logical ty) x y))
40224064

4065+
(decl sub_logical_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
4066+
(rule (sub_logical_reg_with_flags_paired ty x y)
4067+
(alu_rrr_with_flags_paired ty (aluop_sub_logical ty) x y))
4068+
40234069
(decl sub_logical_reg_zext32 (Type Reg Reg) Reg)
40244070
(rule (sub_logical_reg_zext32 ty x y) (alu_rr ty (aluop_sub_logical_zext32 ty) x y))
40254071

@@ -4032,6 +4078,11 @@
40324078
(decl sub_logical_mem_zext32 (Type Reg MemArg) Reg)
40334079
(rule (sub_logical_mem_zext32 ty x y) (alu_rx ty (aluop_sub_logical ty) x y))
40344080

4081+
(decl vecop_sub_logical_cout (Type) VecBinaryOp)
4082+
(rule (vecop_sub_logical_cout $I128) (VecBinaryOp.Sub128Cout))
4083+
4084+
(decl vec_sub_logical_cout (Type Reg Reg) Reg)
4085+
(rule (vec_sub_logical_cout ty x y) (vec_rrr ty (vecop_sub_logical_cout ty) x y))
40354086

40364087
;; Helpers for generating `mul` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
40374088

@@ -4041,6 +4092,10 @@
40414092
(rule (aluop_mul $I32) (ALUOp.Mul32))
40424093
(rule (aluop_mul $I64) (ALUOp.Mul64))
40434094

4095+
(decl aluop_mul_cc (Type) ALUOp)
4096+
(rule (aluop_mul_cc $I32) (ALUOp.Mul32CC))
4097+
(rule (aluop_mul_cc $I64) (ALUOp.Mul64CC))
4098+
40444099
(decl aluop_mul_sext16 (Type) ALUOp)
40454100
(rule (aluop_mul_sext16 $I16) (ALUOp.Mul32Ext16))
40464101
(rule (aluop_mul_sext16 $I32) (ALUOp.Mul32Ext16))
@@ -4052,6 +4107,10 @@
40524107
(decl mul_reg (Type Reg Reg) Reg)
40534108
(rule (mul_reg ty x y) (alu_rrr ty (aluop_mul ty) x y))
40544109

4110+
(decl mul_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
4111+
(rule (mul_reg_with_flags_paired ty x y)
4112+
(alu_rrr_with_flags_paired ty (aluop_mul_cc ty) x y))
4113+
40554114
(decl mul_reg_sext32 (Type Reg Reg) Reg)
40564115
(rule (mul_reg_sext32 ty x y) (alu_rr ty (aluop_mul_sext32 ty) x y))
40574116

cranelift/codegen/src/isa/s390x/inst/emit.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1516,7 +1516,9 @@ impl Inst {
15161516
ALUOp::SubLogical32 => (0xb9fb, true), // SLRK
15171517
ALUOp::SubLogical64 => (0xb9eb, true), // SLGRK
15181518
ALUOp::Mul32 => (0xb9fd, true), // MSRKC
1519+
ALUOp::Mul32CC => (0xb9fd, false), // MSRKC
15191520
ALUOp::Mul64 => (0xb9ed, true), // MSGRKC
1521+
ALUOp::Mul64CC => (0xb9ed, false), // MSGRKC
15201522
ALUOp::And32 => (0xb9f4, true), // NRK
15211523
ALUOp::And64 => (0xb9e4, true), // NGRK
15221524
ALUOp::Orr32 => (0xb9f6, true), // ORK
@@ -2745,11 +2747,13 @@ impl Inst {
27452747
VecBinaryOp::Add32x4 => (0xe7f3, 2), // VAF
27462748
VecBinaryOp::Add64x2 => (0xe7f3, 3), // VAG
27472749
VecBinaryOp::Add128 => (0xe7f3, 4), // VAQ
2750+
VecBinaryOp::Add128Cout => (0xe7f1, 4), // VACCQ
27482751
VecBinaryOp::Sub8x16 => (0xe7f7, 0), // VSB
27492752
VecBinaryOp::Sub16x8 => (0xe7f7, 1), // VSH
27502753
VecBinaryOp::Sub32x4 => (0xe7f7, 2), // VSF
27512754
VecBinaryOp::Sub64x2 => (0xe7f7, 3), // VSG
27522755
VecBinaryOp::Sub128 => (0xe7f7, 4), // VSQ
2756+
VecBinaryOp::Sub128Cout => (0xe7f5, 4), // VSCBIQ
27532757
VecBinaryOp::Mul8x16 => (0xe7a2, 0), // VMLB
27542758
VecBinaryOp::Mul16x8 => (0xe7a2, 1), // VMLHW
27552759
VecBinaryOp::Mul32x4 => (0xe7a2, 2), // VMLF

cranelift/codegen/src/isa/s390x/inst/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,7 +1314,9 @@ impl Inst {
13141314
ALUOp::SubLogical32 => ("slrk", true),
13151315
ALUOp::SubLogical64 => ("slgrk", true),
13161316
ALUOp::Mul32 => ("msrkc", true),
1317+
ALUOp::Mul32CC => ("msrkc", false),
13171318
ALUOp::Mul64 => ("msgrkc", true),
1319+
ALUOp::Mul64CC => ("msgrkc", false),
13181320
ALUOp::And32 => ("nrk", true),
13191321
ALUOp::And64 => ("ngrk", true),
13201322
ALUOp::Orr32 => ("ork", true),
@@ -2529,11 +2531,13 @@ impl Inst {
25292531
VecBinaryOp::Add32x4 => "vaf",
25302532
VecBinaryOp::Add64x2 => "vag",
25312533
VecBinaryOp::Add128 => "vaq",
2534+
VecBinaryOp::Add128Cout => "vaccq",
25322535
VecBinaryOp::Sub8x16 => "vsb",
25332536
VecBinaryOp::Sub16x8 => "vsh",
25342537
VecBinaryOp::Sub32x4 => "vsf",
25352538
VecBinaryOp::Sub64x2 => "vsg",
25362539
VecBinaryOp::Sub128 => "vsq",
2540+
VecBinaryOp::Sub128Cout => "vscbiq",
25372541
VecBinaryOp::Mul8x16 => "vmlb",
25382542
VecBinaryOp::Mul16x8 => "vmlhw",
25392543
VecBinaryOp::Mul32x4 => "vmlf",

cranelift/codegen/src/isa/s390x/lower.isle

Lines changed: 110 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4326,12 +4326,116 @@
43264326

43274327
;;;; Rules for `uadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43284328

4329-
(rule 0 (lower (has_type (ty_32_or_64 ty) (uadd_overflow _ x y)))
4330-
(let ((sum Reg (add_reg ty x y))
4331-
(overflow Reg
4332-
(lower_bool $I8
4333-
(bool (icmpu_reg ty sum x) (intcc_as_cond (IntCC.UnsignedLessThan))))))
4334-
(output_pair sum overflow)))
4329+
(rule 1 (lower (uadd_overflow (fits_in_16 ty) x y))
4330+
(overflow_and_result_from_shifted ty (aluop_add_logical $I32) x y (mask_as_cond 3)))
4331+
4332+
(rule 0 (lower (uadd_overflow (ty_32_or_64 ty) x y))
4333+
(overflow_and_result_from_producer (add_logical_reg_with_flags_paired ty x y) (mask_as_cond 3)))
4334+
4335+
(rule 2 (lower (uadd_overflow $I128 x y))
4336+
(output_pair
4337+
(vec_add $I128 x y)
4338+
(vec_extract_lane $I64X2 (vec_add_logical_cout $I128 x y) 1 (zero_reg))))
4339+
4340+
;;;; Rules for `usub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4341+
4342+
;; Note: s390x stores and computes the borrow bit as a 0 when an overflow is present
4343+
;; so all of the conditions and computed borrows are inverted
4344+
4345+
(rule 1 (lower (usub_overflow (fits_in_16 ty) x y))
4346+
(overflow_and_result_from_shifted ty (aluop_sub_logical $I32) x y
4347+
(invert_cond (mask_as_cond 3))))
4348+
4349+
(rule 0 (lower (usub_overflow (ty_32_or_64 ty) x y))
4350+
(overflow_and_result_from_producer
4351+
(sub_logical_reg_with_flags_paired ty x y)
4352+
(invert_cond (mask_as_cond 3))))
4353+
4354+
(rule 2 (lower (usub_overflow $I128 x y))
4355+
(output_pair
4356+
(vec_sub $I128 x y)
4357+
(xor_uimm32shifted $I8
4358+
(vec_extract_lane $I64X2 (vec_sub_logical_cout $I128 x y) 1 (zero_reg))
4359+
(uimm32shifted 1 0))))
4360+
4361+
;;;; Rules for `sadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4362+
4363+
(rule 1 (lower (sadd_overflow (fits_in_16 ty) x y))
4364+
(overflow_and_result_from_shifted ty (aluop_add $I32) x y (mask_as_cond 1)))
4365+
4366+
(rule 0 (lower (sadd_overflow (ty_32_or_64 ty) x y))
4367+
(overflow_and_result_from_producer (add_reg_with_flags_paired ty x y) (mask_as_cond 1)))
4368+
4369+
(rule 2 (lower (sadd_overflow $I128 x y))
4370+
(let ((res Reg (vec_add $I128 x y))
4371+
(res_hi Reg (vec_extract_lane $I64X2 res 0 (zero_reg)))
4372+
(x_hi Reg (vec_extract_lane $I64X2 x 0 (zero_reg)))
4373+
(y_hi Reg (vec_extract_lane $I64X2 y 0 (zero_reg)))
4374+
(of_in_sign Reg
4375+
(and_reg $I64
4376+
(xor_reg $I64 x_hi res_hi)
4377+
(xor_reg $I64 y_hi res_hi))))
4378+
(output_pair res (lshr_imm $I64 of_in_sign 63))))
4379+
4380+
;;;; Rules for `ssub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4381+
4382+
(rule 1 (lower (ssub_overflow (fits_in_16 ty) x y))
4383+
(overflow_and_result_from_shifted ty (aluop_sub $I32) x y (mask_as_cond 1)))
4384+
4385+
;; Use flags generated by the sub instruction to handle overflow
4386+
(rule 0 (lower (ssub_overflow (ty_32_or_64 ty) x y))
4387+
(overflow_and_result_from_producer (sub_reg_with_flags_paired ty x y) (mask_as_cond 1)))
4388+
4389+
(rule 2 (lower (ssub_overflow $I128 x y))
4390+
(let ((res Reg (vec_sub $I128 x y))
4391+
(res_hi Reg (vec_extract_lane $I64X2 res 0 (zero_reg)))
4392+
(x_hi Reg (vec_extract_lane $I64X2 x 0 (zero_reg)))
4393+
(y_hi Reg (vec_extract_lane $I64X2 y 0 (zero_reg)))
4394+
(of_in_sign Reg
4395+
(and_reg $I64
4396+
(xor_reg $I64 x_hi res_hi)
4397+
(xor_reg $I64 x_hi y_hi))))
4398+
(output_pair res (lshr_imm $I64 of_in_sign 63))))
4399+
4400+
;;;; Rules for `umul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4401+
4402+
(rule 1 (lower (umul_overflow (fits_in_32 ty) x y))
4403+
(let ((result Reg (mul_reg $I64 (zext64_reg ty y) (zext64_reg ty x)))
4404+
(of Reg (lower_bool $I8 (invert_bool (bool
4405+
(icmps_simm16 $I32 (lshr_imm $I64 result (ty_bits ty)) 0)
4406+
(intcc_as_cond (IntCC.Equal)))))))
4407+
(output_pair result of)))
4408+
4409+
(rule 0 (lower (umul_overflow ty @ $I64 x y))
4410+
(let ((mul_out RegPair (umul_wide y x))
4411+
(result Reg (regpair_lo mul_out))
4412+
(of Reg (lower_bool $I8 (invert_bool (bool
4413+
(icmps_simm16 ty (regpair_hi mul_out) 0)
4414+
(intcc_as_cond (IntCC.Equal)))))))
4415+
(output_pair result of)))
4416+
4417+
;;;; Rules for `smul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4418+
4419+
;; For fit-in-16 bit integers, we shift just the lhs (`x`) into the most
4420+
;; significant positions of its 32-bit register, use the condition
4421+
;; codes for the overflow, and shift back into the expected
4422+
;; least-significant position to generate the result.
4423+
(rule 1 (lower (smul_overflow (fits_in_16 ty) x y))
4424+
(let ((y_ext Reg (sext32_reg ty y))
4425+
(x_shifted Reg (lshl_imm $I32 x (type_shift_up ty)))
4426+
(producer ProducesFlags
4427+
(mul_reg_with_flags_paired $I32 x_shifted y_ext))
4428+
(overflow Reg (lower_bool $I8 (bool
4429+
(produces_flags_ignore producer)
4430+
(mask_as_cond 1))))
4431+
(out Reg (lshr_imm $I32
4432+
(produces_flags_get_reg producer)
4433+
(type_shift_up ty))))
4434+
(output_pair out overflow)))
4435+
4436+
;; Use flags generated by the mul instruction to handle overflow
4437+
(rule 0 (lower (smul_overflow (ty_32_or_64 ty) x y))
4438+
(overflow_and_result_from_producer (mul_reg_with_flags_paired ty x y) (mask_as_cond 1)))
43354439

43364440
;;;; Rules for `return` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
43374441

0 commit comments

Comments
 (0)