bytecodealliance
diff --git a/‎cranelift/codegen/src/isa/s390x/inst.isle‎
Lines changed: 59 additions & 0 deletions b/‎cranelift/codegen/src/isa/s390x/inst.isle‎
Lines changed: 59 additions & 0 deletions
diff --git a/‎cranelift/codegen/src/isa/s390x/inst/emit.rs‎
Lines changed: 4 additions & 0 deletions b/‎cranelift/codegen/src/isa/s390x/inst/emit.rs‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎cranelift/codegen/src/isa/s390x/inst/mod.rs‎
Lines changed: 4 additions & 0 deletions b/‎cranelift/codegen/src/isa/s390x/inst/mod.rs‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎cranelift/codegen/src/isa/s390x/lower.isle‎
Lines changed: 110 additions & 6 deletions b/‎cranelift/codegen/src/isa/s390x/lower.isle‎
Lines changed: 110 additions & 6 deletions
@@ -1115,8 +1115,10 @@
     (SubLogical64)
     (SubLogical64Ext32)
     (Mul32)
+    (Mul32CC)
     (Mul32Ext16)
     (Mul64)
+    (Mul64CC)
     (Mul64Ext16)
     (Mul64Ext32)
     (And32)
@@ -1205,11 +1207,13 @@
     (Add32x4)
     (Add64x2)
     (Add128)
+    (Add128Cout)
     (Sub8x16)
     (Sub16x8)
     (Sub32x4)
     (Sub64x2)
     (Sub128)
+    (Sub128Cout)
     ;; Multiplication
     (Mul8x16)
     (Mul16x8)
@@ -3872,6 +3876,31 @@
                     (intcc_as_cond (IntCC.SignedLessThan)))))
         (select_bool_reg $I64 l_cond l_bound ub)))
 
+;; Helpers for generating integer arithmetic instructions with overflow outputs ;;;
+
+;; Left-shift amount that moves a fit-in-16 bit integer into the most significant
+;; positions of its 32-bit register. The overflow helpers shift the operands up by
+;; this amount, use the condition codes for the overflow, and shift back into the
+;; expected least-significant position to generate the result.
+(decl type_shift_up (Type) u8)
+(rule (type_shift_up $I16) 16)
+(rule (type_shift_up $I8) 24)
+
+;; Narrow (fits-in-16) arithmetic with an overflow output: both operands are
+;; shifted up by `type_shift_up`, `op` is applied as a flag-producing
+;; instruction, `cond` is materialized as an $I8 boolean from those flags, and
+;; the raw result is shifted back down. Yields the (result, overflow) pair.
+(decl overflow_and_result_from_shifted (Type ALUOp Reg Reg Cond) InstOutput)
+(rule (overflow_and_result_from_shifted (fits_in_16 ty) op x y cond)
+      (let ((lhs_up Reg (lshl_imm $I32 x (type_shift_up ty)))
+            (rhs_up Reg (lshl_imm $I32 y (type_shift_up ty)))
+            (flags_op ProducesFlags (alu_rrr_with_flags_paired ty op lhs_up rhs_up))
+            (of_bit Reg (lower_bool $I8 (bool (produces_flags_ignore flags_op) cond)))
+            (res_up Reg (produces_flags_get_reg flags_op))
+            (res Reg (lshr_imm $I32 res_up (type_shift_up ty))))
+        (output_pair res of_bit)))
+
+;; Build the (result, overflow) output pair from a flag-producing instruction:
+;; the first value is the producer's result register, the second is `cond`
+;; evaluated on the producer's flags and lowered to an $I8 boolean.
+(decl overflow_and_result_from_producer (ProducesFlags Cond) InstOutput)
+(rule (overflow_and_result_from_producer producer cond)
+      (let ((res Reg (produces_flags_get_reg producer))
+            (of_bit Reg (lower_bool $I8 (bool (produces_flags_ignore producer) cond))))
+        (output_pair res of_bit)))
+
 
 ;; Helpers for generating `add` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -3892,6 +3921,10 @@
 (decl add_reg (Type Reg Reg) Reg)
 (rule (add_reg ty x y) (alu_rrr ty (aluop_add ty) x y))
 
+;; Register-register add (`aluop_add`) returned as a `ProducesFlags`, so the
+;; caller can use both the result register and the flags.
+(decl add_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
+(rule (add_reg_with_flags_paired ty x y)
+      (let ((op ALUOp (aluop_add ty)))
+        (alu_rrr_with_flags_paired ty op x y)))
+
 (decl add_reg_sext32 (Type Reg Reg) Reg)
 (rule (add_reg_sext32 ty x y) (alu_rr ty (aluop_add_sext32 ty) x y))
 
@@ -3965,6 +3998,11 @@
 (rule (add_logical_mem_zext32_with_flags_paired ty x y)
       (alu_rx_with_flags_paired ty (aluop_add_logical_zext32 ty) x y))
 
+;; Vector opcode for the carry-out variant of add (`Add128Cout`); only $I128
+;; has a rule.
+(decl vecop_add_logical_cout (Type) VecBinaryOp)
+(rule (vecop_add_logical_cout $I128)
+      (VecBinaryOp.Add128Cout))
+
+;; Apply the `vecop_add_logical_cout` opcode for `ty` to `x` and `y`.
+(decl vec_add_logical_cout (Type Reg Reg) Reg)
+(rule (vec_add_logical_cout ty x y)
+      (let ((op VecBinaryOp (vecop_add_logical_cout ty)))
+        (vec_rrr ty op x y)))
 
 ;; Helpers for generating `sub` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -3985,6 +4023,10 @@
 (decl sub_reg (Type Reg Reg) Reg)
 (rule (sub_reg ty x y) (alu_rrr ty (aluop_sub ty) x y))
 
+;; Register-register subtract (`aluop_sub`) returned as a `ProducesFlags`, so
+;; the caller can use both the result register and the flags.
+(decl sub_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
+(rule (sub_reg_with_flags_paired ty x y)
+      (let ((op ALUOp (aluop_sub ty)))
+        (alu_rrr_with_flags_paired ty op x y)))
+
 (decl sub_reg_sext32 (Type Reg Reg) Reg)
 (rule (sub_reg_sext32 ty x y) (alu_rr ty (aluop_sub_sext32 ty) x y))
 
@@ -4020,6 +4062,10 @@
 (decl sub_logical_reg (Type Reg Reg) Reg)
 (rule (sub_logical_reg ty x y) (alu_rrr ty (aluop_sub_logical ty) x y))
 
+;; Register-register logical subtract (`aluop_sub_logical`) returned as a
+;; `ProducesFlags`, so the caller can use both the result and the flags.
+(decl sub_logical_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
+(rule (sub_logical_reg_with_flags_paired ty x y)
+      (let ((op ALUOp (aluop_sub_logical ty)))
+        (alu_rrr_with_flags_paired ty op x y)))
+
 (decl sub_logical_reg_zext32 (Type Reg Reg) Reg)
 (rule (sub_logical_reg_zext32 ty x y) (alu_rr ty (aluop_sub_logical_zext32 ty) x y))
 
@@ -4032,6 +4078,11 @@
 (decl sub_logical_mem_zext32 (Type Reg MemArg) Reg)
 (rule (sub_logical_mem_zext32 ty x y) (alu_rx ty (aluop_sub_logical ty) x y))
 
+;; Vector opcode for the borrow-indication variant of subtract (`Sub128Cout`);
+;; only $I128 has a rule.
+(decl vecop_sub_logical_cout (Type) VecBinaryOp)
+(rule (vecop_sub_logical_cout $I128)
+      (VecBinaryOp.Sub128Cout))
+
+;; Apply the `vecop_sub_logical_cout` opcode for `ty` to `x` and `y`.
+(decl vec_sub_logical_cout (Type Reg Reg) Reg)
+(rule (vec_sub_logical_cout ty x y)
+      (let ((op VecBinaryOp (vecop_sub_logical_cout ty)))
+        (vec_rrr ty op x y)))
 
 ;; Helpers for generating `mul` instructions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -4041,6 +4092,10 @@
 (rule (aluop_mul $I32) (ALUOp.Mul32))
 (rule (aluop_mul $I64) (ALUOp.Mul64))
 
+;; Multiply opcodes used when the flags of the multiplication are consumed
+;; (see `mul_reg_with_flags_paired`); defined for $I32 and $I64 only.
+(decl aluop_mul_cc (Type) ALUOp)
+(rule (aluop_mul_cc $I64) (ALUOp.Mul64CC))
+(rule (aluop_mul_cc $I32) (ALUOp.Mul32CC))
+
 (decl aluop_mul_sext16 (Type) ALUOp)
 (rule (aluop_mul_sext16 $I16) (ALUOp.Mul32Ext16))
 (rule (aluop_mul_sext16 $I32) (ALUOp.Mul32Ext16))
@@ -4052,6 +4107,10 @@
 (decl mul_reg (Type Reg Reg) Reg)
 (rule (mul_reg ty x y) (alu_rrr ty (aluop_mul ty) x y))
 
+;; Register-register multiply using the `aluop_mul_cc` opcode, returned as a
+;; `ProducesFlags` so the caller can use both the result and the flags.
+(decl mul_reg_with_flags_paired (Type Reg Reg) ProducesFlags)
+(rule (mul_reg_with_flags_paired ty x y)
+      (let ((op ALUOp (aluop_mul_cc ty)))
+        (alu_rrr_with_flags_paired ty op x y)))
+
 (decl mul_reg_sext32 (Type Reg Reg) Reg)
 (rule (mul_reg_sext32 ty x y) (alu_rr ty (aluop_mul_sext32 ty) x y))
 
 
@@ -1516,7 +1516,9 @@ impl Inst {
                     ALUOp::SubLogical32 => (0xb9fb, true), // SLRK
                     ALUOp::SubLogical64 => (0xb9eb, true), // SLGRK
                     ALUOp::Mul32 => (0xb9fd, true),        // MSRKC
+                    ALUOp::Mul32CC => (0xb9fd, false),     // MSRKC
                     ALUOp::Mul64 => (0xb9ed, true),        // MSGRKC
+                    ALUOp::Mul64CC => (0xb9ed, false),     // MSGRKC
                     ALUOp::And32 => (0xb9f4, true),        // NRK
                     ALUOp::And64 => (0xb9e4, true),        // NGRK
                     ALUOp::Orr32 => (0xb9f6, true),        // ORK
@@ -2745,11 +2747,13 @@ impl Inst {
                     VecBinaryOp::Add32x4 => (0xe7f3, 2),       // VAF
                     VecBinaryOp::Add64x2 => (0xe7f3, 3),       // VAG
                     VecBinaryOp::Add128 => (0xe7f3, 4),        // VAQ
+                    VecBinaryOp::Add128Cout => (0xe7f1, 4),    // VACCQ
                     VecBinaryOp::Sub8x16 => (0xe7f7, 0),       // VSB
                     VecBinaryOp::Sub16x8 => (0xe7f7, 1),       // VSH
                     VecBinaryOp::Sub32x4 => (0xe7f7, 2),       // VSF
                     VecBinaryOp::Sub64x2 => (0xe7f7, 3),       // VSG
                     VecBinaryOp::Sub128 => (0xe7f7, 4),        // VSQ
+                    VecBinaryOp::Sub128Cout => (0xe7f5, 4),    // VSCBIQ
                     VecBinaryOp::Mul8x16 => (0xe7a2, 0),       // VMLB
                     VecBinaryOp::Mul16x8 => (0xe7a2, 1),       // VMLHW
                     VecBinaryOp::Mul32x4 => (0xe7a2, 2),       // VMLF
 
@@ -1314,7 +1314,9 @@ impl Inst {
                     ALUOp::SubLogical32 => ("slrk", true),
                     ALUOp::SubLogical64 => ("slgrk", true),
                     ALUOp::Mul32 => ("msrkc", true),
+                    ALUOp::Mul32CC => ("msrkc", false),
                     ALUOp::Mul64 => ("msgrkc", true),
+                    ALUOp::Mul64CC => ("msgrkc", false),
                     ALUOp::And32 => ("nrk", true),
                     ALUOp::And64 => ("ngrk", true),
                     ALUOp::Orr32 => ("ork", true),
@@ -2529,11 +2531,13 @@ impl Inst {
                     VecBinaryOp::Add32x4 => "vaf",
                     VecBinaryOp::Add64x2 => "vag",
                     VecBinaryOp::Add128 => "vaq",
+                    VecBinaryOp::Add128Cout => "vaccq",
                     VecBinaryOp::Sub8x16 => "vsb",
                     VecBinaryOp::Sub16x8 => "vsh",
                     VecBinaryOp::Sub32x4 => "vsf",
                     VecBinaryOp::Sub64x2 => "vsg",
                     VecBinaryOp::Sub128 => "vsq",
+                    VecBinaryOp::Sub128Cout => "vscbiq",
                     VecBinaryOp::Mul8x16 => "vmlb",
                     VecBinaryOp::Mul16x8 => "vmlhw",
                     VecBinaryOp::Mul32x4 => "vmlf",
 
@@ -4326,12 +4326,116 @@
 
 ;;;; Rules for `uadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(rule 0 (lower (has_type (ty_32_or_64 ty) (uadd_overflow _ x y)))
-      (let ((sum Reg (add_reg ty x y))
-            (overflow Reg
-              (lower_bool $I8
-                          (bool (icmpu_reg ty sum x) (intcc_as_cond (IntCC.UnsignedLessThan))))))
-        (output_pair sum overflow)))
+;; 8/16-bit: 32-bit logical add on the up-shifted operands; condition mask 3
+;; is reported as the overflow flag.
+(rule 1 (lower (uadd_overflow (fits_in_16 ty) x y))
+      (let ((op ALUOp (aluop_add_logical $I32))
+            (of_cond Cond (mask_as_cond 3)))
+        (overflow_and_result_from_shifted ty op x y of_cond)))
+
+;; 32/64-bit: logical add in a single instruction; condition mask 3 on its
+;; flags is reported as the overflow flag.
+(rule 0 (lower (uadd_overflow (ty_32_or_64 ty) x y))
+      (let ((sum_flags ProducesFlags (add_logical_reg_with_flags_paired ty x y))
+            (of_cond Cond (mask_as_cond 3)))
+        (overflow_and_result_from_producer sum_flags of_cond)))
+
+;; 128-bit: the sum comes from a vector add; the overflow flag is lane 1 of
+;; the $I64X2 view of the separately computed carry-out vector.
+(rule 2 (lower (uadd_overflow $I128 x y))
+      (let ((sum Reg (vec_add $I128 x y))
+            (carry_vec Reg (vec_add_logical_cout $I128 x y))
+            (carry Reg (vec_extract_lane $I64X2 carry_vec 1 (zero_reg))))
+        (output_pair sum carry)))
+
+;;;; Rules for `usub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Note: s390x stores and computes the borrow bit as a 0 when an overflow is present
+;; so all of the conditions and computed borrows are inverted
+
+;; 8/16-bit: 32-bit logical subtract on the up-shifted operands; the overflow
+;; flag is the inverse of condition mask 3.
+(rule 1 (lower (usub_overflow (fits_in_16 ty) x y))
+      (let ((op ALUOp (aluop_sub_logical $I32))
+            (of_cond Cond (invert_cond (mask_as_cond 3))))
+        (overflow_and_result_from_shifted ty op x y of_cond)))
+
+;; 32/64-bit: logical subtract in a single instruction; the overflow flag is
+;; the inverse of condition mask 3 on its flags.
+(rule 0 (lower (usub_overflow (ty_32_or_64 ty) x y))
+      (let ((diff_flags ProducesFlags (sub_logical_reg_with_flags_paired ty x y))
+            (of_cond Cond (invert_cond (mask_as_cond 3))))
+        (overflow_and_result_from_producer diff_flags of_cond)))
+
+;; 128-bit: the difference comes from a vector subtract; lane 1 of the
+;; $I64X2 view of the borrow-indication vector is XORed with 1 to turn the
+;; inverted borrow into the overflow flag.
+(rule 2 (lower (usub_overflow $I128 x y))
+      (let ((diff Reg (vec_sub $I128 x y))
+            (borrow_vec Reg (vec_sub_logical_cout $I128 x y))
+            (no_borrow Reg (vec_extract_lane $I64X2 borrow_vec 1 (zero_reg)))
+            (borrow Reg (xor_uimm32shifted $I8 no_borrow (uimm32shifted 1 0))))
+        (output_pair diff borrow)))
+
+;;;; Rules for `sadd_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; 8/16-bit: 32-bit signed add on the up-shifted operands; condition mask 1
+;; is reported as the overflow flag.
+(rule 1 (lower (sadd_overflow (fits_in_16 ty) x y))
+      (let ((op ALUOp (aluop_add $I32))
+            (of_cond Cond (mask_as_cond 1)))
+        (overflow_and_result_from_shifted ty op x y of_cond)))
+
+;; 32/64-bit: signed add in a single instruction; condition mask 1 on its
+;; flags is reported as the overflow flag.
+(rule 0 (lower (sadd_overflow (ty_32_or_64 ty) x y))
+      (let ((sum_flags ProducesFlags (add_reg_with_flags_paired ty x y))
+            (of_cond Cond (mask_as_cond 1)))
+        (overflow_and_result_from_producer sum_flags of_cond)))
+
+;; 128-bit: no flags are used. Lane 0 of the $I64X2 view of each operand and
+;; of the sum is extracted; the overflow flag is bit 63 of
+;; (x ^ sum) & (y ^ sum), i.e. it is set when the sum's sign differs from the
+;; sign of both operands.
+(rule 2 (lower (sadd_overflow $I128 x y))
+      (let ((sum Reg (vec_add $I128 x y))
+            (hi_sum Reg (vec_extract_lane $I64X2 sum 0 (zero_reg)))
+            (hi_x Reg (vec_extract_lane $I64X2 x 0 (zero_reg)))
+            (hi_y Reg (vec_extract_lane $I64X2 y 0 (zero_reg)))
+            (x_vs_sum Reg (xor_reg $I64 hi_x hi_sum))
+            (y_vs_sum Reg (xor_reg $I64 hi_y hi_sum))
+            (sign_of Reg (and_reg $I64 x_vs_sum y_vs_sum))
+            (of_bit Reg (lshr_imm $I64 sign_of 63)))
+        (output_pair sum of_bit)))
+
+;;;; Rules for `ssub_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; 8/16-bit: 32-bit signed subtract on the up-shifted operands; condition
+;; mask 1 is reported as the overflow flag.
+(rule 1 (lower (ssub_overflow (fits_in_16 ty) x y))
+      (let ((op ALUOp (aluop_sub $I32))
+            (of_cond Cond (mask_as_cond 1)))
+        (overflow_and_result_from_shifted ty op x y of_cond)))
+
+;; 32/64-bit: use the flags generated by the subtract instruction itself;
+;; condition mask 1 is reported as the overflow flag.
+(rule 0 (lower (ssub_overflow (ty_32_or_64 ty) x y))
+      (let ((diff_flags ProducesFlags (sub_reg_with_flags_paired ty x y))
+            (of_cond Cond (mask_as_cond 1)))
+        (overflow_and_result_from_producer diff_flags of_cond)))
+
+;; 128-bit: no flags are used. Lane 0 of the $I64X2 view of each operand and
+;; of the difference is extracted; the overflow flag is bit 63 of
+;; (x ^ diff) & (x ^ y), i.e. it is set when the operands have different
+;; signs and the difference's sign differs from that of `x`.
+(rule 2 (lower (ssub_overflow $I128 x y))
+      (let ((diff Reg (vec_sub $I128 x y))
+            (hi_diff Reg (vec_extract_lane $I64X2 diff 0 (zero_reg)))
+            (hi_x Reg (vec_extract_lane $I64X2 x 0 (zero_reg)))
+            (hi_y Reg (vec_extract_lane $I64X2 y 0 (zero_reg)))
+            (x_vs_diff Reg (xor_reg $I64 hi_x hi_diff))
+            (x_vs_y Reg (xor_reg $I64 hi_x hi_y))
+            (sign_of Reg (and_reg $I64 x_vs_diff x_vs_y))
+            (of_bit Reg (lshr_imm $I64 sign_of 63)))
+        (output_pair diff of_bit)))
+
+;;;; Rules for `umul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; Up to 32 bits: zero-extend both operands to 64 bits and multiply there.
+;; The bits of the product above `ty_bits` are compared against zero; the
+;; overflow flag is set when they are not equal to zero.
+(rule 1 (lower (umul_overflow (fits_in_32 ty) x y))
+      (let ((prod Reg (mul_reg $I64 (zext64_reg ty y) (zext64_reg ty x)))
+            (upper Reg (lshr_imm $I64 prod (ty_bits ty)))
+            (upper_cmp ProducesFlags (icmps_simm16 $I32 upper 0))
+            (of_bit Reg (lower_bool $I8
+              (invert_bool (bool upper_cmp (intcc_as_cond (IntCC.Equal)))))))
+        (output_pair prod of_bit)))
+
+;; 64-bit: widening multiply into a register pair. The low half is the
+;; result; the overflow flag is set when the high half is not equal to zero.
+(rule 0 (lower (umul_overflow ty @ $I64 x y))
+      (let ((wide RegPair (umul_wide y x))
+            (prod_lo Reg (regpair_lo wide))
+            (hi_cmp ProducesFlags (icmps_simm16 ty (regpair_hi wide) 0))
+            (of_bit Reg (lower_bool $I8
+              (invert_bool (bool hi_cmp (intcc_as_cond (IntCC.Equal)))))))
+        (output_pair prod_lo of_bit)))
+
+;;;; Rules for `smul_overflow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; For fit-in-16 bit integers, only one operand (`x`) is shifted into the
+;; most significant positions of its 32-bit register; the other (`y`) is
+;; sign-extended to 32 bits. The condition codes of the 32-bit multiply give
+;; the overflow, and the product is shifted back into the expected
+;; least-significant position to generate the result.
+(rule 1 (lower (smul_overflow (fits_in_16 ty) x y))
+      (let ((rhs_ext Reg (sext32_reg ty y))
+            (lhs_up Reg (lshl_imm $I32 x (type_shift_up ty)))
+            (mul_flags ProducesFlags (mul_reg_with_flags_paired $I32 lhs_up rhs_ext))
+            (of_bit Reg (lower_bool $I8
+              (bool (produces_flags_ignore mul_flags) (mask_as_cond 1))))
+            (prod_up Reg (produces_flags_get_reg mul_flags))
+            (prod Reg (lshr_imm $I32 prod_up (type_shift_up ty))))
+        (output_pair prod of_bit)))
+
+;; 32/64-bit: use the flags generated by the multiply instruction itself;
+;; condition mask 1 is reported as the overflow flag.
+(rule 0 (lower (smul_overflow (ty_32_or_64 ty) x y))
+      (let ((mul_flags ProducesFlags (mul_reg_with_flags_paired ty x y))
+            (of_cond Cond (mask_as_cond 1)))
+        (overflow_and_result_from_producer mul_flags of_cond)))
 
 ;;;; Rules for `return` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;