RISC-V: Implement IFN SAT_ADD for both the scalar and vector

The patch implement the SAT_ADD in the riscv backend as the sample for both the scalar and vector. Given below vector as example: void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n) { unsigned i; for (i = 0; i < n; i++) out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i])); } Before this patch: vec_sat_add_u64: ... vsetvli a5,a3,e64,m1,ta,ma vle64.v v0,0(a1) vle64.v v1,0(a2) slli a4,a5,3 sub a3,a3,a5 add a1,a1,a4 add a2,a2,a4 vadd.vv v1,v0,v1 vmsgtu.vv v0,v0,v1 vmerge.vim v1,v1,-1,v0 vse64.v v1,0(a0) ... After this patch: vec_sat_add_u64: ... vsetvli a5,a3,e64,m1,ta,ma vle64.v v1,0(a1) vle64.v v2,0(a2) slli a4,a5,3 sub a3,a3,a5 add a1,a1,a4 add a2,a2,a4 vsaddu.vv v1,v1,v2 <= Vector Single-Width Saturating Add vse64.v v1,0(a0) ... The below test suites are passed for this patch. * The riscv fully regression tests. * The aarch64 fully regression tests. * The x86 bootstrap tests. * The x86 fully regression tests. PR target/51492 PR target/112600 gcc/ChangeLog: * config/riscv/autovec.md (usadd<mode>3): New pattern expand for the unsigned SAT_ADD in vector mode. * config/riscv/riscv-protos.h (riscv_expand_usadd): New func decl to expand usadd<mode>3 pattern. (expand_vec_usadd): Ditto but for vector. * config/riscv/riscv-v.cc (emit_vec_saddu): New func impl to emit the vsadd insn. (expand_vec_usadd): New func impl to expand usadd<mode>3 for vector. * config/riscv/riscv.cc (riscv_expand_usadd): New func impl to expand usadd<mode>3 for scalar. * config/riscv/riscv.md (usadd<mode>3): New pattern expand for the unsigned SAT_ADD in scalar mode. * config/riscv/vector.md: Allow VLS mode for vsaddu. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c: New test. * gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c: New test. * gcc.target/riscv/sat_arith.h: New test. * gcc.target/riscv/sat_u_add-1.c: New test. * gcc.target/riscv/sat_u_add-2.c: New test. * gcc.target/riscv/sat_u_add-3.c: New test. * gcc.target/riscv/sat_u_add-4.c: New test. * gcc.target/riscv/sat_u_add-run-1.c: New test. * gcc.target/riscv/sat_u_add-run-2.c: New test. * gcc.target/riscv/sat_u_add-run-3.c: New test. * gcc.target/riscv/sat_u_add-run-4.c: New test. * gcc.target/riscv/scalar_sat_binary.h: New test. Signed-off-by: Pan Li <pan2.li@intel.com>
2024-05-17 18:49:46 +08:00 · 2024-05-17 18:49:46 +08:00 · 34ed2b4593
commit 34ed2b4593
parent 5812e1bbb1
25 changed files with 755 additions and 6 deletions
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@ -2613,6 +2613,23 @@
  }
 )

+;; =========================================================================
+;; == [INT] Saturation ALU.
+;; =========================================================================
+;; Includes:
+;; - add
+;; =========================================================================
+(define_expand "usadd<mode>3"
+  [(match_operand:V_VLSI 0 "register_operand")
+   (match_operand:V_VLSI 1 "register_operand")
+   (match_operand:V_VLSI 2 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_usadd (operands[0], operands[1], operands[2], <MODE>mode);
+    DONE;
+  }
+)
+
 ;; =========================================================================
 ;; == Early break auto-vectorization patterns
 ;; =========================================================================
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@ -133,6 +133,7 @@ extern void riscv_asm_output_external (FILE *, const tree, const char *);
 extern bool
 riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
 extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
+extern void riscv_expand_usadd (rtx, rtx, rtx);

 #ifdef RTX_CODE
 extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
@ -633,6 +634,7 @@ void expand_vec_lrint (rtx, rtx, machine_mode, machine_mode, machine_mode);
 void expand_vec_lround (rtx, rtx, machine_mode, machine_mode, machine_mode);
 void expand_vec_lceil (rtx, rtx, machine_mode, machine_mode);
 void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_usadd (rtx, rtx, rtx, machine_mode);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
 			  bool, void (*)(rtx *, rtx), enum avl_type);
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@ -4635,6 +4635,16 @@ emit_vec_cvt_x_f_rtz (rtx op_dest, rtx op_src, rtx mask,
    }
 }

+static void
+emit_vec_saddu (rtx op_dest, rtx op_1, rtx op_2, insn_type type,
+		machine_mode vec_mode)
+{
+  rtx ops[] = {op_dest, op_1, op_2};
+  insn_code icode = code_for_pred (US_PLUS, vec_mode);
+
+  emit_vlmax_insn (icode, type, ops);
+}
+
 void
 expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
 		 machine_mode vec_int_mode)
@ -4862,6 +4872,15 @@ expand_vec_lfloor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
 				vec_int_mode);
 }

+/* Expand the standard name usadd<mode>3 for vector mode,  we can leverage
+   the vector fixed point vector single-width saturating add directly.  */
+
+void
+expand_vec_usadd (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode)
+{
+  emit_vec_saddu (op_0, op_1, op_2, BINARY_OP, vec_mode);
+}
+
 /* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as
   well.  */
 void
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@ -11295,6 +11295,61 @@ riscv_get_raw_result_mode (int regno)
  return default_get_reg_raw_mode (regno);
 }

+/* Implements the unsigned saturation add standard name usadd for int mode.
+
+   z = SAT_ADD(x, y).
+   =>
+   1. sum = x + y.
+   2. sum = truncate (sum) for QI and HI only.
+   3. lt = sum < x.
+   4. lt = -lt.
+   5. z = sum | lt.  */
+
+void
+riscv_expand_usadd (rtx dest, rtx x, rtx y)
+{
+  machine_mode mode = GET_MODE (dest);
+  rtx xmode_sum = gen_reg_rtx (Xmode);
+  rtx xmode_lt = gen_reg_rtx (Xmode);
+  rtx xmode_x = gen_lowpart (Xmode, x);
+  rtx xmode_y = gen_lowpart (Xmode, y);
+  rtx xmode_dest = gen_reg_rtx (Xmode);
+
+  /* Step-1: sum = x + y  */
+  if (mode == SImode && mode != Xmode)
+    { /* Take addw to avoid the sum truncate.  */
+      rtx simode_sum = gen_reg_rtx (SImode);
+      riscv_emit_binary (PLUS, simode_sum, x, y);
+      emit_move_insn (xmode_sum, gen_lowpart (Xmode, simode_sum));
+    }
+  else
+    riscv_emit_binary (PLUS, xmode_sum, xmode_x, xmode_y);
+
+  /* Step-1.1: truncate sum for HI and QI as we have no insn for add QI/HI.  */
+  if (mode == HImode || mode == QImode)
+    {
+      int shift_bits = GET_MODE_BITSIZE (Xmode)
+	- GET_MODE_BITSIZE (mode).to_constant ();
+
+      gcc_assert (shift_bits > 0);
+
+      riscv_emit_binary (ASHIFT, xmode_sum, xmode_sum, GEN_INT (shift_bits));
+      riscv_emit_binary (LSHIFTRT, xmode_sum, xmode_sum, GEN_INT (shift_bits));
+    }
+
+  /* Step-2: lt = sum < x  */
+  riscv_emit_binary (LTU, xmode_lt, xmode_sum, xmode_x);
+
+  /* Step-3: lt = -lt  */
+  riscv_emit_unary (NEG, xmode_lt, xmode_lt);
+
+  /* Step-4: xmode_dest = sum | lt  */
+  riscv_emit_binary (IOR, xmode_dest, xmode_lt, xmode_sum);
+
+  /* Step-5: dest = xmode_dest */
+  emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
+}
+
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@ -4160,6 +4160,17 @@
  "{ operands[6] = gen_lowpart (SImode, operands[5]); }"
  [(set_attr "type" "arith")])

+(define_expand "usadd<mode>3"
+  [(match_operand:ANYI 0 "register_operand")
+   (match_operand:ANYI 1 "register_operand")
+   (match_operand:ANYI 2 "register_operand")]
+  ""
+  {
+    riscv_expand_usadd (operands[0], operands[1], operands[2]);
+    DONE;
+  }
+)
+
 (include "bitmanip.md")
 (include "crypto.md")
 (include "sync.md")
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@ -4062,8 +4062,8 @@

 ;; Saturating Add and Subtract
 (define_insn "@pred_<optab><mode>"
-  [(set (match_operand:VI 0 "register_operand"           "=vd, vd, vr, vr, vd, vd, vr, vr")
-	(if_then_else:VI
+  [(set (match_operand:V_VLSI 0 "register_operand"           "=vd, vd, vr, vr, vd, vd, vr, vr")
+	(if_then_else:V_VLSI
 	  (unspec:<VM>
 	    [(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1, vm, vm,Wc1,Wc1")
 	     (match_operand 5 "vector_length_operand"    " rK, rK, rK, rK, rK, rK, rK, rK")
@ -4072,10 +4072,10 @@
 	     (match_operand 8 "const_int_operand"        "  i,  i,  i,  i,  i,  i,  i,  i")
 	     (reg:SI VL_REGNUM)
 	     (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
-	  (any_sat_int_binop:VI
-	    (match_operand:VI 3 "<binop_rhs1_predicate>" " vr, vr, vr, vr, vr, vr, vr, vr")
-	    (match_operand:VI 4 "<binop_rhs2_predicate>" "<binop_rhs2_constraint>"))
-	  (match_operand:VI 2 "vector_merge_operand"     " vu,  0, vu,  0, vu,  0, vu,  0")))]
+	  (any_sat_int_binop:V_VLSI
+	    (match_operand:V_VLSI 3 "<binop_rhs1_predicate>" " vr, vr, vr, vr, vr, vr, vr, vr")
+	    (match_operand:V_VLSI 4 "<binop_rhs2_predicate>" "<binop_rhs2_constraint>"))
+	  (match_operand:V_VLSI 2 "vector_merge_operand"     " vu,  0, vu,  0, vu,  0, vu,  0")))]
  "TARGET_VECTOR"
  "@
   v<insn>.vv\t%0,%3,%4%p1
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h
@ -0,0 +1,33 @@
+#ifndef HAVE_DEFINED_VEC_SAT_BINARY
+#define HAVE_DEFINED_VEC_SAT_BINARY
+
+/* To leverage this header files for run test, you need to:
+   1. define T as the type, for example uint8_t,
+   2. defint N as the test array size, for example 16.
+   3. define RUN_VEC_SAT_BINARY as run function.
+   4. prepare the test_data for test cases.
+ */
+
+int
+main ()
+{
+  unsigned i, k;
+  T out[N];
+
+  for (i = 0; i < sizeof (test_data) / sizeof (test_data[0]); i++)
+    {
+      T *op_1 = test_data[i][0];
+      T *op_2 = test_data[i][1];
+      T *expect = test_data[i][2];
+
+      RUN_VEC_SAT_BINARY (T, out, op_1, op_2, N);
+
+      for (k = 0; k < N; k++)
+	if (out[k] != expect[k])
+	  __builtin_abort ();
+    }
+
+  return 0;
+}
+
+#endif
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c
@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../../../sat_arith.h"
+
+/*
+** vec_sat_u_add_uint8_t_fmt_1:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*m1,\s*ta,\s*ma
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_1(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c
@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../../../sat_arith.h"
+
+/*
+** vec_sat_u_add_uint16_t_fmt_1:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*m1,\s*ta,\s*ma
+** ...
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_1(uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c
@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../../../sat_arith.h"
+
+/*
+** vec_sat_u_add_uint32_t_fmt_1:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*m1,\s*ta,\s*ma
+** ...
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_1(uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c
@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-skip-if "" { *-*-* } { "-flto" } } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "../../../sat_arith.h"
+
+/*
+** vec_sat_u_add_uint64_t_fmt_1:
+** ...
+** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e64,\s*m1,\s*ta,\s*ma
+** ...
+** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
+** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
+** ...
+*/
+DEF_VEC_SAT_U_ADD_FMT_1(uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c
@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T                  uint8_t
+#define N                  16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_1
+
+DEF_VEC_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3][N] = {
+  {
+    {
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+    }, /* arg_0 */
+    {
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+    }, /* arg_1 */
+    {
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+    }, /* expect */
+  },
+  {
+    {
+      255, 255, 255, 255,
+      255, 255, 255, 255,
+      255, 255, 255, 255,
+      255, 255, 255, 255,
+    },
+    {
+      255, 255, 255, 255,
+      255, 255, 255, 255,
+      255, 255, 255, 255,
+      255, 255, 255, 255,
+    },
+    {
+      255, 255, 255, 255,
+      255, 255, 255, 255,
+      255, 255, 255, 255,
+      255, 255, 255, 255,
+    },
+  },
+  {
+    {
+	0,   0,   1,   0,
+	1,   2,   3,   0,
+	1,   2,   3,   4,
+	5, 254, 255,   9,
+    },
+    {
+	0,   1,   1, 254,
+      254, 254, 254, 255,
+      255, 255, 255, 255,
+      255, 255, 255,   9,
+    },
+    {
+	0,   1,   2, 254,
+      255, 255, 255, 255,
+      255, 255, 255, 255,
+      255, 255, 255,  18,
+    },
+  },
+};
+
+#include "vec_sat_binary.h"
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c
@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T                  uint16_t
+#define N                  16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_1
+
+DEF_VEC_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3][N] = {
+  {
+    {
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+    }, /* arg_0 */
+    {
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+    }, /* arg_1 */
+    {
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+    }, /* expect */
+  },
+  {
+    {
+      65535, 65535, 65535, 65535,
+      65535, 65535, 65535, 65535,
+      65535, 65535, 65535, 65535,
+      65535, 65535, 65535, 65535,
+    },
+    {
+      65535, 65535, 65535, 65535,
+      65535, 65535, 65535, 65535,
+      65535, 65535, 65535, 65535,
+      65535, 65535, 65535, 65535,
+    },
+    {
+      65535, 65535, 65535, 65535,
+      65535, 65535, 65535, 65535,
+      65535, 65535, 65535, 65535,
+      65535, 65535, 65535, 65535,
+    },
+  },
+  {
+    {
+	  0,     0,     1,     0,
+	  1,     2,     3,     0,
+	  1,     2,     3,     4,
+	  5, 65534, 65535,     9,
+    },
+    {
+	  0,     1,     1, 65534,
+      65534, 65534, 65534, 65535,
+      65535, 65535, 65535, 65535,
+      65535, 65535, 65535,     9,
+    },
+    {
+	  0,     1,     2, 65534,
+      65535, 65535, 65535, 65535,
+      65535, 65535, 65535, 65535,
+      65535, 65535, 65535,    18,
+    },
+  },
+};
+
+#include "vec_sat_binary.h"
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c
@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T                  uint32_t
+#define N                  16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_1
+
+DEF_VEC_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3][N] = {
+  {
+    {
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+    }, /* arg_0 */
+    {
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+    }, /* arg_1 */
+    {
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+    }, /* expect */
+  },
+  {
+    {
+      4294967295, 4294967295, 4294967295, 4294967295,
+      4294967295, 4294967295, 4294967295, 4294967295,
+      4294967295, 4294967295, 4294967295, 4294967295,
+      4294967295, 4294967295, 4294967295, 4294967295,
+    },
+    {
+      4294967295, 4294967295, 4294967295, 4294967295,
+      4294967295, 4294967295, 4294967295, 4294967295,
+      4294967295, 4294967295, 4294967295, 4294967295,
+      4294967295, 4294967295, 4294967295, 4294967295,
+    },
+    {
+      4294967295, 4294967295, 4294967295, 4294967295,
+      4294967295, 4294967295, 4294967295, 4294967295,
+      4294967295, 4294967295, 4294967295, 4294967295,
+      4294967295, 4294967295, 4294967295, 4294967295,
+    },
+  },
+  {
+    {
+	       0,          0,          1,          0,
+	       1,          2,          3,          0,
+	       1,          2,          3,          4,
+	       5, 4294967294, 4294967295,          9,
+    },
+    {
+	       0,          1,          1, 4294967294,
+      4294967294, 4294967294, 4294967294, 4294967295,
+      4294967295, 4294967295, 4294967295, 4294967295,
+      4294967295, 4294967295, 4294967295,          9,
+    },
+    {
+	       0,          1,          2, 4294967294,
+      4294967295, 4294967295, 4294967295, 4294967295,
+      4294967295, 4294967295, 4294967295, 4294967295,
+      4294967295, 4294967295, 4294967295,         18,
+    },
+  },
+};
+
+#include "vec_sat_binary.h"
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c
@ -0,0 +1,75 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "../../../sat_arith.h"
+
+#define T                  uint64_t
+#define N                  16
+#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_1
+
+DEF_VEC_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3][N] = {
+  {
+    {
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+    }, /* arg_0 */
+    {
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+    }, /* arg_1 */
+    {
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+      0, 0, 0, 0,
+    }, /* expect */
+  },
+  {
+    {
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+    },
+    {
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+    },
+    {
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+    },
+  },
+  {
+    {
+			  0,                     0,                     1,                     0,
+			  1,                     2,                     3,                     0,
+			  1,                     2,                     3,                     4,
+			  5, 18446744073709551614u, 18446744073709551615u,                     9,
+    },
+    {
+			  0,                     1,                     1, 18446744073709551614u,
+      18446744073709551614u, 18446744073709551614u, 18446744073709551614u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u,                     9,
+    },
+    {
+			  0,                     1,                     2, 18446744073709551614u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
+      18446744073709551615u, 18446744073709551615u, 18446744073709551615u,                    18,
+    },
+  },
+};
+
+#include "vec_sat_binary.h"
--- a/gcc/testsuite/gcc.target/riscv/sat_arith.h
+++ b/gcc/testsuite/gcc.target/riscv/sat_arith.h
@ -0,0 +1,31 @@
+#ifndef HAVE_SAT_ARITH
+#define HAVE_SAT_ARITH
+
+#include <stdint-gcc.h>
+
+#define DEF_SAT_U_ADD_FMT_1(T)             \
+T __attribute__((noinline))                \
+sat_u_add_##T##_fmt_1 (T x, T y)           \
+{                                          \
+  return (x + y) | (-(T)((T)(x + y) < x)); \
+}
+
+#define DEF_VEC_SAT_U_ADD_FMT_1(T)                                   \
+void __attribute__((noinline))                                       \
+vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
+{                                                                    \
+  unsigned i;                                                        \
+  for (i = 0; i < limit; i++)                                        \
+    {                                                                \
+      T x = op_1[i];                                                 \
+      T y = op_2[i];                                                 \
+      out[i] = (x + y) | (-(T)((T)(x + y) < x));                     \
+    }                                                                \
+}
+
+#define RUN_SAT_U_ADD_FMT_1(T, x, y) sat_u_add_##T##_fmt_1(x, y)
+
+#define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
+  vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
+
+#endif
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-1.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-1.c
@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint8_t_fmt_1:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** andi\s+a0,\s*a0,\s*0xff
+** ret
+*/
+DEF_SAT_U_ADD_FMT_1(uint8_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-2.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-2.c
@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint16_t_fmt_1:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** slli\s+a0,\s*a0,\s*48
+** srli\s+a0,\s*a0,\s*48
+** ret
+*/
+DEF_SAT_U_ADD_FMT_1(uint16_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-3.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-3.c
@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint32_t_fmt_1:
+** addw\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** sext.w\s+a0,\s*a0
+** ret
+*/
+DEF_SAT_U_ADD_FMT_1(uint32_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-4.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-4.c
@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include "sat_arith.h"
+
+/*
+** sat_u_add_uint64_t_fmt_1:
+** add\s+[atx][0-9]+,\s*a0,\s*a1
+** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
+** neg\s+[atx][0-9]+,\s*[atx][0-9]+
+** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
+** ret
+*/
+DEF_SAT_U_ADD_FMT_1(uint64_t)
+
+/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-run-1.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-run-1.c
@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T              uint8_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_1
+
+DEF_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3] = {
+  /* arg_0, arg_1, expect */
+  {      0,     0,      0, },
+  {      0,     1,      1, },
+  {      1,     1,      2, },
+  {      0,   254,    254, },
+  {      1,   254,    255, },
+  {      2,   254,    255, },
+  {      0,   255,    255, },
+  {      1,   255,    255, },
+  {      2,   255,    255, },
+  {    255,   255,    255, },
+};
+
+#include "scalar_sat_binary.h"
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-run-2.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-run-2.c
@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T              uint16_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_1
+
+DEF_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3] = {
+  /* arg_0, arg_1, expect */
+  {      0,     0,      0, },
+  {      0,     1,      1, },
+  {      1,     1,      2, },
+  {      0, 65534,  65534, },
+  {      1, 65534,  65535, },
+  {      2, 65534,  65535, },
+  {      0, 65535,  65535, },
+  {      1, 65535,  65535, },
+  {      2, 65535,  65535, },
+  {  65535, 65535,  65535, },
+};
+
+#include "scalar_sat_binary.h"
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-run-3.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-run-3.c
@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T              uint32_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_1
+
+DEF_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3] = {
+  /*     arg_0,      arg_1,      expect */
+  {          0,          0,           0, },
+  {          0,          1,           1, },
+  {          1,          1,           2, },
+  {          0, 4294967294,  4294967294, },
+  {          1, 4294967294,  4294967295, },
+  {          2, 4294967294,  4294967295, },
+  {          0, 4294967295,  4294967295, },
+  {          1, 4294967295,  4294967295, },
+  {          2, 4294967295,  4294967295, },
+  { 4294967295, 4294967295,  4294967295, },
+};
+
+#include "scalar_sat_binary.h"
--- a/gcc/testsuite/gcc.target/riscv/sat_u_add-run-4.c
+++ b/gcc/testsuite/gcc.target/riscv/sat_u_add-run-4.c
@ -0,0 +1,25 @@
+/* { dg-do run { target { riscv_v } } } */
+/* { dg-additional-options "-std=c99" } */
+
+#include "sat_arith.h"
+
+#define T              uint64_t
+#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_1
+
+DEF_SAT_U_ADD_FMT_1(T)
+
+T test_data[][3] = {
+  /*                arg_0,                 arg_1,                 expect */
+  {                     0,                     0,                      0, },
+  {                     0,                     1,                      1, },
+  {                     1,                     1,                      2, },
+  {                     0, 18446744073709551614u,  18446744073709551614u, },
+  {                     1, 18446744073709551614u,  18446744073709551615u, },
+  {                     2, 18446744073709551614u,  18446744073709551615u, },
+  {                     0, 18446744073709551615u,  18446744073709551615u, },
+  {                     1, 18446744073709551615u,  18446744073709551615u, },
+  {                     2, 18446744073709551615u,  18446744073709551615u, },
+  { 18446744073709551615u, 18446744073709551615u,  18446744073709551615u, },
+};
+
+#include "scalar_sat_binary.h"
--- a/gcc/testsuite/gcc.target/riscv/scalar_sat_binary.h
+++ b/gcc/testsuite/gcc.target/riscv/scalar_sat_binary.h
@ -0,0 +1,27 @@
+#ifndef HAVE_DEFINED_SCALAR_SAT_BINARY
+#define HAVE_DEFINED_SCALAR_SAT_BINARY
+
+/* To leverage this header files for run test, you need to:
+   1. define T as the type, for example uint8_t,
+   2. define RUN_SAT_BINARY as run function.
+   3. prepare the test_data for test cases.
+ */
+
+int
+main ()
+{
+  unsigned i;
+  T *d;
+
+  for (i = 0; i < sizeof (test_data) / sizeof (test_data[0]); i++)
+    {
+      d = test_data[i];
+
+      if (RUN_SAT_BINARY (T, d[0], d[1]) != d[2])
+	__builtin_abort ();
+    }
+
+  return 0;
+}
+
+#endif