RISC-V: Implement IFN SAT_ADD for both the scalar and vector

This patch implements SAT_ADD in the riscv backend, as a sample,
for both scalar and vector.  Take the vector case below as an
example:

void vec_sat_add_u64 (uint64_t *out, uint64_t *x, uint64_t *y, unsigned n)
{
  unsigned i;

  for (i = 0; i < n; i++)
    out[i] = (x[i] + y[i]) | (- (uint64_t)((uint64_t)(x[i] + y[i]) < x[i]));
}

Before this patch:
vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v0,0(a1)
  vle64.v v1,0(a2)
  slli    a4,a5,3
  sub     a3,a3,a5
  add     a1,a1,a4
  add     a2,a2,a4
  vadd.vv v1,v0,v1
  vmsgtu.vv       v0,v0,v1
  vmerge.vim      v1,v1,-1,v0
  vse64.v v1,0(a0)
  ...

After this patch:
vec_sat_add_u64:
  ...
  vsetvli a5,a3,e64,m1,ta,ma
  vle64.v v1,0(a1)
  vle64.v v2,0(a2)
  slli    a4,a5,3
  sub     a3,a3,a5
  add     a1,a1,a4
  add     a2,a2,a4
  vsaddu.vv       v1,v1,v2  <=  Vector Single-Width Saturating Add
  vse64.v v1,0(a0)
  ...

The test suites below passed with this patch.
* The riscv full regression tests.
* The aarch64 full regression tests.
* The x86 bootstrap tests.
* The x86 full regression tests.

	PR target/51492
	PR target/112600

gcc/ChangeLog:

	* config/riscv/autovec.md (usadd<mode>3): New pattern expand for
	the unsigned SAT_ADD in vector mode.
	* config/riscv/riscv-protos.h (riscv_expand_usadd): New func decl
	to expand usadd<mode>3 pattern.
	(expand_vec_usadd): Ditto but for vector.
	* config/riscv/riscv-v.cc (emit_vec_saddu): New func impl to emit
	the vsaddu insn.
	(expand_vec_usadd): New func impl to expand usadd<mode>3 for vector.
	* config/riscv/riscv.cc (riscv_expand_usadd): New func impl to
	expand usadd<mode>3 for scalar.
	* config/riscv/riscv.md (usadd<mode>3): New pattern expand for
	the unsigned SAT_ADD in scalar mode.
	* config/riscv/vector.md: Allow VLS mode for vsaddu.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/binop/vec_sat_binary.h: New test.
	* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-1.c: New test.
	* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-2.c: New test.
	* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-3.c: New test.
	* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-4.c: New test.
	* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-2.c: New test.
	* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-3.c: New test.
	* gcc.target/riscv/rvv/autovec/binop/vec_sat_u_add-run-4.c: New test.
	* gcc.target/riscv/sat_arith.h: New test.
	* gcc.target/riscv/sat_u_add-1.c: New test.
	* gcc.target/riscv/sat_u_add-2.c: New test.
	* gcc.target/riscv/sat_u_add-3.c: New test.
	* gcc.target/riscv/sat_u_add-4.c: New test.
	* gcc.target/riscv/sat_u_add-run-1.c: New test.
	* gcc.target/riscv/sat_u_add-run-2.c: New test.
	* gcc.target/riscv/sat_u_add-run-3.c: New test.
	* gcc.target/riscv/sat_u_add-run-4.c: New test.
	* gcc.target/riscv/scalar_sat_binary.h: New test.

Signed-off-by: Pan Li <pan2.li@intel.com>
This commit is contained in:
Pan Li 2024-05-17 18:49:46 +08:00
parent 5812e1bbb1
commit 34ed2b4593
25 changed files with 755 additions and 6 deletions

View file

@ -2613,6 +2613,23 @@
}
)
;; =========================================================================
;; == [INT] Saturation ALU.
;; =========================================================================
;; Includes:
;; - add
;; =========================================================================
;; Expander for the standard name usadd<mode>3 over the V_VLSI modes,
;; enabled when TARGET_VECTOR holds.  Operand 0 is the destination and
;; operands 1 and 2 are the sources; all three must be registers.  The
;; pattern emits nothing itself: it forwards the operands and <MODE>mode
;; to riscv_vector::expand_vec_usadd and then finishes with DONE.
(define_expand "usadd<mode>3"
[(match_operand:V_VLSI 0 "register_operand")
(match_operand:V_VLSI 1 "register_operand")
(match_operand:V_VLSI 2 "register_operand")]
"TARGET_VECTOR"
{
riscv_vector::expand_vec_usadd (operands[0], operands[1], operands[2], <MODE>mode);
DONE;
}
)
;; =========================================================================
;; == Early break auto-vectorization patterns
;; =========================================================================

View file

@ -133,6 +133,7 @@ extern void riscv_asm_output_external (FILE *, const tree, const char *);
extern bool
riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
extern void riscv_expand_usadd (rtx, rtx, rtx);
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
@ -633,6 +634,7 @@ void expand_vec_lrint (rtx, rtx, machine_mode, machine_mode, machine_mode);
void expand_vec_lround (rtx, rtx, machine_mode, machine_mode, machine_mode);
void expand_vec_lceil (rtx, rtx, machine_mode, machine_mode);
void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode);
void expand_vec_usadd (rtx, rtx, rtx, machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
bool, void (*)(rtx *, rtx), enum avl_type);

View file

@ -4635,6 +4635,16 @@ emit_vec_cvt_x_f_rtz (rtx op_dest, rtx op_src, rtx mask,
}
}
/* Emit one VLMAX instruction computing OP_DEST = US_PLUS (OP_1, OP_2) in
   VEC_MODE.  The instruction pattern is the predicated US_PLUS pattern
   returned by code_for_pred for VEC_MODE; TYPE is handed to
   emit_vlmax_insn unchanged.  */
static void
emit_vec_saddu (rtx op_dest, rtx op_1, rtx op_2, insn_type type,
		machine_mode vec_mode)
{
  rtx saddu_ops[] = {op_dest, op_1, op_2};

  emit_vlmax_insn (code_for_pred (US_PLUS, vec_mode), type, saddu_ops);
}
void
expand_vec_ceil (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
machine_mode vec_int_mode)
@ -4862,6 +4872,15 @@ expand_vec_lfloor (rtx op_0, rtx op_1, machine_mode vec_fp_mode,
vec_int_mode);
}
/* Expand the standard name usadd<mode>3 for vector mode.  No open-coded
sequence is needed: the vector single-width saturating add is emitted
directly through emit_vec_saddu as a BINARY_OP insn.
OP_0 is the destination, OP_1 and OP_2 are the sources and VEC_MODE is the
vector mode of the operation. */
void
expand_vec_usadd (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode)
{
emit_vec_saddu (op_0, op_1, op_2, BINARY_OP, vec_mode);
}
/* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as
well. */
void

View file

@ -11295,6 +11295,61 @@ riscv_get_raw_result_mode (int regno)
return default_get_reg_raw_mode (regno);
}
/* Implements the unsigned saturation add standard name usadd for int mode.
DEST receives the result and its mode selects the operation width; X and Y
are the addends.  The sum, the comparison result and the final value are
built in Xmode pseudos and only the low part is copied back to DEST.
The step numbers below match the step comments in the function body.
z = SAT_ADD(x, y).
=>
1. sum = x + y.
1.1. sum = truncate (sum) for QI and HI only.
2. lt = sum < x.
3. lt = -lt.
4. z = sum | lt.
5. dest = lowpart of z. */
void
riscv_expand_usadd (rtx dest, rtx x, rtx y)
{
machine_mode mode = GET_MODE (dest);
rtx xmode_sum = gen_reg_rtx (Xmode);
rtx xmode_lt = gen_reg_rtx (Xmode);
/* NOTE(review): X and Y are only reinterpreted in Xmode here, not explicitly
extended, while Step-2 compares full Xmode values.  Presumably the bits
above MODE are expected to be clean already for QI/HI - confirm. */
rtx xmode_x = gen_lowpart (Xmode, x);
rtx xmode_y = gen_lowpart (Xmode, y);
rtx xmode_dest = gen_reg_rtx (Xmode);
/* Step-1: sum = x + y */
if (mode == SImode && mode != Xmode)
{ /* SImode narrower than Xmode: add in SImode (addw) to avoid the sum truncate. */
rtx simode_sum = gen_reg_rtx (SImode);
riscv_emit_binary (PLUS, simode_sum, x, y);
emit_move_insn (xmode_sum, gen_lowpart (Xmode, simode_sum));
}
else
riscv_emit_binary (PLUS, xmode_sum, xmode_x, xmode_y);
/* Step-1.1: truncate sum for HI and QI as we have no insn for add QI/HI.
Shifting left and then logically right by the width difference between
Xmode and MODE clears every bit of the sum above MODE. */
if (mode == HImode || mode == QImode)
{
int shift_bits = GET_MODE_BITSIZE (Xmode)
- GET_MODE_BITSIZE (mode).to_constant ();
gcc_assert (shift_bits > 0);
riscv_emit_binary (ASHIFT, xmode_sum, xmode_sum, GEN_INT (shift_bits));
riscv_emit_binary (LSHIFTRT, xmode_sum, xmode_sum, GEN_INT (shift_bits));
}
/* Step-2: lt = sum < x, unsigned compare, i.e. the overflow condition. */
riscv_emit_binary (LTU, xmode_lt, xmode_sum, xmode_x);
/* Step-3: lt = -lt */
riscv_emit_unary (NEG, xmode_lt, xmode_lt);
/* Step-4: xmode_dest = sum | lt */
riscv_emit_binary (IOR, xmode_dest, xmode_lt, xmode_sum);
/* Step-5: dest = xmode_dest, taking only the MODE-sized low part. */
emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

View file

@ -4160,6 +4160,17 @@
"{ operands[6] = gen_lowpart (SImode, operands[5]); }"
[(set_attr "type" "arith")])
;; Expander for the standard name usadd<mode>3 over the scalar ANYI modes.
;; The condition string is empty, so the pattern is always available.
;; Operand 0 is the destination and operands 1 and 2 are the sources; all
;; three must be registers.  The whole sequence is produced by
;; riscv_expand_usadd, after which DONE ends the expansion.
(define_expand "usadd<mode>3"
[(match_operand:ANYI 0 "register_operand")
(match_operand:ANYI 1 "register_operand")
(match_operand:ANYI 2 "register_operand")]
""
{
riscv_expand_usadd (operands[0], operands[1], operands[2]);
DONE;
}
)
(include "bitmanip.md")
(include "crypto.md")
(include "sync.md")

View file

@ -4062,8 +4062,8 @@
;; Saturating Add and Subtract
(define_insn "@pred_<optab><mode>"
[(set (match_operand:VI 0 "register_operand" "=vd, vd, vr, vr, vd, vd, vr, vr")
(if_then_else:VI
[(set (match_operand:V_VLSI 0 "register_operand" "=vd, vd, vr, vr, vd, vd, vr, vr")
(if_then_else:V_VLSI
(unspec:<VM>
[(match_operand:<VM> 1 "vector_mask_operand" " vm, vm,Wc1,Wc1, vm, vm,Wc1,Wc1")
(match_operand 5 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK")
@ -4072,10 +4072,10 @@
(match_operand 8 "const_int_operand" " i, i, i, i, i, i, i, i")
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(any_sat_int_binop:VI
(match_operand:VI 3 "<binop_rhs1_predicate>" " vr, vr, vr, vr, vr, vr, vr, vr")
(match_operand:VI 4 "<binop_rhs2_predicate>" "<binop_rhs2_constraint>"))
(match_operand:VI 2 "vector_merge_operand" " vu, 0, vu, 0, vu, 0, vu, 0")))]
(any_sat_int_binop:V_VLSI
(match_operand:V_VLSI 3 "<binop_rhs1_predicate>" " vr, vr, vr, vr, vr, vr, vr, vr")
(match_operand:V_VLSI 4 "<binop_rhs2_predicate>" "<binop_rhs2_constraint>"))
(match_operand:V_VLSI 2 "vector_merge_operand" " vu, 0, vu, 0, vu, 0, vu, 0")))]
"TARGET_VECTOR"
"@
v<insn>.vv\t%0,%3,%4%p1

View file

@ -0,0 +1,33 @@
#ifndef HAVE_DEFINED_VEC_SAT_BINARY
#define HAVE_DEFINED_VEC_SAT_BINARY
/* To leverage this header file for run test, you need to:
1. define T as the type, for example uint8_t,
2. define N as the test array size, for example 16.
3. define RUN_VEC_SAT_BINARY as run function.
4. prepare the test_data for test cases.
*/
/* Test driver: for every entry of test_data, run RUN_VEC_SAT_BINARY on the
   two input arrays (index 0 and 1) and abort as soon as any of the N
   produced elements differs from the expected array (index 2).  */
int
main ()
{
  T out[N];
  unsigned group = 0;

  while (group < sizeof (test_data) / sizeof (test_data[0]))
    {
      T *lhs = test_data[group][0];
      T *rhs = test_data[group][1];
      T *want = test_data[group][2];
      unsigned lane;

      RUN_VEC_SAT_BINARY (T, out, lhs, rhs, N);

      for (lane = 0; lane < N; lane++)
	{
	  if (out[lane] != want[lane])
	    __builtin_abort ();
	}

      group++;
    }

  return 0;
}
#endif

View file

@ -0,0 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-skip-if "" { *-*-* } { "-flto" } } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "../../../sat_arith.h"
/*
** vec_sat_u_add_uint8_t_fmt_1:
** ...
** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e8,\s*m1,\s*ta,\s*ma
** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
** vle8\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
** ...
*/
DEF_VEC_SAT_U_ADD_FMT_1(uint8_t)
/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */

View file

@ -0,0 +1,20 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-skip-if "" { *-*-* } { "-flto" } } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "../../../sat_arith.h"
/*
** vec_sat_u_add_uint16_t_fmt_1:
** ...
** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e16,\s*m1,\s*ta,\s*ma
** ...
** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
** vle16\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
** ...
*/
DEF_VEC_SAT_U_ADD_FMT_1(uint16_t)
/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */

View file

@ -0,0 +1,20 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-skip-if "" { *-*-* } { "-flto" } } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "../../../sat_arith.h"
/*
** vec_sat_u_add_uint32_t_fmt_1:
** ...
** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e32,\s*m1,\s*ta,\s*ma
** ...
** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
** vle32\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
** ...
*/
DEF_VEC_SAT_U_ADD_FMT_1(uint32_t)
/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */

View file

@ -0,0 +1,20 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3 -ftree-vectorize -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-skip-if "" { *-*-* } { "-flto" } } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "../../../sat_arith.h"
/*
** vec_sat_u_add_uint64_t_fmt_1:
** ...
** vsetvli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*e64,\s*m1,\s*ta,\s*ma
** ...
** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
** vle64\.v\s+v[0-9]+,\s*0\([atx][0-9]+\)
** vsaddu\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+
** ...
*/
DEF_VEC_SAT_U_ADD_FMT_1(uint64_t)
/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 4 "expand" } } */

View file

@ -0,0 +1,75 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99" } */
#include "../../../sat_arith.h"
#define T uint8_t
#define N 16
#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_1
DEF_VEC_SAT_U_ADD_FMT_1(T)
T test_data[][3][N] = {
{
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
}, /* arg_0 */
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
}, /* arg_1 */
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
}, /* expect */
},
{
{
255, 255, 255, 255,
255, 255, 255, 255,
255, 255, 255, 255,
255, 255, 255, 255,
},
{
255, 255, 255, 255,
255, 255, 255, 255,
255, 255, 255, 255,
255, 255, 255, 255,
},
{
255, 255, 255, 255,
255, 255, 255, 255,
255, 255, 255, 255,
255, 255, 255, 255,
},
},
{
{
0, 0, 1, 0,
1, 2, 3, 0,
1, 2, 3, 4,
5, 254, 255, 9,
},
{
0, 1, 1, 254,
254, 254, 254, 255,
255, 255, 255, 255,
255, 255, 255, 9,
},
{
0, 1, 2, 254,
255, 255, 255, 255,
255, 255, 255, 255,
255, 255, 255, 18,
},
},
};
#include "vec_sat_binary.h"

View file

@ -0,0 +1,75 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99" } */
#include "../../../sat_arith.h"
#define T uint16_t
#define N 16
#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_1
DEF_VEC_SAT_U_ADD_FMT_1(T)
T test_data[][3][N] = {
{
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
}, /* arg_0 */
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
}, /* arg_1 */
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
}, /* expect */
},
{
{
65535, 65535, 65535, 65535,
65535, 65535, 65535, 65535,
65535, 65535, 65535, 65535,
65535, 65535, 65535, 65535,
},
{
65535, 65535, 65535, 65535,
65535, 65535, 65535, 65535,
65535, 65535, 65535, 65535,
65535, 65535, 65535, 65535,
},
{
65535, 65535, 65535, 65535,
65535, 65535, 65535, 65535,
65535, 65535, 65535, 65535,
65535, 65535, 65535, 65535,
},
},
{
{
0, 0, 1, 0,
1, 2, 3, 0,
1, 2, 3, 4,
5, 65534, 65535, 9,
},
{
0, 1, 1, 65534,
65534, 65534, 65534, 65535,
65535, 65535, 65535, 65535,
65535, 65535, 65535, 9,
},
{
0, 1, 2, 65534,
65535, 65535, 65535, 65535,
65535, 65535, 65535, 65535,
65535, 65535, 65535, 18,
},
},
};
#include "vec_sat_binary.h"

View file

@ -0,0 +1,75 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99" } */
#include "../../../sat_arith.h"
#define T uint32_t
#define N 16
#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_1
DEF_VEC_SAT_U_ADD_FMT_1(T)
T test_data[][3][N] = {
{
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
}, /* arg_0 */
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
}, /* arg_1 */
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
}, /* expect */
},
{
{
4294967295, 4294967295, 4294967295, 4294967295,
4294967295, 4294967295, 4294967295, 4294967295,
4294967295, 4294967295, 4294967295, 4294967295,
4294967295, 4294967295, 4294967295, 4294967295,
},
{
4294967295, 4294967295, 4294967295, 4294967295,
4294967295, 4294967295, 4294967295, 4294967295,
4294967295, 4294967295, 4294967295, 4294967295,
4294967295, 4294967295, 4294967295, 4294967295,
},
{
4294967295, 4294967295, 4294967295, 4294967295,
4294967295, 4294967295, 4294967295, 4294967295,
4294967295, 4294967295, 4294967295, 4294967295,
4294967295, 4294967295, 4294967295, 4294967295,
},
},
{
{
0, 0, 1, 0,
1, 2, 3, 0,
1, 2, 3, 4,
5, 4294967294, 4294967295, 9,
},
{
0, 1, 1, 4294967294,
4294967294, 4294967294, 4294967294, 4294967295,
4294967295, 4294967295, 4294967295, 4294967295,
4294967295, 4294967295, 4294967295, 9,
},
{
0, 1, 2, 4294967294,
4294967295, 4294967295, 4294967295, 4294967295,
4294967295, 4294967295, 4294967295, 4294967295,
4294967295, 4294967295, 4294967295, 18,
},
},
};
#include "vec_sat_binary.h"

View file

@ -0,0 +1,75 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99" } */
#include "../../../sat_arith.h"
#define T uint64_t
#define N 16
#define RUN_VEC_SAT_BINARY RUN_VEC_SAT_U_ADD_FMT_1
DEF_VEC_SAT_U_ADD_FMT_1(T)
T test_data[][3][N] = {
{
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
}, /* arg_0 */
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
}, /* arg_1 */
{
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
}, /* expect */
},
{
{
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
},
{
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
},
{
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
},
},
{
{
0, 0, 1, 0,
1, 2, 3, 0,
1, 2, 3, 4,
5, 18446744073709551614u, 18446744073709551615u, 9,
},
{
0, 1, 1, 18446744073709551614u,
18446744073709551614u, 18446744073709551614u, 18446744073709551614u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 9,
},
{
0, 1, 2, 18446744073709551614u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18446744073709551615u,
18446744073709551615u, 18446744073709551615u, 18446744073709551615u, 18,
},
},
};
#include "vec_sat_binary.h"

View file

@ -0,0 +1,31 @@
#ifndef HAVE_SAT_ARITH
#define HAVE_SAT_ARITH
#include <stdint-gcc.h>
/* Define the noinline scalar helper sat_u_add_<T>_fmt_1 (x, y).
   For an unsigned type T it returns the wrapped sum x + y when the sum did
   not overflow T, and all-ones otherwise: the comparison (T)(x + y) < x is
   1 exactly on overflow, and negating it yields the all-ones mask that is
   OR-ed into the sum.  */
#define DEF_SAT_U_ADD_FMT_1(T) \
T __attribute__((noinline)) \
sat_u_add_##T##_fmt_1 (T x, T y) \
{ \
return (x + y) | (-(T)((T)(x + y) < x)); \
}
/* Define the noinline loop helper vec_sat_u_add_<T>_fmt_1 (out, op_1, op_2,
   limit), which applies the same saturating-add expression element by
   element to the first LIMIT entries of op_1 and op_2 and stores the
   results into out.  */
#define DEF_VEC_SAT_U_ADD_FMT_1(T) \
void __attribute__((noinline)) \
vec_sat_u_add_##T##_fmt_1 (T *out, T *op_1, T *op_2, unsigned limit) \
{ \
unsigned i; \
for (i = 0; i < limit; i++) \
{ \
T x = op_1[i]; \
T y = op_2[i]; \
out[i] = (x + y) | (-(T)((T)(x + y) < x)); \
} \
}
/* Call wrappers: expand to a call of the helper generated for type T by the
   matching DEF_* macro above.  */
#define RUN_SAT_U_ADD_FMT_1(T, x, y) sat_u_add_##T##_fmt_1(x, y)
#define RUN_VEC_SAT_U_ADD_FMT_1(T, out, op_1, op_2, N) \
vec_sat_u_add_##T##_fmt_1(out, op_1, op_2, N)
#endif

View file

@ -0,0 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "sat_arith.h"
/*
** sat_u_add_uint8_t_fmt_1:
** add\s+[atx][0-9]+,\s*a0,\s*a1
** andi\s+[atx][0-9]+,\s*[atx][0-9]+,\s*0xff
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** neg\s+[atx][0-9]+,\s*[atx][0-9]+
** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** andi\s+a0,\s*a0,\s*0xff
** ret
*/
DEF_SAT_U_ADD_FMT_1(uint8_t)
/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */

View file

@ -0,0 +1,21 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "sat_arith.h"
/*
** sat_u_add_uint16_t_fmt_1:
** add\s+[atx][0-9]+,\s*a0,\s*a1
** slli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
** srli\s+[atx][0-9]+,\s*[atx][0-9]+,\s*48
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** neg\s+[atx][0-9]+,\s*[atx][0-9]+
** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** slli\s+a0,\s*a0,\s*48
** srli\s+a0,\s*a0,\s*48
** ret
*/
DEF_SAT_U_ADD_FMT_1(uint16_t)
/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */

View file

@ -0,0 +1,18 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "sat_arith.h"
/*
** sat_u_add_uint32_t_fmt_1:
** addw\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** neg\s+[atx][0-9]+,\s*[atx][0-9]+
** or\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** sext.w\s+a0,\s*a0
** ret
*/
DEF_SAT_U_ADD_FMT_1(uint32_t)
/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */

View file

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gc -mabi=lp64d -O3 -fdump-rtl-expand-details -fno-schedule-insns -fno-schedule-insns2" } */
/* { dg-final { check-function-bodies "**" "" } } */
#include "sat_arith.h"
/*
** sat_u_add_uint64_t_fmt_1:
** add\s+[atx][0-9]+,\s*a0,\s*a1
** sltu\s+[atx][0-9]+,\s*[atx][0-9]+,\s*[atx][0-9]+
** neg\s+[atx][0-9]+,\s*[atx][0-9]+
** or\s+a0,\s*[atx][0-9]+,\s*[atx][0-9]+
** ret
*/
DEF_SAT_U_ADD_FMT_1(uint64_t)
/* { dg-final { scan-rtl-dump-times ".SAT_ADD " 2 "expand" } } */

View file

@ -0,0 +1,25 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99" } */
#include "sat_arith.h"
#define T uint8_t
#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_1
DEF_SAT_U_ADD_FMT_1(T)
T test_data[][3] = {
/* arg_0, arg_1, expect */
{ 0, 0, 0, },
{ 0, 1, 1, },
{ 1, 1, 2, },
{ 0, 254, 254, },
{ 1, 254, 255, },
{ 2, 254, 255, },
{ 0, 255, 255, },
{ 1, 255, 255, },
{ 2, 255, 255, },
{ 255, 255, 255, },
};
#include "scalar_sat_binary.h"

View file

@ -0,0 +1,25 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99" } */
#include "sat_arith.h"
#define T uint16_t
#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_1
DEF_SAT_U_ADD_FMT_1(T)
T test_data[][3] = {
/* arg_0, arg_1, expect */
{ 0, 0, 0, },
{ 0, 1, 1, },
{ 1, 1, 2, },
{ 0, 65534, 65534, },
{ 1, 65534, 65535, },
{ 2, 65534, 65535, },
{ 0, 65535, 65535, },
{ 1, 65535, 65535, },
{ 2, 65535, 65535, },
{ 65535, 65535, 65535, },
};
#include "scalar_sat_binary.h"

View file

@ -0,0 +1,25 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99" } */
#include "sat_arith.h"
#define T uint32_t
#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_1
DEF_SAT_U_ADD_FMT_1(T)
T test_data[][3] = {
/* arg_0, arg_1, expect */
{ 0, 0, 0, },
{ 0, 1, 1, },
{ 1, 1, 2, },
{ 0, 4294967294, 4294967294, },
{ 1, 4294967294, 4294967295, },
{ 2, 4294967294, 4294967295, },
{ 0, 4294967295, 4294967295, },
{ 1, 4294967295, 4294967295, },
{ 2, 4294967295, 4294967295, },
{ 4294967295, 4294967295, 4294967295, },
};
#include "scalar_sat_binary.h"

View file

@ -0,0 +1,25 @@
/* { dg-do run { target { riscv_v } } } */
/* { dg-additional-options "-std=c99" } */
#include "sat_arith.h"
#define T uint64_t
#define RUN_SAT_BINARY RUN_SAT_U_ADD_FMT_1
DEF_SAT_U_ADD_FMT_1(T)
T test_data[][3] = {
/* arg_0, arg_1, expect */
{ 0, 0, 0, },
{ 0, 1, 1, },
{ 1, 1, 2, },
{ 0, 18446744073709551614u, 18446744073709551614u, },
{ 1, 18446744073709551614u, 18446744073709551615u, },
{ 2, 18446744073709551614u, 18446744073709551615u, },
{ 0, 18446744073709551615u, 18446744073709551615u, },
{ 1, 18446744073709551615u, 18446744073709551615u, },
{ 2, 18446744073709551615u, 18446744073709551615u, },
{ 18446744073709551615u, 18446744073709551615u, 18446744073709551615u, },
};
#include "scalar_sat_binary.h"

View file

@ -0,0 +1,27 @@
#ifndef HAVE_DEFINED_SCALAR_SAT_BINARY
#define HAVE_DEFINED_SCALAR_SAT_BINARY
/* To leverage this header file for run test, you need to:
1. define T as the type, for example uint8_t,
2. define RUN_SAT_BINARY as run function.
3. prepare the test_data for test cases.
*/
/* Test driver: each row of test_data holds { arg_0, arg_1, expect }.  Run
   RUN_SAT_BINARY on the two arguments of every row and abort on the first
   row whose result differs from the expected value.  */
int
main ()
{
  unsigned idx = 0;

  while (idx < sizeof (test_data) / sizeof (test_data[0]))
    {
      T *row = test_data[idx];

      if (RUN_SAT_BINARY (T, row[0], row[1]) != row[2])
	__builtin_abort ();

      idx++;
    }

  return 0;
}
#endif